8014076: Arrays parallel and serial sorting improvements
authordl
Thu, 23 May 2013 18:34:15 +0100
changeset 17712 b56c69500af6
parent 17711 3cb70d5832aa
child 17713 c80e2ac3b938
8014076: Arrays parallel and serial sorting improvements Reviewed-by: chegar, mduigou
jdk/src/share/classes/java/util/Arrays.java
jdk/src/share/classes/java/util/ArraysParallelSortHelpers.java
jdk/src/share/classes/java/util/ComparableTimSort.java
jdk/src/share/classes/java/util/DualPivotQuicksort.java
jdk/src/share/classes/java/util/TimSort.java
jdk/test/java/util/Arrays/ParallelSorting.java
--- a/jdk/src/share/classes/java/util/Arrays.java	Thu May 23 15:50:37 2013 +0200
+++ b/jdk/src/share/classes/java/util/Arrays.java	Thu May 23 18:34:15 2013 +0100
@@ -40,7 +40,6 @@
 import java.util.stream.LongStream;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
-import static java.util.ArraysParallelSortHelpers.*;
 
 /**
  * This class contains various methods for manipulating arrays (such as
@@ -70,17 +69,62 @@
 public class Arrays {
 
     /**
-     * The minimum array length below which the sorting algorithm will not
-     * further partition the sorting task.
+     * The minimum array length below which a parallel sorting
+     * algorithm will not further partition the sorting task. Using
+     * smaller sizes typically results in memory contention across
+     * tasks that makes parallel speedups unlikely.
      */
-    // reasonable default so that we don't overcreate tasks
-    private static final int MIN_ARRAY_SORT_GRAN = 256;
+    private static final int MIN_ARRAY_SORT_GRAN = 1 << 13;
 
     // Suppresses default constructor, ensuring non-instantiability.
     private Arrays() {}
 
+    /**
+     * A comparator that implements the natural ordering of a group of
+     * mutually comparable elements. May be used when a supplied
+     * comparator is null. To simplify code-sharing within underlying
+     * implementations, the compare method only declares type Object
+     * for its second argument.
+     *
+     * Arrays class implementor's note: It is an empirical matter
+     * whether ComparableTimSort offers any performance benefit over
+     * TimSort used with this comparator.  If not, you are better off
+     * deleting or bypassing ComparableTimSort.  There is currently no
+     * empirical case for separating them for parallel sorting, so all
+     * public Object parallelSort methods use the same comparator
+     * based implementation.
+     */
+    static final class NaturalOrder implements Comparator<Object> {
+        @SuppressWarnings("unchecked")
+        public int compare(Object first, Object second) {
+            return ((Comparable<Object>)first).compareTo(second);
+        }
+        static final NaturalOrder INSTANCE = new NaturalOrder();
+    }
+
+    /**
+     * Checks that {@code fromIndex} and {@code toIndex} are in
+     * the range and throws an exception if they aren't.
+     */
+    private static void rangeCheck(int arrayLength, int fromIndex, int toIndex) {
+        if (fromIndex > toIndex) {
+            throw new IllegalArgumentException(
+                    "fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
+        }
+        if (fromIndex < 0) {
+            throw new ArrayIndexOutOfBoundsException(fromIndex);
+        }
+        if (toIndex > arrayLength) {
+            throw new ArrayIndexOutOfBoundsException(toIndex);
+        }
+    }
+
     /*
-     * Sorting of primitive type arrays.
+     * Sorting methods. Note that all public "sort" methods take the
+     * same form: Performing argument checks if necessary, and then
+     * expanding arguments into those required for the internal
+     * implementation methods residing in other package-private
+     * classes (except for legacyMergeSort, included in this class).
      */
 
     /**
@@ -95,7 +139,7 @@
      * @param a the array to be sorted
      */
     public static void sort(int[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -120,7 +164,7 @@
      */
     public static void sort(int[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -135,7 +179,7 @@
      * @param a the array to be sorted
      */
     public static void sort(long[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -160,7 +204,7 @@
      */
     public static void sort(long[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -175,7 +219,7 @@
      * @param a the array to be sorted
      */
     public static void sort(short[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -200,7 +244,7 @@
      */
     public static void sort(short[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -215,7 +259,7 @@
      * @param a the array to be sorted
      */
     public static void sort(char[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -240,7 +284,7 @@
      */
     public static void sort(char[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -255,7 +299,7 @@
      * @param a the array to be sorted
      */
     public static void sort(byte[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1);
     }
 
     /**
@@ -303,7 +347,7 @@
      * @param a the array to be sorted
      */
     public static void sort(float[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -336,7 +380,7 @@
      */
     public static void sort(float[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
     }
 
     /**
@@ -359,7 +403,7 @@
      * @param a the array to be sorted
      */
     public static void sort(double[] a) {
-        DualPivotQuicksort.sort(a);
+        DualPivotQuicksort.sort(a, 0, a.length - 1, null, 0, 0);
     }
 
     /**
@@ -392,7 +436,742 @@
      */
     public static void sort(double[] a, int fromIndex, int toIndex) {
         rangeCheck(a.length, fromIndex, toIndex);
-        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(byte[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(byte[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(byte[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1);
+        else
+            new ArraysParallelSortHelpers.FJByte.Sorter
+                (null, a, new byte[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(byte[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(byte[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(byte[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1);
+        else
+            new ArraysParallelSortHelpers.FJByte.Sorter
+                (null, a, new byte[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(char[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(char[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(char[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJChar.Sorter
+                (null, a, new char[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+      @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(char[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(char[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(char[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJChar.Sorter
+                (null, a, new char[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(short[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(short[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(short[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJShort.Sorter
+                (null, a, new short[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(short[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(short[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(short[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJShort.Sorter
+                (null, a, new short[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(int[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(int[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(int[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJInt.Sorter
+                (null, a, new int[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(int[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(int[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(int[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJInt.Sorter
+                (null, a, new int[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(long[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(long[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(long[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJLong.Sorter
+                (null, a, new long[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(long[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(long[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(long[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJLong.Sorter
+                (null, a, new long[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * <p>The {@code <} relation does not provide a total order on all float
+     * values: {@code -0.0f == 0.0f} is {@code true} and a {@code Float.NaN}
+     * value compares neither less than, greater than, nor equal to any value,
+     * even itself. This method uses the total order imposed by the method
+     * {@link Float#compareTo}: {@code -0.0f} is treated as less than value
+     * {@code 0.0f} and {@code Float.NaN} is considered greater than any
+     * other value and all {@code Float.NaN} values are considered equal.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(float[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(float[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(float[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJFloat.Sorter
+                (null, a, new float[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+     * <p>The {@code <} relation does not provide a total order on all float
+     * values: {@code -0.0f == 0.0f} is {@code true} and a {@code Float.NaN}
+     * value compares neither less than, greater than, nor equal to any value,
+     * even itself. This method uses the total order imposed by the method
+     * {@link Float#compareTo}: {@code -0.0f} is treated as less than value
+     * {@code 0.0f} and {@code Float.NaN} is considered greater than any
+     * other value and all {@code Float.NaN} values are considered equal.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(float[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(float[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(float[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJFloat.Sorter
+                (null, a, new float[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array into ascending numerical order.
+     *
+     * <p>The {@code <} relation does not provide a total order on all double
+     * values: {@code -0.0d == 0.0d} is {@code true} and a {@code Double.NaN}
+     * value compares neither less than, greater than, nor equal to any value,
+     * even itself. This method uses the total order imposed by the method
+     * {@link Double#compareTo}: {@code -0.0d} is treated as less than value
+     * {@code 0.0d} and {@code Double.NaN} is considered greater than any
+     * other value and all {@code Double.NaN} values are considered equal.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(double[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(double[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(double[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, 0, n - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJDouble.Sorter
+                (null, a, new double[n], 0, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the array into ascending numerical order.
+     * The range to be sorted extends from the index {@code fromIndex},
+     * inclusive, to the index {@code toIndex}, exclusive. If
+     * {@code fromIndex == toIndex}, the range to be sorted is empty.
+     *
+     * <p>The {@code <} relation does not provide a total order on all double
+     * values: {@code -0.0d == 0.0d} is {@code true} and a {@code Double.NaN}
+     * value compares neither less than, greater than, nor equal to any value,
+     * even itself. This method uses the total order imposed by the method
+     * {@link Double#compareTo}: {@code -0.0d} is treated as less than value
+     * {@code 0.0d} and {@code Double.NaN} is considered greater than any
+     * other value and all {@code Double.NaN} values are considered equal.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(double[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(double[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element, inclusive, to be sorted
+     * @param toIndex the index of the last element, exclusive, to be sorted
+     *
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
+     * @throws ArrayIndexOutOfBoundsException
+     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
+     *
+     * @since 1.8
+     */
+    public static void parallelSort(double[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            DualPivotQuicksort.sort(a, fromIndex, toIndex - 1, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJDouble.Sorter
+                (null, a, new double[n], fromIndex, n, 0,
+                 ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g).invoke();
+    }
+
+    /**
+     * Sorts the specified array of objects into ascending order, according
+     * to the {@linkplain Comparable natural ordering} of its elements.
+     * All elements in the array must implement the {@link Comparable}
+     * interface.  Furthermore, all elements in the array must be
+     * <i>mutually comparable</i> (that is, {@code e1.compareTo(e2)} must
+     * not throw a {@code ClassCastException} for any elements {@code e1}
+     * and {@code e2} in the array).
+     *
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(Object[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(Object[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     *
+     * @throws ClassCastException if the array contains elements that are not
+     *         <i>mutually comparable</i> (for example, strings and integers)
+     * @throws IllegalArgumentException (optional) if the natural
+     *         ordering of the array elements is found to violate the
+     *         {@link Comparable} contract
+     *
+     * @since 1.8
+     */
+    @SuppressWarnings("unchecked")
+    public static <T extends Comparable<? super T>> void parallelSort(T[] a) {
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, 0, n, NaturalOrder.INSTANCE, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 0, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, NaturalOrder.INSTANCE).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the specified array of objects into
+     * ascending order, according to the
+     * {@linkplain Comparable natural ordering} of its
+     * elements.  The range to be sorted extends from index
+     * {@code fromIndex}, inclusive, to index {@code toIndex}, exclusive.
+     * (If {@code fromIndex==toIndex}, the range to be sorted is empty.)  All
+     * elements in this range must implement the {@link Comparable}
+     * interface.  Furthermore, all elements in this range must be <i>mutually
+     * comparable</i> (that is, {@code e1.compareTo(e2)} must not throw a
+     * {@code ClassCastException} for any elements {@code e1} and
+     * {@code e2} in the array).
+     *
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(Object[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(Object[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element (inclusive) to be
+     *        sorted
+     * @param toIndex the index of the last element (exclusive) to be sorted
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex} or
+     *         (optional) if the natural ordering of the array elements is
+     *         found to violate the {@link Comparable} contract
+     * @throws ArrayIndexOutOfBoundsException if {@code fromIndex < 0} or
+     *         {@code toIndex > a.length}
+     * @throws ClassCastException if the array contains elements that are
+     *         not <i>mutually comparable</i> (for example, strings and
+     *         integers).
+     *
+     * @since 1.8
+     */
+    @SuppressWarnings("unchecked")
+    public static <T extends Comparable<? super T>>
+    void parallelSort(T[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, fromIndex, toIndex, NaturalOrder.INSTANCE, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 fromIndex, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, NaturalOrder.INSTANCE).invoke();
+    }
+
+    /**
+     * Sorts the specified array of objects according to the order induced by
+     * the specified comparator.  All elements in the array must be
+     * <i>mutually comparable</i> by the specified comparator (that is,
+     * {@code c.compare(e1, e2)} must not throw a {@code ClassCastException}
+     * for any elements {@code e1} and {@code e2} in the array).
+     *
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(Object[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(Object[]) Arrays.sort} method. The algorithm requires a
+     * working space no greater than the size of the original array. The
+     * {@link ForkJoinPool#commonPool() ForkJoin common pool} is used to
+     * execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param cmp the comparator to determine the order of the array.  A
+     *        {@code null} value indicates that the elements'
+     *        {@linkplain Comparable natural ordering} should be used.
+     * @throws ClassCastException if the array contains elements that are
+     *         not <i>mutually comparable</i> using the specified comparator
+     * @throws IllegalArgumentException (optional) if the comparator is
+     *         found to violate the {@link java.util.Comparator} contract
+     *
+     * @since 1.8
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> void parallelSort(T[] a, Comparator<? super T> cmp) {
+        if (cmp == null)
+            cmp = NaturalOrder.INSTANCE;
+        int n = a.length, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, 0, n, cmp, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 0, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, cmp).invoke();
+    }
+
+    /**
+     * Sorts the specified range of the specified array of objects according
+     * to the order induced by the specified comparator.  The range to be
+     * sorted extends from index {@code fromIndex}, inclusive, to index
+     * {@code toIndex}, exclusive.  (If {@code fromIndex==toIndex}, the
+     * range to be sorted is empty.)  All elements in the range must be
+     * <i>mutually comparable</i> by the specified comparator (that is,
+     * {@code c.compare(e1, e2)} must not throw a {@code ClassCastException}
+     * for any elements {@code e1} and {@code e2} in the range).
+     *
+     * <p>This sort is guaranteed to be <i>stable</i>:  equal elements will
+     * not be reordered as a result of the sort.
+     *
+     * @implNote The sorting algorithm is a parallel sort-merge that breaks the
+     * array into sub-arrays that are themselves sorted and then merged. When
+     * the sub-array length reaches a minimum granularity, the sub-array is
+     * sorted using the appropriate {@link Arrays#sort(Object[]) Arrays.sort}
+     * method. If the length of the specified array is less than the minimum
+     * granularity, then it is sorted using the appropriate {@link
+     * Arrays#sort(Object[]) Arrays.sort} method. The algorithm requires a working
+     * space no greater than the size of the specified range of the original
+     * array. The {@link ForkJoinPool#commonPool() ForkJoin common pool} is
+     * used to execute any parallel tasks.
+     *
+     * @param a the array to be sorted
+     * @param fromIndex the index of the first element (inclusive) to be
+     *        sorted
+     * @param toIndex the index of the last element (exclusive) to be sorted
+     * @param cmp the comparator to determine the order of the array.  A
+     *        {@code null} value indicates that the elements'
+     *        {@linkplain Comparable natural ordering} should be used.
+     * @throws IllegalArgumentException if {@code fromIndex > toIndex} or
+     *         (optional) if the natural ordering of the array elements is
+     *         found to violate the {@link Comparable} contract
+     * @throws ArrayIndexOutOfBoundsException if {@code fromIndex < 0} or
+     *         {@code toIndex > a.length}
+     * @throws ClassCastException if the array contains elements that are
+     *         not <i>mutually comparable</i> (for example, strings and
+     *         integers).
+     *
+     * @since 1.8
+     */
+    @SuppressWarnings("unchecked")
+    public static <T> void parallelSort(T[] a, int fromIndex, int toIndex,
+                                        Comparator<? super T> cmp) {
+        rangeCheck(a.length, fromIndex, toIndex);
+        if (cmp == null)
+            cmp = NaturalOrder.INSTANCE;
+        int n = toIndex - fromIndex, p, g;
+        if (n <= MIN_ARRAY_SORT_GRAN ||
+            (p = ForkJoinPool.getCommonPoolParallelism()) == 1)
+            TimSort.sort(a, fromIndex, toIndex, cmp, null, 0, 0);
+        else
+            new ArraysParallelSortHelpers.FJObject.Sorter<T>
+                (null, a,
+                 (T[])Array.newInstance(a.getClass().getComponentType(), n),
+                 fromIndex, n, 0, ((g = n / (p << 2)) <= MIN_ARRAY_SORT_GRAN) ?
+                 MIN_ARRAY_SORT_GRAN : g, cmp).invoke();
     }
 
     /*
@@ -412,39 +1191,6 @@
                     "java.util.Arrays.useLegacyMergeSort")).booleanValue();
     }
 
-    /*
-     * If this platform has an optimizing VM, check whether ComparableTimSort
-     * offers any performance benefit over TimSort in conjunction with a
-     * comparator that returns:
-     *    {@code ((Comparable)first).compareTo(Second)}.
-     * If not, you are better off deleting ComparableTimSort to
-     * eliminate the code duplication.  In other words, the commented
-     * out code below is the preferable implementation for sorting
-     * arrays of Comparables if it offers sufficient performance.
-     */
-
-//    /**
-//     * A comparator that implements the natural ordering of a group of
-//     * mutually comparable elements.  Using this comparator saves us
-//     * from duplicating most of the code in this file (one version for
-//     * Comparables, one for explicit Comparators).
-//     */
-//    private static final Comparator<Object> NATURAL_ORDER =
-//            new Comparator<Object>() {
-//        @SuppressWarnings("unchecked")
-//        public int compare(Object first, Object second) {
-//            return ((Comparable<Object>)first).compareTo(second);
-//        }
-//    };
-//
-//    public static void sort(Object[] a) {
-//        sort(a, 0, a.length, NATURAL_ORDER);
-//    }
-//
-//    public static void sort(Object[] a, int fromIndex, int toIndex) {
-//        sort(a, fromIndex, toIndex, NATURAL_ORDER);
-//    }
-
     /**
      * Sorts the specified array of objects into ascending order, according
      * to the {@linkplain Comparable natural ordering} of its elements.
@@ -491,7 +1237,7 @@
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a);
         else
-            ComparableTimSort.sort(a);
+            ComparableTimSort.sort(a, 0, a.length, null, 0, 0);
     }
 
     /** To be removed in a future release. */
@@ -553,16 +1299,16 @@
      *         integers).
      */
     public static void sort(Object[] a, int fromIndex, int toIndex) {
+        rangeCheck(a.length, fromIndex, toIndex);
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a, fromIndex, toIndex);
         else
-            ComparableTimSort.sort(a, fromIndex, toIndex);
+            ComparableTimSort.sort(a, fromIndex, toIndex, null, 0, 0);
     }
 
     /** To be removed in a future release. */
     private static void legacyMergeSort(Object[] a,
                                         int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
         Object[] aux = copyOfRange(a, fromIndex, toIndex);
         mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
     }
@@ -676,10 +1422,12 @@
      *         found to violate the {@link Comparator} contract
      */
     public static <T> void sort(T[] a, Comparator<? super T> c) {
+        if (c == null)
+            c = NaturalOrder.INSTANCE;
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a, c);
         else
-            TimSort.sort(a, c);
+            TimSort.sort(a, 0, a.length, c, null, 0, 0);
     }
 
     /** To be removed in a future release. */
@@ -744,16 +1492,18 @@
      */
     public static <T> void sort(T[] a, int fromIndex, int toIndex,
                                 Comparator<? super T> c) {
+        if (c == null)
+            c = NaturalOrder.INSTANCE;
+        rangeCheck(a.length, fromIndex, toIndex);
         if (LegacyMergeSort.userRequested)
             legacyMergeSort(a, fromIndex, toIndex, c);
         else
-            TimSort.sort(a, fromIndex, toIndex, c);
+            TimSort.sort(a, fromIndex, toIndex, c, null, 0, 0);
     }
 
     /** To be removed in a future release. */
     private static <T> void legacyMergeSort(T[] a, int fromIndex, int toIndex,
                                             Comparator<? super T> c) {
-        rangeCheck(a.length, fromIndex, toIndex);
         T[] aux = copyOfRange(a, fromIndex, toIndex);
         if (c==null)
             mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
@@ -809,630 +1559,6 @@
         }
     }
 
-    /*
-     * Parallel sorting of primitive type arrays.
-     */
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(byte[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(byte[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(byte[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(byte[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJByte.Sorter task = new FJByte.Sorter(a, new byte[a.length], fromIndex,
-                                               nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(char[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(char[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(char[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(char[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJChar.Sorter task = new FJChar.Sorter(a, new char[a.length], fromIndex,
-                                               nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(short[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(short[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(short[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(short[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJShort.Sorter task = new FJShort.Sorter(a, new short[a.length], fromIndex,
-                                                 nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(int[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(int[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(int[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(int[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJInt.Sorter task = new FJInt.Sorter(a, new int[a.length], fromIndex,
-                                             nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(long[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(long[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(long[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(long[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJLong.Sorter task = new FJLong.Sorter(a, new long[a.length], fromIndex,
-                                               nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>The {@code <} relation does not provide a total order on all float
-     * values: {@code -0.0f == 0.0f} is {@code true} and a {@code Float.NaN}
-     * value compares neither less than, greater than, nor equal to any value,
-     * even itself. This method uses the total order imposed by the method
-     * {@link Float#compareTo}: {@code -0.0f} is treated as less than value
-     * {@code 0.0f} and {@code Float.NaN} is considered greater than any
-     * other value and all {@code Float.NaN} values are considered equal.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(float[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(float[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>The {@code <} relation does not provide a total order on all float
-     * values: {@code -0.0f == 0.0f} is {@code true} and a {@code Float.NaN}
-     * value compares neither less than, greater than, nor equal to any value,
-     * even itself. This method uses the total order imposed by the method
-     * {@link Float#compareTo}: {@code -0.0f} is treated as less than value
-     * {@code 0.0f} and {@code Float.NaN} is considered greater than any
-     * other value and all {@code Float.NaN} values are considered equal.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(float[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(float[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJFloat.Sorter task = new FJFloat.Sorter(a, new float[a.length], fromIndex,
-                                                 nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array into ascending numerical order.
-     *
-     * <p>The {@code <} relation does not provide a total order on all double
-     * values: {@code -0.0d == 0.0d} is {@code true} and a {@code Double.NaN}
-     * value compares neither less than, greater than, nor equal to any value,
-     * even itself. This method uses the total order imposed by the method
-     * {@link Double#compareTo}: {@code -0.0d} is treated as less than value
-     * {@code 0.0d} and {@code Double.NaN} is considered greater than any
-     * other value and all {@code Double.NaN} values are considered equal.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(double[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(double[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the array into ascending order. The range
-     * to be sorted extends from the index {@code fromIndex}, inclusive, to
-     * the index {@code toIndex}, exclusive. If {@code fromIndex == toIndex},
-     * the range to be sorted is empty.
-     *
-     * <p>The {@code <} relation does not provide a total order on all double
-     * values: {@code -0.0d == 0.0d} is {@code true} and a {@code Double.NaN}
-     * value compares neither less than, greater than, nor equal to any value,
-     * even itself. This method uses the total order imposed by the method
-     * {@link Double#compareTo}: {@code -0.0d} is treated as less than value
-     * {@code 0.0d} and {@code Double.NaN} is considered greater than any
-     * other value and all {@code Double.NaN} values are considered equal.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(double[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element, inclusive, to be sorted
-     * @param toIndex the index of the last element, exclusive, to be sorted
-     *
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex}
-     * @throws ArrayIndexOutOfBoundsException
-     *     if {@code fromIndex < 0} or {@code toIndex > a.length}
-     *
-     * @since 1.8
-     */
-    public static void parallelSort(double[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        int gran = getSplitThreshold(nelements);
-        FJDouble.Sorter task = new FJDouble.Sorter(a, new double[a.length],
-                                                   fromIndex, nelements, gran);
-        task.invoke();
-    }
-
-    /*
-     * Parallel sorting of complex type arrays.
-     */
-
-    /**
-     * Sorts the specified array of objects into ascending order, according
-     * to the {@linkplain Comparable natural ordering} of its elements.
-     * All elements in the array must implement the {@link Comparable}
-     * interface.  Furthermore, all elements in the array must be
-     * <i>mutually comparable</i> (that is, {@code e1.compareTo(e2)} must
-     * not throw a {@code ClassCastException} for any elements {@code e1}
-     * and {@code e2} in the array).
-     *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(Object[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     *
-     * @throws ClassCastException if the array contains elements that are not
-     *         <i>mutually comparable</i> (for example, strings and integers)
-     * @throws IllegalArgumentException (optional) if the natural
-     *         ordering of the array elements is found to violate the
-     *         {@link Comparable} contract
-     *
-     * @since 1.8
-     */
-    public static <T extends Comparable<? super T>> void parallelSort(T[] a) {
-        parallelSort(a, 0, a.length);
-    }
-
-    /**
-     * Sorts the specified range of the specified array of objects into
-     * ascending order, according to the
-     * {@linkplain Comparable natural ordering} of its
-     * elements.  The range to be sorted extends from index
-     * {@code fromIndex}, inclusive, to index {@code toIndex}, exclusive.
-     * (If {@code fromIndex==toIndex}, the range to be sorted is empty.)  All
-     * elements in this range must implement the {@link Comparable}
-     * interface.  Furthermore, all elements in this range must be <i>mutually
-     * comparable</i> (that is, {@code e1.compareTo(e2)} must not throw a
-     * {@code ClassCastException} for any elements {@code e1} and
-     * {@code e2} in the array).
-     *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(Object[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element (inclusive) to be
-     *        sorted
-     * @param toIndex the index of the last element (exclusive) to be sorted
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex} or
-     *         (optional) if the natural ordering of the array elements is
-     *         found to violate the {@link Comparable} contract
-     * @throws ArrayIndexOutOfBoundsException if {@code fromIndex < 0} or
-     *         {@code toIndex > a.length}
-     * @throws ClassCastException if the array contains elements that are
-     *         not <i>mutually comparable</i> (for example, strings and
-     *         integers).
-     *
-     * @since 1.8
-     */
-    public static <T extends Comparable<? super T>>
-            void parallelSort(T[] a, int fromIndex, int toIndex) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        Class<?> tc = a.getClass().getComponentType();
-        @SuppressWarnings("unchecked")
-        T[] workspace = (T[])Array.newInstance(tc, a.length);
-        int gran = getSplitThreshold(nelements);
-        FJComparable.Sorter<T> task = new FJComparable.Sorter<>(a, workspace,
-                                                                fromIndex,
-                                                                nelements, gran);
-        task.invoke();
-    }
-
-    /**
-     * Sorts the specified array of objects according to the order induced by
-     * the specified comparator.  All elements in the array must be
-     * <i>mutually comparable</i> by the specified comparator (that is,
-     * {@code c.compare(e1, e2)} must not throw a {@code ClassCastException}
-     * for any elements {@code e1} and {@code e2} in the array).
-     *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(Object[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param c the comparator to determine the order of the array.  A
-     *        {@code null} value indicates that the elements'
-     *        {@linkplain Comparable natural ordering} should be used.
-     * @throws ClassCastException if the array contains elements that are
-     *         not <i>mutually comparable</i> using the specified comparator
-     * @throws IllegalArgumentException (optional) if the comparator is
-     *         found to violate the {@link java.util.Comparator} contract
-     *
-     * @since 1.8
-     */
-    public static <T> void parallelSort(T[] a, Comparator<? super T> c) {
-        parallelSort(a, 0, a.length, c);
-    }
-
-    /**
-     * Sorts the specified range of the specified array of objects according
-     * to the order induced by the specified comparator.  The range to be
-     * sorted extends from index {@code fromIndex}, inclusive, to index
-     * {@code toIndex}, exclusive.  (If {@code fromIndex==toIndex}, the
-     * range to be sorted is empty.)  All elements in the range must be
-     * <i>mutually comparable</i> by the specified comparator (that is,
-     * {@code c.compare(e1, e2)} must not throw a {@code ClassCastException}
-     * for any elements {@code e1} and {@code e2} in the range).
-     *
-     * <p>This sort is not guaranteed to be <i>stable</i>:  equal elements
-     * may be reordered as a result of the sort.
-     *
-     * <p>Implementation note: The sorting algorithm is a parallel sort-merge
-     * that breaks the array into sub-arrays that are themselves sorted and then
-     * merged. When the sub-array length reaches a minimum granularity, the
-     * sub-array is sorted using the appropriate {@link Arrays#sort(Object[])
-     * Arrays.sort} method. The algorithm requires a working space equal to the
-     * size of the original array. The {@link
-     * java.util.concurrent.ForkJoinPool#commonPool() ForkJoin common pool} is
-     * used to execute any parallel tasks.
-     *
-     * @param a the array to be sorted
-     * @param fromIndex the index of the first element (inclusive) to be
-     *        sorted
-     * @param toIndex the index of the last element (exclusive) to be sorted
-     * @param c the comparator to determine the order of the array.  A
-     *        {@code null} value indicates that the elements'
-     *        {@linkplain Comparable natural ordering} should be used.
-     * @throws IllegalArgumentException if {@code fromIndex > toIndex} or
-     *         (optional) if the natural ordering of the array elements is
-     *         found to violate the {@link Comparable} contract
-     * @throws ArrayIndexOutOfBoundsException if {@code fromIndex < 0} or
-     *         {@code toIndex > a.length}
-     * @throws ClassCastException if the array contains elements that are
-     *         not <i>mutually comparable</i> (for example, strings and
-     *         integers).
-     *
-     * @since 1.8
-     */
-    public static <T> void parallelSort(T[] a, int fromIndex, int toIndex,
-                                        Comparator<? super T> c) {
-        rangeCheck(a.length, fromIndex, toIndex);
-        int nelements = toIndex - fromIndex;
-        Class<?> tc = a.getClass().getComponentType();
-        @SuppressWarnings("unchecked")
-        T[] workspace = (T[])Array.newInstance(tc, a.length);
-        int gran = getSplitThreshold(nelements);
-        FJComparator.Sorter<T> task = new FJComparator.Sorter<>(a, workspace,
-                                                                fromIndex,
-                                                                nelements, gran, c);
-        task.invoke();
-    }
-
-    /**
-     * Returns the size threshold for splitting into subtasks.
-     * By default, uses about 8 times as many tasks as threads
-     *
-     * @param n number of elements in the array to be processed
-     */
-    private static int getSplitThreshold(int n) {
-        int p = java.util.concurrent.ForkJoinPool.getCommonPoolParallelism();
-        int t = (p > 1) ? (1 + n / (p << 3)) : n;
-        return t < MIN_ARRAY_SORT_GRAN ? MIN_ARRAY_SORT_GRAN : t;
-    }
-
-    /**
-     * Checks that {@code fromIndex} and {@code toIndex} are in
-     * the range and throws an appropriate exception, if they aren't.
-     */
-    private static void rangeCheck(int length, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex) {
-            throw new IllegalArgumentException(
-                "fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")");
-        }
-        if (fromIndex < 0) {
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        }
-        if (toIndex > length) {
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-        }
-    }
-
     // Searching
 
     /**
--- a/jdk/src/share/classes/java/util/ArraysParallelSortHelpers.java	Thu May 23 15:50:37 2013 +0200
+++ b/jdk/src/share/classes/java/util/ArraysParallelSortHelpers.java	Thu May 23 18:34:15 2013 +0100
@@ -25,6 +25,7 @@
 package java.util;
 
 import java.util.concurrent.RecursiveAction;
+import java.util.concurrent.CountedCompleter;
 
 /**
  * Helper utilities for the parallel sort methods in Arrays.parallelSort.
@@ -44,1180 +45,966 @@
  *             c. merge them together
  *         3. merge together the two halves.
  *
- * One reason for splitting in quarters is that this guarantees
- * that the final sort is in the main array, not the workspace
- * array.  (workspace and main swap roles on each subsort step.)
- * Leaf-level sorts use a Sequential quicksort, that in turn uses
- * insertion sort if under threshold.  Otherwise it uses median of
- * three to pick pivot, and loops rather than recurses along left
- * path.
- *
+ * One reason for splitting in quarters is that this guarantees that
+ * the final sort is in the main array, not the workspace array.
+ * (workspace and main swap roles on each subsort step.)  Leaf-level
+ * sorts use the associated sequential sort.
  *
- * Merger classes perform merging for Sorter. If big enough, splits Left
- * partition in half; finds the greatest point in Right partition
- * less than the beginning of the second half of Left via binary
- * search; and then, in parallel, merges left half of Left with
- * elements of Right up to split point, and merges right half of
- * Left with elements of R past split point. At leaf, it just
- * sequentially merges. This is all messy to code; sadly we need
- * distinct versions for each type.
+ * Merger classes perform merging for Sorter.  They are structured
+ * such that if the underlying sort is stable (as is true for
+ * TimSort), then so is the full sort.  If big enough, they split the
+ * largest of the two partitions in half, find the greatest point in
+ * smaller partition less than the beginning of the second half of
+ * larger via binary search; and then merge in parallel the two
+ * partitions.  In part to ensure tasks are triggered in
+ * stability-preserving order, the current CountedCompleter design
+ * requires some little tasks to serve as place holders for triggering
+ * completion tasks.  These classes (EmptyCompleter and Relay) don't
+ * need to keep track of the arrays, and are never themselves forked,
+ * so don't hold any task state.
  *
+ * The primitive class versions (FJByte... FJDouble) are
+ * identical to each other except for type declarations.
+ *
+ * The base sequential sorts rely on non-public versions of TimSort,
+ * ComparableTimSort, and DualPivotQuicksort sort methods that accept
+ * temp workspace array slices that we will have already allocated, so
+ * avoids redundant allocation. (Except for DualPivotQuicksort byte[]
+ * sort, that does not ever use a workspace array.)
  */
 /*package*/ class ArraysParallelSortHelpers {
 
-    // RFE: we should only need a working array as large as the subarray
-    //      to be sorted, but the logic assumes that indices in the two
-    //      arrays always line-up
+    /*
+     * Style note: The task classes have a lot of parameters, that are
+     * stored as task fields and copied to local variables and used in
+     * compute() methods, We pack these into as few lines as possible,
+     * and hoist consistency checks among them before main loops, to
+     * reduce distraction.
+     */
 
-    /** byte support class */
-    static final class FJByte {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 749471161188027634L;
-            final byte[] a;     // array to be sorted.
-            final byte[] w;     // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
+    /**
+     * A placeholder task for Sorters, used for the lowest
+     * quartile task, that does not need to maintain array state.
+     */
+    static final class EmptyCompleter extends CountedCompleter<Void> {
+        static final long serialVersionUID = 2446542900576103244L;
+        EmptyCompleter(CountedCompleter<?> p) { super(p); }
+        public final void compute() { }
+    }
 
-            Sorter(byte[] a, byte[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
-            }
+    /**
+     * A trigger for secondary merge of two merges
+     */
+    static final class Relay extends CountedCompleter<Void> {
+        static final long serialVersionUID = 2446542900576103244L;
+        final CountedCompleter<?> task;
+        Relay(CountedCompleter<?> task) {
+            super(null, 1);
+            this.task = task;
+        }
+        public final void compute() { }
+        public final void onCompletion(CountedCompleter<?> t) {
+            task.compute();
+        }
+    }
 
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final byte[] a = this.a;
-                final byte[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,  g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l+h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h,
-                               l+h, n-h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   //skip rangeCheck
+    /** Object + Comparator support class */
+    static final class FJObject {
+        static final class Sorter<T> extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final T[] a, w;
+            final int base, size, wbase, gran;
+            Comparator<? super T> comparator;
+            Sorter(CountedCompleter<?> par, T[] a, T[] w, int base, int size,
+                   int wbase, int gran,
+                   Comparator<? super T> comparator) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
+                this.comparator = comparator;
+            }
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                Comparator<? super T> c = this.comparator;
+                T[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger<T>(s, w, a, wb, h,
+                                                       wb+h, n-h, b, g, c));
+                    Relay rc = new Relay(new Merger<T>(fc, a, w, b+h, q,
+                                                       b+u, n-u, wb+h, g, c));
+                    new Sorter<T>(rc, a, w, b+u, n-u, wb+u, g, c).fork();
+                    new Sorter<T>(rc, a, w, b+h, q, wb+h, g, c).fork();;
+                    Relay bc = new Relay(new Merger<T>(fc, a, w, b, q,
+                                                       b+q, h-q, wb, g, c));
+                    new Sorter<T>(bc, a, w, b+q, h-q, wb+q, g, c).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                TimSort.sort(a, b, b + n, c, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = -9090258248781844470L;
-            final byte[] a;
-            final byte[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
+        static final class Merger<T> extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final T[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Comparator<? super T> comparator;
+            Merger(CountedCompleter<?> par, T[] a, T[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran,
+                   Comparator<? super T> comparator) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
+                this.comparator = comparator;
+            }
 
-            Merger(byte[] a, byte[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+            public final void compute() {
+                Comparator<? super T> c = this.comparator;
+                T[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0 ||
+                    c == null)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        T split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (c.compare(split, a[rm + rb]) <= 0)
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
+                    }
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        T split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (c.compare(split, a[lm + lb]) <= 0)
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger<T> m = new Merger<T>(this, a, w, lb + lh, ln - lh,
+                                                rb + rh, rn - rh,
+                                                k + lh + rh, g, c);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
+                }
+
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    T t, al, ar;
+                    if (c.compare((al = a[lb]), (ar = a[rb])) <= 0) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
+                    w[k++] = t;
+                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+
+                tryComplete();
             }
 
-            public void compute() {
-                final byte[] a = this.a;
-                final byte[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    byte split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+        }
+    } // FJObject
+
+    /** byte support class */
+    static final class FJByte {
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final byte[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, byte[] a, byte[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
+            }
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                byte[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
+                }
+                DualPivotQuicksort.sort(a, b, b + n - 1);
+                s.tryComplete();
+            }
+        }
+
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final byte[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, byte[] a, byte[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
+            }
+
+            public final void compute() {
+                byte[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        byte split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        byte split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    byte al = a[l];
-                    byte ar = a[r];
-                    byte t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    byte t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJByte
 
     /** char support class */
     static final class FJChar {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 8723376019074596641L;
-            final char[] a;     // array to be sorted.
-            final char[] w;     // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(char[] a, char[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final char[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, char[] a, char[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final char[] a = this.a;
-                final char[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                char[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = -1383975444621698926L;
-            final char[] a;
-            final char[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(char[] a, char[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final char[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, char[] a, char[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final char[] a = this.a;
-                final char[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    char split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+            public final void compute() {
+                char[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        char split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        char split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    char al = a[l];
-                    char ar = a[r];
-                    char t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    char t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJChar
 
     /** short support class */
     static final class FJShort {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = -7886754793730583084L;
-            final short[] a;    // array to be sorted.
-            final short[] w;    // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(short[] a, short[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final short[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, short[] a, short[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final short[] a = this.a;
-                final short[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                short[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 3895749408536700048L;
-            final short[] a;
-            final short[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(short[] a, short[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final short[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, short[] a, short[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final short[] a = this.a;
-                final short[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    short split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+            public final void compute() {
+                short[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        short split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        short split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    short al = a[l];
-                    short ar = a[r];
-                    short t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    short t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJShort
 
     /** int support class */
     static final class FJInt {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 4263311808957292729L;
-            final int[] a;     // array to be sorted.
-            final int[] w;     // workspace for merge
-            final int origin;  // origin of the part of array we deal with
-            final int n;       // Number of elements in (sub)arrays.
-            final int gran;    // split control
-
-            Sorter(int[] a, int[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final int[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, int[] a, int[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final int[] a = this.a;
-                final int[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                int[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = -8727507284219982792L;
-            final int[] a;
-            final int[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(int[] a, int[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final int[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, int[] a, int[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final int[] a = this.a;
-                final int[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    int split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+            public final void compute() {
+                int[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        int split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        int split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    int al = a[l];
-                    int ar = a[r];
-                    int t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    int t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJInt
 
     /** long support class */
     static final class FJLong {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 6553695007444392455L;
-            final long[] a;     // array to be sorted.
-            final long[] w;     // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(long[] a, long[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final long[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, long[] a, long[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final long[] a = this.a;
-                final long[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                long[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 8843567516333283861L;
-            final long[] a;
-            final long[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(long[] a, long[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final long[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, long[] a, long[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final long[] a = this.a;
-                final long[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    long split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split <= a[ro + mid])
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+            public final void compute() {
+                long[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        long split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                      nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        long split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    long al = a[l];
-                    long ar = a[r];
-                    long t;
-                    if (al <= ar) {++l; t=al;} else {++r; t = ar;}
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    long t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
+                    }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJLong
 
     /** float support class */
     static final class FJFloat {
-        static final class Sorter extends RecursiveAction {
-            static final long serialVersionUID = 1602600178202763377L;
-            final float[] a;    // array to be sorted.
-            final float[] w;    // workspace for merge
-            final int origin;   // origin of the part of array we deal with
-            final int n;        // Number of elements in (sub)arrays.
-            final int gran;     // split control
-
-            Sorter(float[] a, float[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+        static final class Sorter extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final float[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, float[] a, float[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final float[] a = this.a;
-                final float[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                float[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 1518176433845397426L;
-            final float[] a;
-            final float[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(float[] a, float[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final float[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, float[] a, float[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final float[] a = this.a;
-                final float[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    float split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (Float.compare(split, a[ro+mid]) <= 0)
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+            public final void compute() {
+                float[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        float split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        float split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    float al = a[l];
-                    float ar = a[r];
-                    float t;
-                    if (Float.compare(al, ar) <= 0) {
-                        ++l;
-                        t = al;
-                    } else {
-                        ++r;
-                        t = ar;
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    float t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
                     }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJFloat
 
     /** double support class */
     static final class FJDouble {
-        static final class Sorter extends RecursiveAction {
+        static final class Sorter extends CountedCompleter<Void> {
             static final long serialVersionUID = 2446542900576103244L;
-            final double[] a;    // array to be sorted.
-            final double[] w;    // workspace for merge
-            final int origin;    // origin of the part of array we deal with
-            final int n;         // Number of elements in (sub)arrays.
-            final int gran;      // split control
-
-            Sorter(double[] a, double[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
+            final double[] a, w;
+            final int base, size, wbase, gran;
+            Sorter(CountedCompleter<?> par, double[] a, double[] w, int base,
+                   int size, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w; this.base = base; this.size = size;
+                this.wbase = wbase; this.gran = gran;
             }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final double[] a = this.a;
-                final double[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter(a, w, l, q,   g),
-                                                     new Sorter(a, w, l+q, h-q, g),
-                                                     new Merger(a, w, l,   q,
-                                                                l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter(a, w, l + h, q,   g),
-                                                     new Sorter(a, w, l+u, n-u, g),
-                                                     new Merger(a, w, l+h, q,
-                                                                l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    DualPivotQuicksort.sort(a, l, l+n-1);   // skip rangeCheck
+            public final void compute() {
+                CountedCompleter<?> s = this;
+                double[] a = this.a, w = this.w; // localize all params
+                int b = this.base, n = this.size, wb = this.wbase, g = this.gran;
+                while (n > g) {
+                    int h = n >>> 1, q = h >>> 1, u = h + q; // quartiles
+                    Relay fc = new Relay(new Merger(s, w, a, wb, h,
+                                                    wb+h, n-h, b, g));
+                    Relay rc = new Relay(new Merger(fc, a, w, b+h, q,
+                                                    b+u, n-u, wb+h, g));
+                    new Sorter(rc, a, w, b+u, n-u, wb+u, g).fork();
+                    new Sorter(rc, a, w, b+h, q, wb+h, g).fork();;
+                    Relay bc = new Relay(new Merger(fc, a, w, b, q,
+                                                    b+q, h-q, wb, g));
+                    new Sorter(bc, a, w, b+q, h-q, wb+q, g).fork();
+                    s = new EmptyCompleter(bc);
+                    n = q;
                 }
+                DualPivotQuicksort.sort(a, b, b + n - 1, w, wb, n);
+                s.tryComplete();
             }
         }
 
-        static final class Merger extends RecursiveAction {
-            static final long serialVersionUID = 8076242187166127592L;
-            final double[] a;
-            final double[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger next;
-
-            Merger(double[] a, double[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
+        static final class Merger extends CountedCompleter<Void> {
+            static final long serialVersionUID = 2446542900576103244L;
+            final double[] a, w; // main and workspace arrays
+            final int lbase, lsize, rbase, rsize, wbase, gran;
+            Merger(CountedCompleter<?> par, double[] a, double[] w,
+                   int lbase, int lsize, int rbase,
+                   int rsize, int wbase, int gran) {
+                super(par);
+                this.a = a; this.w = w;
+                this.lbase = lbase; this.lsize = lsize;
+                this.rbase = rbase; this.rsize = rsize;
+                this.wbase = wbase; this.gran = gran;
             }
 
-            public void compute() {
-                final double[] a = this.a;
-                final double[] w = this.w;
-                Merger rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    double split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (Double.compare(split, a[ro+mid]) <= 0)
-                            rh = mid;
-                        else
-                            rl = mid + 1;
+            public final void compute() {
+                double[] a = this.a, w = this.w; // localize all params
+                int lb = this.lbase, ln = this.lsize, rb = this.rbase,
+                    rn = this.rsize, k = this.wbase, g = this.gran;
+                if (a == null || w == null || lb < 0 || rb < 0 || k < 0)
+                    throw new IllegalStateException(); // hoist checks
+                for (int lh, rh;;) {  // split larger, find point in smaller
+                    if (ln >= rn) {
+                        if (ln <= g)
+                            break;
+                        rh = rn;
+                        double split = a[(lh = ln >>> 1) + lb];
+                        for (int lo = 0; lo < rh; ) {
+                            int rm = (lo + rh) >>> 1;
+                            if (split <= a[rm + rb])
+                                rh = rm;
+                            else
+                                lo = rm + 1;
+                        }
                     }
-                    (rights = new Merger(a, w, splitIndex, nleft-lh, ro+rh,
-                                         nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
+                    else {
+                        if (rn <= g)
+                            break;
+                        lh = ln;
+                        double split = a[(rh = rn >>> 1) + rb];
+                        for (int lo = 0; lo < lh; ) {
+                            int lm = (lo + lh) >>> 1;
+                            if (split <= a[lm + lb])
+                                lh = lm;
+                            else
+                                lo = lm + 1;
+                        }
+                    }
+                    Merger m = new Merger(this, a, w, lb + lh, ln - lh,
+                                          rb + rh, rn - rh,
+                                          k + lh + rh, g);
+                    rn = rh;
+                    ln = lh;
+                    addToPendingCount(1);
+                    m.fork();
                 }
 
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    double al = a[l];
-                    double ar = a[r];
-                    double t;
-                    if (Double.compare(al, ar) <= 0) {
-                        ++l;
-                        t = al;
-                    } else {
-                        ++r;
-                        t = ar;
+                int lf = lb + ln, rf = rb + rn; // index bounds
+                while (lb < lf && rb < rf) {
+                    double t, al, ar;
+                    if ((al = a[lb]) <= (ar = a[rb])) {
+                        lb++; t = al;
+                    }
+                    else {
+                        rb++; t = ar;
                     }
                     w[k++] = t;
                 }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
+                if (rb < rf)
+                    System.arraycopy(a, rb, w, k, rf - rb);
+                else if (lb < lf)
+                    System.arraycopy(a, lb, w, k, lf - lb);
+                tryComplete();
             }
         }
     } // FJDouble
 
-    /** Comparable support class */
-    static final class FJComparable {
-        static final class Sorter<T extends Comparable<? super T>> extends RecursiveAction {
-            static final long serialVersionUID = -1024003289463302522L;
-            final T[] a;
-            final T[] w;
-            final int origin;
-            final int n;
-            final int gran;
-
-            Sorter(T[] a, T[] w, int origin, int n, int gran) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.gran = gran;
-            }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final T[] a = this.a;
-                final T[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1;
-                    int q = n >>> 2;
-                    int u = h + q;
-                    FJSubSorter ls = new FJSubSorter(new Sorter<>(a, w, l, q,   g),
-                                                     new Sorter<>(a, w, l+q, h-q, g),
-                                                     new Merger<>(a, w, l,   q,
-                                                                  l+q, h-q, l, g, null));
-                    FJSubSorter rs = new FJSubSorter(new Sorter<>(a, w, l+h, q,   g),
-                                                     new Sorter<>(a, w, l+u, n-u, g),
-                                                     new Merger<>(a, w, l+h, q,
-                                                                  l+u, n-u, l+h, g, null));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger<>(w, a, l, h, l + h, n - h, l, g, null).compute();
-                } else {
-                    Arrays.sort(a, l, l+n);
-                }
-            }
-        }
-
-        static final class Merger<T extends Comparable<? super T>> extends RecursiveAction {
-            static final long serialVersionUID = -3989771675258379302L;
-            final T[] a;
-            final T[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger<T> next;
-
-            Merger(T[] a, T[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger<T> next) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
-            }
-
-            public void compute() {
-                final T[] a = this.a;
-                final T[] w = this.w;
-                Merger<T> rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    T split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (split.compareTo(a[ro + mid]) <= 0)
-                            rh = mid;
-                        else
-                            rl = mid + 1;
-                    }
-                    (rights = new Merger<>(a, w, splitIndex, nleft-lh, ro+rh,
-                                           nright-rh, wo+lh+rh, gran, rights)).fork();
-                    nleft = lh;
-                    nright = rh;
-                }
-
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    T al = a[l];
-                    T ar = a[r];
-                    T t;
-                    if (al.compareTo(ar) <= 0) {++l; t=al;} else {++r; t=ar; }
-                    w[k++] = t;
-                }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
-            }
-        }
-    } // FJComparable
-
-    /** Object + Comparator support class */
-    static final class FJComparator {
-        static final class Sorter<T> extends RecursiveAction {
-            static final long serialVersionUID = 9191600840025808581L;
-            final T[] a;       // array to be sorted.
-            final T[] w;       // workspace for merge
-            final int origin;  // origin of the part of array we deal with
-            final int n;       // Number of elements in (sub)arrays.
-            final int gran;    // split control
-            final Comparator<? super T> cmp; // Comparator to use
-
-            Sorter(T[] a, T[] w, int origin, int n, int gran, Comparator<? super T> cmp) {
-                this.a = a;
-                this.w = w;
-                this.origin = origin;
-                this.n = n;
-                this.cmp = cmp;
-                this.gran = gran;
-            }
-
-            public void compute() {
-                final int l = origin;
-                final int g = gran;
-                final int n = this.n;
-                final T[] a = this.a;
-                final T[] w = this.w;
-                if (n > g) {
-                    int h = n >>> 1; // half
-                    int q = n >>> 2; // lower quarter index
-                    int u = h + q;   // upper quarter
-                    FJSubSorter ls = new FJSubSorter(new Sorter<>(a, w, l, q,   g, cmp),
-                                                     new Sorter<>(a, w, l+q, h-q, g, cmp),
-                                                     new Merger<>(a, w, l,   q,
-                                                                  l+q, h-q, l, g, null, cmp));
-                    FJSubSorter rs = new FJSubSorter(new Sorter<>(a, w, l + h, q,   g, cmp),
-                                                     new Sorter<>(a, w, l+u, n-u, g, cmp),
-                                                     new Merger<>(a, w, l+h, q,
-                                                                  l+u, n-u, l+h, g, null, cmp));
-                    rs.fork();
-                    ls.compute();
-                    if (rs.tryUnfork()) rs.compute(); else rs.join();
-                    new Merger<>(w, a, l, h, l + h, n - h, l, g, null, cmp).compute();
-                } else {
-                    Arrays.sort(a, l, l+n, cmp);
-                }
-            }
-        }
-
-        static final class Merger<T> extends RecursiveAction {
-            static final long serialVersionUID = -2679539040379156203L;
-            final T[] a;
-            final T[] w;
-            final int lo;
-            final int ln;
-            final int ro;
-            final int rn;
-            final int wo;
-            final int gran;
-            final Merger<T> next;
-            final Comparator<? super T> cmp;
-
-            Merger(T[] a, T[] w, int lo, int ln, int ro, int rn, int wo,
-                   int gran, Merger<T> next, Comparator<? super T> cmp) {
-                this.a = a;
-                this.w = w;
-                this.lo = lo;
-                this.ln = ln;
-                this.ro = ro;
-                this.rn = rn;
-                this.wo = wo;
-                this.gran = gran;
-                this.next = next;
-                this.cmp = cmp;
-            }
-
-            public void compute() {
-                final T[] a = this.a;
-                final T[] w = this.w;
-                Merger<T> rights = null;
-                int nleft = ln;
-                int nright = rn;
-                while (nleft > gran) {
-                    int lh = nleft >>> 1;
-                    int splitIndex = lo + lh;
-                    T split = a[splitIndex];
-                    int rl = 0;
-                    int rh = nright;
-                    while (rl < rh) {
-                        int mid = (rl + rh) >>> 1;
-                        if (cmp.compare(split, a[ro+mid]) <= 0)
-                            rh = mid;
-                        else
-                            rl = mid + 1;
-                    }
-                    (rights = new Merger<>(a, w, splitIndex, nleft-lh, ro+rh,
-                                           nright-rh, wo+lh+rh, gran, rights, cmp)).fork();
-                    nleft = lh;
-                    nright = rh;
-                }
-
-                int l = lo;
-                int lFence = l + nleft;
-                int r = ro;
-                int rFence = r + nright;
-                int k = wo;
-                while (l < lFence && r < rFence) {
-                    T al = a[l];
-                    T ar = a[r];
-                    T t;
-                    if (cmp.compare(al, ar) <= 0) {
-                        ++l;
-                        t = al;
-                    } else {
-                        ++r;
-                        t = ar;
-                    }
-                    w[k++] = t;
-                }
-                while (l < lFence)
-                    w[k++] = a[l++];
-                while (r < rFence)
-                    w[k++] = a[r++];
-                while (rights != null) {
-                    if (rights.tryUnfork())
-                        rights.compute();
-                    else
-                        rights.join();
-                    rights = rights.next;
-                }
-            }
-        }
-    } // FJComparator
-
-    /** Utility class to sort half a partitioned array */
-    private static final class FJSubSorter extends RecursiveAction {
-        static final long serialVersionUID = 9159249695527935512L;
-        final RecursiveAction left;
-        final RecursiveAction right;
-        final RecursiveAction merger;
-
-        FJSubSorter(RecursiveAction left, RecursiveAction right,
-                    RecursiveAction merger) {
-            this.left = left;
-            this.right = right;
-            this.merger = merger;
-        }
-
-        public void compute() {
-            right.fork();
-            left.invoke();
-            right.join();
-            merger.invoke();
-        }
-    }
 }
--- a/jdk/src/share/classes/java/util/ComparableTimSort.java	Thu May 23 15:50:37 2013 +0200
+++ b/jdk/src/share/classes/java/util/ComparableTimSort.java	Thu May 23 18:34:15 2013 +0100
@@ -86,9 +86,13 @@
     private static final int INITIAL_TMP_STORAGE_LENGTH = 256;
 
     /**
-     * Temp storage for merges.
+     * Temp storage for merges. A workspace array may optionally be
+     * provided in constructor, and if so will be used as long as it
+     * is big enough.
      */
     private Object[] tmp;
+    private int tmpBase; // base of tmp array slice
+    private int tmpLen;  // length of tmp array slice
 
     /**
      * A stack of pending runs yet to be merged.  Run i starts at
@@ -108,15 +112,27 @@
      * Creates a TimSort instance to maintain the state of an ongoing sort.
      *
      * @param a the array to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private ComparableTimSort(Object[] a) {
+    private ComparableTimSort(Object[] a, Object[] work, int workBase, int workLen) {
         this.a = a;
 
         // Allocate temp storage (which may be increased later if necessary)
         int len = a.length;
-        Object[] newArray = new Object[len < 2 * INITIAL_TMP_STORAGE_LENGTH ?
-                                       len >>> 1 : INITIAL_TMP_STORAGE_LENGTH];
-        tmp = newArray;
+        int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
+            len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
+        if (work == null || workLen < tlen || workBase + tlen > work.length) {
+            tmp = new Object[tlen];
+            tmpBase = 0;
+            tmpLen = tlen;
+        }
+        else {
+            tmp = work;
+            tmpBase = workBase;
+            tmpLen = workLen;
+        }
 
         /*
          * Allocate runs-to-be-merged stack (which cannot be expanded).  The
@@ -136,17 +152,28 @@
     }
 
     /*
-     * The next two methods (which are package private and static) constitute
-     * the entire API of this class.  Each of these methods obeys the contract
-     * of the public method with the same signature in java.util.Arrays.
+     * The next method (package private and static) constitutes the
+     * entire API of this class.
      */
 
-    static void sort(Object[] a) {
-          sort(a, 0, a.length);
-    }
+    /**
+     * Sorts the given range, using the given workspace array slice
+     * for temp storage when possible. This method is designed to be
+     * invoked from public methods (in class Arrays) after performing
+     * any necessary array bounds checks and expanding parameters into
+     * the required forms.
+     *
+     * @param a the array to be sorted
+     * @param lo the index of the first element, inclusive, to be sorted
+     * @param hi the index of the last element, exclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
+     * @since 1.8
+     */
+    static void sort(Object[] a, int lo, int hi, Object[] work, int workBase, int workLen) {
+        assert a != null && lo >= 0 && lo <= hi && hi <= a.length;
 
-    static void sort(Object[] a, int lo, int hi) {
-        rangeCheck(a.length, lo, hi);
         int nRemaining  = hi - lo;
         if (nRemaining < 2)
             return;  // Arrays of size 0 and 1 are always sorted
@@ -163,7 +190,7 @@
          * extending short natural runs to minRun elements, and merging runs
          * to maintain stack invariant.
          */
-        ComparableTimSort ts = new ComparableTimSort(a);
+        ComparableTimSort ts = new ComparableTimSort(a, work, workBase, workLen);
         int minRun = minRunLength(nRemaining);
         do {
             // Identify next run
@@ -619,11 +646,11 @@
         // Copy first run into temp array
         Object[] a = this.a; // For performance
         Object[] tmp = ensureCapacity(len1);
-        System.arraycopy(a, base1, tmp, 0, len1);
 
-        int cursor1 = 0;       // Indexes into tmp array
+        int cursor1 = tmpBase; // Indexes into tmp array
         int cursor2 = base2;   // Indexes int a
         int dest = base1;      // Indexes int a
+        System.arraycopy(a, base1, tmp, cursor1, len1);
 
         // Move first element of second run and deal with degenerate cases
         a[dest++] = a[cursor2++];
@@ -736,16 +763,17 @@
         // Copy second run into temp array
         Object[] a = this.a; // For performance
         Object[] tmp = ensureCapacity(len2);
-        System.arraycopy(a, base2, tmp, 0, len2);
+        int tmpBase = this.tmpBase;
+        System.arraycopy(a, base2, tmp, tmpBase, len2);
 
         int cursor1 = base1 + len1 - 1;  // Indexes into a
-        int cursor2 = len2 - 1;          // Indexes into tmp array
+        int cursor2 = tmpBase + len2 - 1; // Indexes into tmp array
         int dest = base2 + len2 - 1;     // Indexes into a
 
         // Move last element of first run and deal with degenerate cases
         a[dest--] = a[cursor1--];
         if (--len1 == 0) {
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
             return;
         }
         if (len2 == 1) {
@@ -803,7 +831,7 @@
                 if (--len2 == 1)
                     break outer;
 
-                count2 = len2 - gallopLeft((Comparable) a[cursor1], tmp, 0, len2, len2 - 1);
+                count2 = len2 - gallopLeft((Comparable) a[cursor1], tmp, tmpBase, len2, len2 - 1);
                 if (count2 != 0) {
                     dest -= count2;
                     cursor2 -= count2;
@@ -835,7 +863,7 @@
         } else {
             assert len1 == 0;
             assert len2 > 0;
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
         }
     }
 
@@ -848,7 +876,7 @@
      * @return tmp, whether or not it grew
      */
     private Object[]  ensureCapacity(int minCapacity) {
-        if (tmp.length < minCapacity) {
+        if (tmpLen < minCapacity) {
             // Compute smallest power of 2 > minCapacity
             int newSize = minCapacity;
             newSize |= newSize >> 1;
@@ -863,30 +891,13 @@
             else
                 newSize = Math.min(newSize, a.length >>> 1);
 
+            @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
             Object[] newArray = new Object[newSize];
             tmp = newArray;
+            tmpLen = newSize;
+            tmpBase = 0;
         }
         return tmp;
     }
 
-    /**
-     * Checks that fromIndex and toIndex are in range, and throws an
-     * appropriate exception if they aren't.
-     *
-     * @param arrayLen the length of the array
-     * @param fromIndex the index of the first element of the range
-     * @param toIndex the index after the last element of the range
-     * @throws IllegalArgumentException if fromIndex > toIndex
-     * @throws ArrayIndexOutOfBoundsException if fromIndex < 0
-     *         or toIndex > arrayLen
-     */
-    private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex)
-            throw new IllegalArgumentException("fromIndex(" + fromIndex +
-                       ") > toIndex(" + toIndex+")");
-        if (fromIndex < 0)
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        if (toIndex > arrayLen)
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-    }
 }
--- a/jdk/src/share/classes/java/util/DualPivotQuicksort.java	Thu May 23 15:50:37 2013 +0200
+++ b/jdk/src/share/classes/java/util/DualPivotQuicksort.java	Thu May 23 18:34:15 2013 +0100
@@ -32,6 +32,11 @@
  * quicksorts to degrade to quadratic performance, and is typically
  * faster than traditional (one-pivot) Quicksort implementations.
  *
+ * All exposed methods are package-private, designed to be invoked
+ * from public methods (in class Arrays) after performing any
+ * necessary array bounds checks and expanding parameters into the
+ * required forms.
+ *
  * @author Vladimir Yaroslavskiy
  * @author Jon Bentley
  * @author Josh Bloch
@@ -89,22 +94,18 @@
      */
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(int[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(int[] a, int left, int right) {
+    static void sort(int[] a, int left, int right,
+                     int[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -147,24 +148,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        int[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        int[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new int[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new int[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a;
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new int[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -172,21 +184,22 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
             int[] t = a; a = b; b = t;
+            int o = ao; ao = bo; bo = o;
         }
     }
 
@@ -529,22 +542,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(long[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(long[] a, int left, int right) {
+    static void sort(long[] a, int left, int right,
+                     long[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -587,24 +596,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        long[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        long[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new long[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new long[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a;
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new long[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -612,21 +632,22 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
             long[] t = a; a = b; b = t;
+            int o = ao; ao = bo; bo = o;
         }
     }
 
@@ -969,22 +990,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(short[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(short[] a, int left, int right) {
+    static void sort(short[] a, int left, int right,
+                     short[] work, int workBase, int workLen) {
         // Use counting sort on large arrays
         if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) {
             int[] count = new int[NUM_SHORT_VALUES];
@@ -1002,7 +1019,7 @@
                 } while (--s > 0);
             }
         } else { // Use Dual-Pivot Quicksort on small arrays
-            doSort(a, left, right);
+            doSort(a, left, right, work, workBase, workLen);
         }
     }
 
@@ -1015,8 +1032,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(short[] a, int left, int right) {
+    private static void doSort(short[] a, int left, int right,
+                               short[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -1059,24 +1080,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        short[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        short[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new short[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new short[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a;
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new short[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -1084,21 +1116,22 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
             short[] t = a; a = b; b = t;
+            int o = ao; ao = bo; bo = o;
         }
     }
 
@@ -1441,22 +1474,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(char[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(char[] a, int left, int right) {
+    static void sort(char[] a, int left, int right,
+                     char[] work, int workBase, int workLen) {
         // Use counting sort on large arrays
         if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) {
             int[] count = new int[NUM_CHAR_VALUES];
@@ -1474,7 +1503,7 @@
                 } while (--s > 0);
             }
         } else { // Use Dual-Pivot Quicksort on small arrays
-            doSort(a, left, right);
+            doSort(a, left, right, work, workBase, workLen);
         }
     }
 
@@ -1487,8 +1516,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(char[] a, int left, int right) {
+    private static void doSort(char[] a, int left, int right,
+                               char[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -1531,24 +1564,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        char[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        char[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new char[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new char[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a;
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new char[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -1556,21 +1600,22 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
             char[] t = a; a = b; b = t;
+            int o = ao; ao = bo; bo = o;
         }
     }
 
@@ -1916,22 +1961,13 @@
     private static final int NUM_BYTE_VALUES = 1 << 8;
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(byte[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
      * Sorts the specified range of the array.
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
      */
-    public static void sort(byte[] a, int left, int right) {
+    static void sort(byte[] a, int left, int right) {
         // Use counting sort on large arrays
         if (right - left > COUNTING_SORT_THRESHOLD_FOR_BYTE) {
             int[] count = new int[NUM_BYTE_VALUES];
@@ -1963,22 +1999,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(float[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(float[] a, int left, int right) {
+    static void sort(float[] a, int left, int right,
+                     float[] work, int workBase, int workLen) {
         /*
          * Phase 1: Move NaNs to the end of the array.
          */
@@ -1997,7 +2029,7 @@
         /*
          * Phase 2: Sort everything except NaNs (which are already in place).
          */
-        doSort(a, left, right);
+        doSort(a, left, right, work, workBase, workLen);
 
         /*
          * Phase 3: Place negative zeros before positive zeros.
@@ -2064,8 +2096,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(float[] a, int left, int right) {
+    private static void doSort(float[] a, int left, int right,
+                               float[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -2108,24 +2144,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        float[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        float[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new float[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new float[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a;
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new float[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -2133,21 +2180,22 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
             float[] t = a; a = b; b = t;
+            int o = ao; ao = bo; bo = o;
         }
     }
 
@@ -2490,22 +2538,18 @@
     }
 
     /**
-     * Sorts the specified array.
-     *
-     * @param a the array to be sorted
-     */
-    public static void sort(double[] a) {
-        sort(a, 0, a.length - 1);
-    }
-
-    /**
-     * Sorts the specified range of the array.
+     * Sorts the specified range of the array using the given
+     * workspace array slice if possible for merging
      *
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    public static void sort(double[] a, int left, int right) {
+    static void sort(double[] a, int left, int right,
+                     double[] work, int workBase, int workLen) {
         /*
          * Phase 1: Move NaNs to the end of the array.
          */
@@ -2524,7 +2568,7 @@
         /*
          * Phase 2: Sort everything except NaNs (which are already in place).
          */
-        doSort(a, left, right);
+        doSort(a, left, right, work, workBase, workLen);
 
         /*
          * Phase 3: Place negative zeros before positive zeros.
@@ -2591,8 +2635,12 @@
      * @param a the array to be sorted
      * @param left the index of the first element, inclusive, to be sorted
      * @param right the index of the last element, inclusive, to be sorted
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private static void doSort(double[] a, int left, int right) {
+    private static void doSort(double[] a, int left, int right,
+                               double[] work, int workBase, int workLen) {
         // Use Quicksort on small arrays
         if (right - left < QUICKSORT_THRESHOLD) {
             sort(a, left, right, true);
@@ -2635,24 +2683,35 @@
         }
 
         // Check special cases
+        // Implementation note: variable "right" is increased by 1.
         if (run[count] == right++) { // The last run contains one element
             run[++count] = right;
         } else if (count == 1) { // The array is already sorted
             return;
         }
 
-        /*
-         * Create temporary array, which is used for merging.
-         * Implementation note: variable "right" is increased by 1.
-         */
-        double[] b; byte odd = 0;
+        // Determine alternation base for merge
+        byte odd = 0;
         for (int n = 1; (n <<= 1) < count; odd ^= 1);
 
+        // Use or create temporary array b for merging
+        double[] b;                 // temp array; alternates with a
+        int ao, bo;              // array offsets from 'left'
+        int blen = right - left; // space needed for b
+        if (work == null || workLen < blen || workBase + blen > work.length) {
+            work = new double[blen];
+            workBase = 0;
+        }
         if (odd == 0) {
-            b = a; a = new double[b.length];
-            for (int i = left - 1; ++i < right; a[i] = b[i]);
+            System.arraycopy(a, left, work, workBase, blen);
+            b = a;
+            bo = 0;
+            a = work;
+            ao = workBase - left;
         } else {
-            b = new double[a.length];
+            b = work;
+            ao = 0;
+            bo = workBase - left;
         }
 
         // Merging
@@ -2660,21 +2719,22 @@
             for (int k = (last = 0) + 2; k <= count; k += 2) {
                 int hi = run[k], mi = run[k - 1];
                 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
-                    if (q >= hi || p < mi && a[p] <= a[q]) {
-                        b[i] = a[p++];
+                    if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
+                        b[i + bo] = a[p++ + ao];
                     } else {
-                        b[i] = a[q++];
+                        b[i + bo] = a[q++ + ao];
                     }
                 }
                 run[++last] = hi;
             }
             if ((count & 1) != 0) {
                 for (int i = right, lo = run[count - 1]; --i >= lo;
-                    b[i] = a[i]
+                    b[i + bo] = a[i + ao]
                 );
                 run[++last] = right;
             }
             double[] t = a; a = b; b = t;
+            int o = ao; ao = bo; bo = o;
         }
     }
 
--- a/jdk/src/share/classes/java/util/TimSort.java	Thu May 23 15:50:37 2013 +0200
+++ b/jdk/src/share/classes/java/util/TimSort.java	Thu May 23 18:34:15 2013 +0100
@@ -111,9 +111,13 @@
     private static final int INITIAL_TMP_STORAGE_LENGTH = 256;
 
     /**
-     * Temp storage for merges.
+     * Temp storage for merges. A workspace array may optionally be
+     * provided in constructor, and if so will be used as long as it
+     * is big enough.
      */
-    private T[] tmp; // Actual runtime type will be Object[], regardless of T
+    private T[] tmp;
+    private int tmpBase; // base of tmp array slice
+    private int tmpLen;  // length of tmp array slice
 
     /**
      * A stack of pending runs yet to be merged.  Run i starts at
@@ -134,17 +138,31 @@
      *
      * @param a the array to be sorted
      * @param c the comparator to determine the order of the sort
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
      */
-    private TimSort(T[] a, Comparator<? super T> c) {
+    private TimSort(T[] a, Comparator<? super T> c, T[] work, int workBase, int workLen) {
         this.a = a;
         this.c = c;
 
         // Allocate temp storage (which may be increased later if necessary)
         int len = a.length;
-        @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
-        T[] newArray = (T[]) new Object[len < 2 * INITIAL_TMP_STORAGE_LENGTH ?
-                                        len >>> 1 : INITIAL_TMP_STORAGE_LENGTH];
-        tmp = newArray;
+        int tlen = (len < 2 * INITIAL_TMP_STORAGE_LENGTH) ?
+            len >>> 1 : INITIAL_TMP_STORAGE_LENGTH;
+        if (work == null || workLen < tlen || workBase + tlen > work.length) {
+            @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
+            T[] newArray = (T[])java.lang.reflect.Array.newInstance
+                (a.getClass().getComponentType(), tlen);
+            tmp = newArray;
+            tmpBase = 0;
+            tmpLen = tlen;
+        }
+        else {
+            tmp = work;
+            tmpBase = workBase;
+            tmpLen = workLen;
+        }
 
         /*
          * Allocate runs-to-be-merged stack (which cannot be expanded).  The
@@ -164,22 +182,30 @@
     }
 
     /*
-     * The next two methods (which are package private and static) constitute
-     * the entire API of this class.  Each of these methods obeys the contract
-     * of the public method with the same signature in java.util.Arrays.
+     * The next method (package private and static) constitutes the
+     * entire API of this class.
      */
 
-    static <T> void sort(T[] a, Comparator<? super T> c) {
-        sort(a, 0, a.length, c);
-    }
+    /**
+     * Sorts the given range, using the given workspace array slice
+     * for temp storage when possible. This method is designed to be
+     * invoked from public methods (in class Arrays) after performing
+     * any necessary array bounds checks and expanding parameters into
+     * the required forms.
+     *
+     * @param a the array to be sorted
+     * @param lo the index of the first element, inclusive, to be sorted
+     * @param hi the index of the last element, exclusive, to be sorted
+     * @param c the comparator to use
+     * @param work a workspace array (slice)
+     * @param workBase origin of usable space in work array
+     * @param workLen usable size of work array
+     * @since 1.8
+     */
+    static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c,
+                         T[] work, int workBase, int workLen) {
+        assert c != null && a != null && lo >= 0 && lo <= hi && hi <= a.length;
 
-    static <T> void sort(T[] a, int lo, int hi, Comparator<? super T> c) {
-        if (c == null) {
-            Arrays.sort(a, lo, hi);
-            return;
-        }
-
-        rangeCheck(a.length, lo, hi);
         int nRemaining  = hi - lo;
         if (nRemaining < 2)
             return;  // Arrays of size 0 and 1 are always sorted
@@ -196,7 +222,7 @@
          * extending short natural runs to minRun elements, and merging runs
          * to maintain stack invariant.
          */
-        TimSort<T> ts = new TimSort<>(a, c);
+        TimSort<T> ts = new TimSort<>(a, c, work, workBase, workLen);
         int minRun = minRunLength(nRemaining);
         do {
             // Identify next run
@@ -653,11 +679,10 @@
         // Copy first run into temp array
         T[] a = this.a; // For performance
         T[] tmp = ensureCapacity(len1);
-        System.arraycopy(a, base1, tmp, 0, len1);
-
-        int cursor1 = 0;       // Indexes into tmp array
+        int cursor1 = tmpBase; // Indexes into tmp array
         int cursor2 = base2;   // Indexes int a
         int dest = base1;      // Indexes int a
+        System.arraycopy(a, base1, tmp, cursor1, len1);
 
         // Move first element of second run and deal with degenerate cases
         a[dest++] = a[cursor2++];
@@ -770,16 +795,17 @@
         // Copy second run into temp array
         T[] a = this.a; // For performance
         T[] tmp = ensureCapacity(len2);
-        System.arraycopy(a, base2, tmp, 0, len2);
+        int tmpBase = this.tmpBase;
+        System.arraycopy(a, base2, tmp, tmpBase, len2);
 
         int cursor1 = base1 + len1 - 1;  // Indexes into a
-        int cursor2 = len2 - 1;          // Indexes into tmp array
+        int cursor2 = tmpBase + len2 - 1; // Indexes into tmp array
         int dest = base2 + len2 - 1;     // Indexes into a
 
         // Move last element of first run and deal with degenerate cases
         a[dest--] = a[cursor1--];
         if (--len1 == 0) {
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
             return;
         }
         if (len2 == 1) {
@@ -838,7 +864,7 @@
                 if (--len2 == 1)
                     break outer;
 
-                count2 = len2 - gallopLeft(a[cursor1], tmp, 0, len2, len2 - 1, c);
+                count2 = len2 - gallopLeft(a[cursor1], tmp, tmpBase, len2, len2 - 1, c);
                 if (count2 != 0) {
                     dest -= count2;
                     cursor2 -= count2;
@@ -870,7 +896,7 @@
         } else {
             assert len1 == 0;
             assert len2 > 0;
-            System.arraycopy(tmp, 0, a, dest - (len2 - 1), len2);
+            System.arraycopy(tmp, tmpBase, a, dest - (len2 - 1), len2);
         }
     }
 
@@ -883,7 +909,7 @@
      * @return tmp, whether or not it grew
      */
     private T[] ensureCapacity(int minCapacity) {
-        if (tmp.length < minCapacity) {
+        if (tmpLen < minCapacity) {
             // Compute smallest power of 2 > minCapacity
             int newSize = minCapacity;
             newSize |= newSize >> 1;
@@ -899,30 +925,12 @@
                 newSize = Math.min(newSize, a.length >>> 1);
 
             @SuppressWarnings({"unchecked", "UnnecessaryLocalVariable"})
-            T[] newArray = (T[]) new Object[newSize];
+            T[] newArray = (T[])java.lang.reflect.Array.newInstance
+                (a.getClass().getComponentType(), newSize);
             tmp = newArray;
+            tmpLen = newSize;
+            tmpBase = 0;
         }
         return tmp;
     }
-
-    /**
-     * Checks that fromIndex and toIndex are in range, and throws an
-     * appropriate exception if they aren't.
-     *
-     * @param arrayLen the length of the array
-     * @param fromIndex the index of the first element of the range
-     * @param toIndex the index after the last element of the range
-     * @throws IllegalArgumentException if fromIndex > toIndex
-     * @throws ArrayIndexOutOfBoundsException if fromIndex < 0
-     *         or toIndex > arrayLen
-     */
-    private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
-        if (fromIndex > toIndex)
-            throw new IllegalArgumentException("fromIndex(" + fromIndex +
-                       ") > toIndex(" + toIndex+")");
-        if (fromIndex < 0)
-            throw new ArrayIndexOutOfBoundsException(fromIndex);
-        if (toIndex > arrayLen)
-            throw new ArrayIndexOutOfBoundsException(toIndex);
-    }
 }
--- a/jdk/test/java/util/Arrays/ParallelSorting.java	Thu May 23 15:50:37 2013 +0200
+++ b/jdk/test/java/util/Arrays/ParallelSorting.java	Thu May 23 18:34:15 2013 +0100
@@ -50,11 +50,11 @@
 
     // Array lengths used in a long run (default)
     private static final int[] LONG_RUN_LENGTHS = {
-        1, 2, 3, 5, 8, 13, 21, 34, 55, 100, 1000, 10000, 100000, 1000000 };
+        1000, 10000, 100000, 1000000 };
 
     // Array lengths used in a short run
     private static final int[] SHORT_RUN_LENGTHS = {
-        1, 2, 3, 21, 55, 1000, 10000 };
+        5000, 9000, 10000, 12000 };
 
     // Random initial values used in a long run (default)
     private static final long[] LONG_RUN_RANDOMS = { 666, 0xC0FFEE, 999 };