23 * questions. |
23 * questions. |
24 */ |
24 */ |
25 |
25 |
26 package java.util; |
26 package java.util; |
27 |
27 |
|
28 import java.util.concurrent.CountedCompleter; |
|
29 import java.util.concurrent.RecursiveTask; |
|
30 |
28 /** |
31 /** |
29 * This class implements the Dual-Pivot Quicksort algorithm by |
32 * This class implements powerful and fully optimized versions, both |
30 * Vladimir Yaroslavskiy, Jon Bentley, and Josh Bloch. The algorithm |
33 * sequential and parallel, of the Dual-Pivot Quicksort algorithm by |
31 * offers O(n log(n)) performance on many data sets that cause other |
34 * Vladimir Yaroslavskiy, Jon Bentley and Josh Bloch. This algorithm |
32 * quicksorts to degrade to quadratic performance, and is typically |
35 * offers O(n log(n)) performance on all data sets, and is typically |
33 * faster than traditional (one-pivot) Quicksort implementations. |
36 * faster than traditional (one-pivot) Quicksort implementations. |
34 * |
37 * |
35 * All exposed methods are package-private, designed to be invoked |
38 * There are also additional algorithms, invoked from the Dual-Pivot |
36 * from public methods (in class Arrays) after performing any |
39 * Quicksort, such as mixed insertion sort, merging of runs and heap |
37 * necessary array bounds checks and expanding parameters into the |
40 * sort, counting sort and parallel merge sort. |
38 * required forms. |
|
39 * |
41 * |
40 * @author Vladimir Yaroslavskiy |
42 * @author Vladimir Yaroslavskiy |
41 * @author Jon Bentley |
43 * @author Jon Bentley |
42 * @author Josh Bloch |
44 * @author Josh Bloch |
|
45 * @author Doug Lea |
43 * |
46 * |
44 * @version 2011.02.11 m765.827.12i:5\7pm |
47 * @version 2018.08.18 |
45 * @since 1.7 |
48 * |
|
49 * @since 1.7 * 14 |
46 */ |
50 */ |
47 final class DualPivotQuicksort { |
51 final class DualPivotQuicksort { |
48 |
52 |
49 /** |
53 /** |
50 * Prevents instantiation. |
54 * Prevents instantiation. |
51 */ |
55 */ |
52 private DualPivotQuicksort() {} |
56 private DualPivotQuicksort() {} |
53 |
57 |
54 /* |
58 /** |
55 * Tuning parameters. |
59 * Max array size to use mixed insertion sort. |
56 */ |
60 */ |
57 |
61 private static final int MAX_MIXED_INSERTION_SORT_SIZE = 65; |
58 /** |
62 |
59 * The maximum number of runs in merge sort. |
63 /** |
60 */ |
64 * Max array size to use insertion sort. |
61 private static final int MAX_RUN_COUNT = 67; |
65 */ |
62 |
66 private static final int MAX_INSERTION_SORT_SIZE = 44; |
63 /** |
67 |
64 * If the length of an array to be sorted is less than this |
68 /** |
65 * constant, Quicksort is used in preference to merge sort. |
69 * Min array size to perform sorting in parallel. |
66 */ |
70 */ |
67 private static final int QUICKSORT_THRESHOLD = 286; |
71 private static final int MIN_PARALLEL_SORT_SIZE = 4 << 10; |
68 |
72 |
69 /** |
73 /** |
70 * If the length of an array to be sorted is less than this |
74 * Min array size to try merging of runs. |
71 * constant, insertion sort is used in preference to Quicksort. |
75 */ |
72 */ |
76 private static final int MIN_TRY_MERGE_SIZE = 4 << 10; |
73 private static final int INSERTION_SORT_THRESHOLD = 47; |
77 |
74 |
78 /** |
75 /** |
79 * Min size of the first run to continue with scanning. |
76 * If the length of a byte array to be sorted is greater than this |
80 */ |
77 * constant, counting sort is used in preference to insertion sort. |
81 private static final int MIN_FIRST_RUN_SIZE = 16; |
78 */ |
82 |
79 private static final int COUNTING_SORT_THRESHOLD_FOR_BYTE = 29; |
83 /** |
80 |
84 * Min factor for the first runs to continue scanning. |
81 /** |
85 */ |
82 * If the length of a short or char array to be sorted is greater |
86 private static final int MIN_FIRST_RUNS_FACTOR = 7; |
83 * than this constant, counting sort is used in preference to Quicksort. |
87 |
84 */ |
88 /** |
85 private static final int COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR = 3200; |
89 * Max capacity of the index array for tracking runs. |
86 |
90 */ |
87 /* |
91 private static final int MAX_RUN_CAPACITY = 5 << 10; |
88 * Sorting methods for seven primitive types. |
92 |
89 */ |
93 /** |
90 |
94 * Min number of runs, required by parallel merging. |
91 /** |
95 */ |
92 * Sorts the specified range of the array using the given |
96 private static final int MIN_RUN_COUNT = 4; |
93 * workspace array slice if possible for merging |
97 |
|
98 /** |
|
99 * Min array size to use parallel merging of parts. |
|
100 */ |
|
101 private static final int MIN_PARALLEL_MERGE_PARTS_SIZE = 4 << 10; |
|
102 |
|
103 /** |
|
104 * Min size of a byte array to use counting sort. |
|
105 */ |
|
106 private static final int MIN_BYTE_COUNTING_SORT_SIZE = 64; |
|
107 |
|
108 /** |
|
109 * Min size of a short or char array to use counting sort. |
|
110 */ |
|
111 private static final int MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE = 1750; |
|
112 |
|
113 /** |
|
114 * Threshold of mixed insertion sort is incremented by this value. |
|
115 */ |
|
116 private static final int DELTA = 3 << 1; |
|
117 |
|
118 /** |
|
119 * Max recursive partitioning depth before using heap sort. |
|
120 */ |
|
121 private static final int MAX_RECURSION_DEPTH = 64 * DELTA; |
|
122 |
|
123 /** |
|
124 * Calculates the double depth of parallel merging. |
|
125 * Depth is negative, if tasks split before sorting. |
|
126 * |
|
127 * @param parallelism the parallelism level |
|
128 * @param size the target size |
|
129 * @return the depth of parallel merging |
|
130 */ |
|
131 private static int getDepth(int parallelism, int size) { |
|
132 int depth = 0; |
|
133 |
|
134 while ((parallelism >>= 3) > 0 && (size >>= 2) > 0) { |
|
135 depth -= 2; |
|
136 } |
|
137 return depth; |
|
138 } |
|
139 |
|
140 /** |
|
141 * Sorts the specified range of the array using parallel merge |
|
142 * sort and/or Dual-Pivot Quicksort. |
|
143 * |
|
144 * To balance the faster splitting and parallelism of merge sort |
|
145 * with the faster element partitioning of Quicksort, ranges are |
|
146 * subdivided in tiers such that, if there is enough parallelism, |
|
147 * the four-way parallel merge is started, still ensuring enough |
|
148 * parallelism to process the partitions. |
94 * |
149 * |
95 * @param a the array to be sorted |
150 * @param a the array to be sorted |
96 * @param left the index of the first element, inclusive, to be sorted |
151 * @param parallelism the parallelism level |
97 * @param right the index of the last element, inclusive, to be sorted |
152 * @param low the index of the first element, inclusive, to be sorted |
98 * @param work a workspace array (slice) |
153 * @param high the index of the last element, exclusive, to be sorted |
99 * @param workBase origin of usable space in work array |
154 */ |
100 * @param workLen usable size of work array |
155 static void sort(int[] a, int parallelism, int low, int high) { |
101 */ |
156 int size = high - low; |
102 static void sort(int[] a, int left, int right, |
157 |
103 int[] work, int workBase, int workLen) { |
158 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
104 // Use Quicksort on small arrays |
159 int depth = getDepth(parallelism, size >> 12); |
105 if (right - left < QUICKSORT_THRESHOLD) { |
160 int[] b = depth == 0 ? null : new int[size]; |
106 sort(a, left, right, true); |
161 new Sorter(null, a, b, low, size, low, depth).invoke(); |
107 return; |
162 } else { |
108 } |
163 sort(null, a, 0, low, high); |
109 |
164 } |
110 /* |
165 } |
111 * Index run[i] is the start of i-th run |
166 |
112 * (ascending or descending sequence). |
167 /** |
113 */ |
168 * Sorts the specified array using the Dual-Pivot Quicksort and/or |
114 int[] run = new int[MAX_RUN_COUNT + 1]; |
169 * other sorts in special-cases, possibly with parallel partitions. |
115 int count = 0; run[0] = left; |
170 * |
116 |
171 * @param sorter parallel context |
117 // Check if the array is nearly sorted |
172 * @param a the array to be sorted |
118 for (int k = left; k < right; run[count] = k) { |
173 * @param bits the combination of recursion depth and bit flag, where |
119 // Equal items in the beginning of the sequence |
174 * the right bit "0" indicates that array is the leftmost part |
120 while (k < right && a[k] == a[k + 1]) |
175 * @param low the index of the first element, inclusive, to be sorted |
121 k++; |
176 * @param high the index of the last element, exclusive, to be sorted |
122 if (k == right) break; // Sequence finishes with equal items |
177 */ |
123 if (a[k] < a[k + 1]) { // ascending |
178 static void sort(Sorter sorter, int[] a, int bits, int low, int high) { |
124 while (++k <= right && a[k - 1] <= a[k]); |
179 while (true) { |
125 } else if (a[k] > a[k + 1]) { // descending |
180 int end = high - 1, size = high - low; |
126 while (++k <= right && a[k - 1] >= a[k]); |
181 |
127 // Transform into an ascending sequence |
182 /* |
128 for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) { |
183 * Run mixed insertion sort on small non-leftmost parts. |
129 int t = a[lo]; a[lo] = a[hi]; a[hi] = t; |
184 */ |
130 } |
185 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
131 } |
186 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
132 |
|
133 // Merge a transformed descending sequence followed by an |
|
134 // ascending sequence |
|
135 if (run[count] > left && a[run[count]] >= a[run[count] - 1]) { |
|
136 count--; |
|
137 } |
|
138 |
|
139 /* |
|
140 * The array is not highly structured, |
|
141 * use Quicksort instead of merge sort. |
|
142 */ |
|
143 if (++count == MAX_RUN_COUNT) { |
|
144 sort(a, left, right, true); |
|
145 return; |
187 return; |
146 } |
188 } |
147 } |
189 |
148 |
190 /* |
149 // These invariants should hold true: |
191 * Invoke insertion sort on small leftmost part. |
150 // run[0] = 0 |
192 */ |
151 // run[<last>] = right + 1; (terminator) |
193 if (size < MAX_INSERTION_SORT_SIZE) { |
152 |
194 insertionSort(a, low, high); |
153 if (count == 0) { |
195 return; |
154 // A single equal run |
196 } |
155 return; |
197 |
156 } else if (count == 1 && run[count] > right) { |
198 /* |
157 // Either a single ascending or a transformed descending run. |
199 * Check if the whole array or large non-leftmost |
158 // Always check that a final run is a proper terminator, otherwise |
200 * parts are nearly sorted and then merge runs. |
159 // we have an unterminated trailing run, to handle downstream. |
201 */ |
160 return; |
202 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
161 } |
203 && tryMergeRuns(sorter, a, low, size)) { |
162 right++; |
204 return; |
163 if (run[count] < right) { |
205 } |
164 // Corner case: the final run is not a terminator. This may happen |
206 |
165 // if a final run is an equals run, or there is a single-element run |
207 /* |
166 // at the end. Fix up by adding a proper terminator at the end. |
208 * Switch to heap sort if execution |
167 // Note that we terminate with (right + 1), incremented earlier. |
209 * time is becoming quadratic. |
168 run[++count] = right; |
210 */ |
169 } |
211 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
170 |
212 heapSort(a, low, high); |
171 // Determine alternation base for merge |
213 return; |
172 byte odd = 0; |
214 } |
173 for (int n = 1; (n <<= 1) < count; odd ^= 1); |
215 |
174 |
216 /* |
175 // Use or create temporary array b for merging |
217 * Use an inexpensive approximation of the golden ratio |
176 int[] b; // temp array; alternates with a |
218 * to select five sample elements and determine pivots. |
177 int ao, bo; // array offsets from 'left' |
219 */ |
178 int blen = right - left; // space needed for b |
220 int step = (size >> 3) * 3 + 3; |
179 if (work == null || workLen < blen || workBase + blen > work.length) { |
221 |
180 work = new int[blen]; |
222 /* |
181 workBase = 0; |
223 * Five elements around (and including) the central element |
182 } |
224 * will be used for pivot selection as described below. The |
183 if (odd == 0) { |
225 * unequal choice of spacing these elements was empirically |
184 System.arraycopy(a, left, work, workBase, blen); |
226 * determined to work well on a wide variety of inputs. |
185 b = a; |
227 */ |
186 bo = 0; |
228 int e1 = low + step; |
187 a = work; |
229 int e5 = end - step; |
188 ao = workBase - left; |
230 int e3 = (e1 + e5) >>> 1; |
189 } else { |
231 int e2 = (e1 + e3) >>> 1; |
190 b = work; |
232 int e4 = (e3 + e5) >>> 1; |
191 ao = 0; |
233 int a3 = a[e3]; |
192 bo = workBase - left; |
234 |
193 } |
235 /* |
194 |
236 * Sort these elements in place by the combination |
195 // Merging |
237 * of 4-element sorting network and insertion sort. |
196 for (int last; count > 1; count = last) { |
|
197 for (int k = (last = 0) + 2; k <= count; k += 2) { |
|
198 int hi = run[k], mi = run[k - 1]; |
|
199 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) { |
|
200 if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) { |
|
201 b[i + bo] = a[p++ + ao]; |
|
202 } else { |
|
203 b[i + bo] = a[q++ + ao]; |
|
204 } |
|
205 } |
|
206 run[++last] = hi; |
|
207 } |
|
208 if ((count & 1) != 0) { |
|
209 for (int i = right, lo = run[count - 1]; --i >= lo; |
|
210 b[i + bo] = a[i + ao] |
|
211 ); |
|
212 run[++last] = right; |
|
213 } |
|
214 int[] t = a; a = b; b = t; |
|
215 int o = ao; ao = bo; bo = o; |
|
216 } |
|
217 } |
|
218 |
|
219 /** |
|
220 * Sorts the specified range of the array by Dual-Pivot Quicksort. |
|
221 * |
|
222 * @param a the array to be sorted |
|
223 * @param left the index of the first element, inclusive, to be sorted |
|
224 * @param right the index of the last element, inclusive, to be sorted |
|
225 * @param leftmost indicates if this part is the leftmost in the range |
|
226 */ |
|
227 private static void sort(int[] a, int left, int right, boolean leftmost) { |
|
228 int length = right - left + 1; |
|
229 |
|
230 // Use insertion sort on tiny arrays |
|
231 if (length < INSERTION_SORT_THRESHOLD) { |
|
232 if (leftmost) { |
|
233 /* |
|
234 * Traditional (without sentinel) insertion sort, |
|
235 * optimized for server VM, is used in case of |
|
236 * the leftmost part. |
|
237 */ |
|
238 for (int i = left, j = i; i < right; j = ++i) { |
|
239 int ai = a[i + 1]; |
|
240 while (ai < a[j]) { |
|
241 a[j + 1] = a[j]; |
|
242 if (j-- == left) { |
|
243 break; |
|
244 } |
|
245 } |
|
246 a[j + 1] = ai; |
|
247 } |
|
248 } else { |
|
249 /* |
|
250 * Skip the longest ascending sequence. |
|
251 */ |
|
252 do { |
|
253 if (left >= right) { |
|
254 return; |
|
255 } |
|
256 } while (a[++left] >= a[left - 1]); |
|
257 |
|
258 /* |
|
259 * Every element from adjoining part plays the role |
|
260 * of sentinel, therefore this allows us to avoid the |
|
261 * left range check on each iteration. Moreover, we use |
|
262 * the more optimized algorithm, so called pair insertion |
|
263 * sort, which is faster (in the context of Quicksort) |
|
264 * than traditional implementation of insertion sort. |
|
265 */ |
|
266 for (int k = left; ++left <= right; k = ++left) { |
|
267 int a1 = a[k], a2 = a[left]; |
|
268 |
|
269 if (a1 < a2) { |
|
270 a2 = a1; a1 = a[left]; |
|
271 } |
|
272 while (a1 < a[--k]) { |
|
273 a[k + 2] = a[k]; |
|
274 } |
|
275 a[++k + 1] = a1; |
|
276 |
|
277 while (a2 < a[--k]) { |
|
278 a[k + 1] = a[k]; |
|
279 } |
|
280 a[k + 1] = a2; |
|
281 } |
|
282 int last = a[right]; |
|
283 |
|
284 while (last < a[--right]) { |
|
285 a[right + 1] = a[right]; |
|
286 } |
|
287 a[right + 1] = last; |
|
288 } |
|
289 return; |
|
290 } |
|
291 |
|
292 // Inexpensive approximation of length / 7 |
|
293 int seventh = (length >> 3) + (length >> 6) + 1; |
|
294 |
|
295 /* |
|
296 * Sort five evenly spaced elements around (and including) the |
|
297 * center element in the range. These elements will be used for |
|
298 * pivot selection as described below. The choice for spacing |
|
299 * these elements was empirically determined to work well on |
|
300 * a wide variety of inputs. |
|
301 */ |
|
302 int e3 = (left + right) >>> 1; // The midpoint |
|
303 int e2 = e3 - seventh; |
|
304 int e1 = e2 - seventh; |
|
305 int e4 = e3 + seventh; |
|
306 int e5 = e4 + seventh; |
|
307 |
|
308 // Sort these elements using insertion sort |
|
309 if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
310 |
|
311 if (a[e3] < a[e2]) { int t = a[e3]; a[e3] = a[e2]; a[e2] = t; |
|
312 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
313 } |
|
314 if (a[e4] < a[e3]) { int t = a[e4]; a[e4] = a[e3]; a[e3] = t; |
|
315 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
316 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
317 } |
|
318 } |
|
319 if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; |
|
320 if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; |
|
321 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
322 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
323 } |
|
324 } |
|
325 } |
|
326 |
|
327 // Pointers |
|
328 int less = left; // The index of the first element of center part |
|
329 int great = right; // The index before the first element of right part |
|
330 |
|
331 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { |
|
332 /* |
|
333 * Use the second and fourth of the five sorted elements as pivots. |
|
334 * These values are inexpensive approximations of the first and |
|
335 * second terciles of the array. Note that pivot1 <= pivot2. |
|
336 */ |
|
337 int pivot1 = a[e2]; |
|
338 int pivot2 = a[e4]; |
|
339 |
|
340 /* |
|
341 * The first and the last elements to be sorted are moved to the |
|
342 * locations formerly occupied by the pivots. When partitioning |
|
343 * is complete, the pivots are swapped back into their final |
|
344 * positions, and excluded from subsequent sorting. |
|
345 */ |
|
346 a[e2] = a[left]; |
|
347 a[e4] = a[right]; |
|
348 |
|
349 /* |
|
350 * Skip elements, which are less or greater than pivot values. |
|
351 */ |
|
352 while (a[++less] < pivot1); |
|
353 while (a[--great] > pivot2); |
|
354 |
|
355 /* |
|
356 * Partitioning: |
|
357 * |
238 * |
358 * left part center part right part |
239 * 5 ------o-----------o------------ |
359 * +--------------------------------------------------------------+ |
240 * | | |
360 * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | |
241 * 4 ------|-----o-----o-----o------ |
361 * +--------------------------------------------------------------+ |
242 * | | | |
362 * ^ ^ ^ |
243 * 2 ------o-----|-----o-----o------ |
363 * | | | |
244 * | | |
364 * less k great |
245 * 1 ------------o-----o------------ |
365 * |
246 */ |
366 * Invariants: |
247 if (a[e5] < a[e2]) { int t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
367 * |
248 if (a[e4] < a[e1]) { int t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
368 * all in (left, less) < pivot1 |
249 if (a[e5] < a[e4]) { int t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
369 * pivot1 <= all in [less, k) <= pivot2 |
250 if (a[e2] < a[e1]) { int t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
370 * all in (great, right) > pivot2 |
251 if (a[e4] < a[e2]) { int t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
371 * |
252 |
372 * Pointer k is the first index of ?-part. |
253 if (a3 < a[e2]) { |
373 */ |
254 if (a3 < a[e1]) { |
374 outer: |
255 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
375 for (int k = less - 1; ++k <= great; ) { |
256 } else { |
376 int ak = a[k]; |
257 a[e3] = a[e2]; a[e2] = a3; |
377 if (ak < pivot1) { // Move a[k] to left part |
258 } |
378 a[k] = a[less]; |
259 } else if (a3 > a[e4]) { |
379 /* |
260 if (a3 > a[e5]) { |
380 * Here and below we use "a[i] = b; i++;" instead |
261 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
381 * of "a[i++] = b;" due to performance issue. |
262 } else { |
382 */ |
263 a[e3] = a[e4]; a[e4] = a3; |
383 a[less] = ak; |
264 } |
384 ++less; |
265 } |
385 } else if (ak > pivot2) { // Move a[k] to right part |
266 |
386 while (a[great] > pivot2) { |
267 // Pointers |
387 if (great-- == k) { |
268 int lower = low; // The index of the last element of the left part |
388 break outer; |
269 int upper = end; // The index of the first element of the right part |
389 } |
270 |
390 } |
271 /* |
391 if (a[great] < pivot1) { // a[great] <= pivot2 |
272 * Partitioning with 2 pivots in case of different elements. |
392 a[k] = a[less]; |
273 */ |
393 a[less] = a[great]; |
274 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
394 ++less; |
275 |
395 } else { // pivot1 <= a[great] <= pivot2 |
276 /* |
396 a[k] = a[great]; |
277 * Use the first and fifth of the five sorted elements as |
397 } |
278 * the pivots. These values are inexpensive approximation |
398 /* |
279 * of tertiles. Note, that pivot1 < pivot2. |
399 * Here and below we use "a[i] = b; i--;" instead |
280 */ |
400 * of "a[i--] = b;" due to performance issue. |
281 int pivot1 = a[e1]; |
401 */ |
282 int pivot2 = a[e5]; |
402 a[great] = ak; |
283 |
403 --great; |
284 /* |
404 } |
285 * The first and the last elements to be sorted are moved |
405 } |
286 * to the locations formerly occupied by the pivots. When |
406 |
287 * partitioning is completed, the pivots are swapped back |
407 // Swap pivots into their final positions |
288 * into their final positions, and excluded from the next |
408 a[left] = a[less - 1]; a[less - 1] = pivot1; |
289 * subsequent sorting. |
409 a[right] = a[great + 1]; a[great + 1] = pivot2; |
290 */ |
410 |
291 a[e1] = a[lower]; |
411 // Sort left and right parts recursively, excluding known pivots |
292 a[e5] = a[upper]; |
412 sort(a, left, less - 2, leftmost); |
293 |
413 sort(a, great + 2, right, false); |
294 /* |
414 |
295 * Skip elements, which are less or greater than the pivots. |
415 /* |
296 */ |
416 * If center part is too large (comprises > 4/7 of the array), |
297 while (a[++lower] < pivot1); |
417 * swap internal pivot values to ends. |
298 while (a[--upper] > pivot2); |
418 */ |
299 |
419 if (less < e1 && e5 < great) { |
300 /* |
420 /* |
301 * Backward 3-interval partitioning |
421 * Skip elements, which are equal to pivot values. |
302 * |
422 */ |
303 * left part central part right part |
423 while (a[less] == pivot1) { |
304 * +------------------------------------------------------------+ |
424 ++less; |
305 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
425 } |
306 * +------------------------------------------------------------+ |
426 |
307 * ^ ^ ^ |
427 while (a[great] == pivot2) { |
308 * | | | |
428 --great; |
309 * lower k upper |
429 } |
|
430 |
|
431 /* |
|
432 * Partitioning: |
|
433 * |
|
434 * left part center part right part |
|
435 * +----------------------------------------------------------+ |
|
436 * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | |
|
437 * +----------------------------------------------------------+ |
|
438 * ^ ^ ^ |
|
439 * | | | |
|
440 * less k great |
|
441 * |
310 * |
442 * Invariants: |
311 * Invariants: |
443 * |
312 * |
444 * all in (*, less) == pivot1 |
313 * all in (low, lower] < pivot1 |
445 * pivot1 < all in [less, k) < pivot2 |
314 * pivot1 <= all in (k, upper) <= pivot2 |
446 * all in (great, *) == pivot2 |
315 * all in [upper, end) > pivot2 |
447 * |
316 * |
448 * Pointer k is the first index of ?-part. |
317 * Pointer k is the last index of ?-part |
449 */ |
318 */ |
450 outer: |
319 for (int unused = --lower, k = ++upper; --k > lower; ) { |
451 for (int k = less - 1; ++k <= great; ) { |
|
452 int ak = a[k]; |
320 int ak = a[k]; |
453 if (ak == pivot1) { // Move a[k] to left part |
321 |
454 a[k] = a[less]; |
322 if (ak < pivot1) { // Move a[k] to the left side |
455 a[less] = ak; |
323 while (lower < k) { |
456 ++less; |
324 if (a[++lower] >= pivot1) { |
457 } else if (ak == pivot2) { // Move a[k] to right part |
325 if (a[lower] > pivot2) { |
458 while (a[great] == pivot2) { |
326 a[k] = a[--upper]; |
459 if (great-- == k) { |
327 a[upper] = a[lower]; |
460 break outer; |
328 } else { |
|
329 a[k] = a[lower]; |
|
330 } |
|
331 a[lower] = ak; |
|
332 break; |
461 } |
333 } |
462 } |
334 } |
463 if (a[great] == pivot1) { // a[great] < pivot2 |
335 } else if (ak > pivot2) { // Move a[k] to the right side |
464 a[k] = a[less]; |
336 a[k] = a[--upper]; |
465 /* |
337 a[upper] = ak; |
466 * Even though a[great] equals to pivot1, the |
338 } |
467 * assignment a[less] = pivot1 may be incorrect, |
339 } |
468 * if a[great] and pivot1 are floating-point zeros |
340 |
469 * of different signs. Therefore in float and |
341 /* |
470 * double sorting methods we have to use more |
342 * Swap the pivots into their final positions. |
471 * accurate assignment a[less] = a[great]. |
343 */ |
472 */ |
344 a[low] = a[lower]; a[lower] = pivot1; |
473 a[less] = pivot1; |
345 a[end] = a[upper]; a[upper] = pivot2; |
474 ++less; |
346 |
475 } else { // pivot1 < a[great] < pivot2 |
347 /* |
476 a[k] = a[great]; |
348 * Sort non-left parts recursively (possibly in parallel), |
|
349 * excluding known pivots. |
|
350 */ |
|
351 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
352 sorter.forkSorter(bits | 1, lower + 1, upper); |
|
353 sorter.forkSorter(bits | 1, upper + 1, high); |
|
354 } else { |
|
355 sort(sorter, a, bits | 1, lower + 1, upper); |
|
356 sort(sorter, a, bits | 1, upper + 1, high); |
|
357 } |
|
358 |
|
359 } else { // Use single pivot in case of many equal elements |
|
360 |
|
361 /* |
|
362 * Use the third of the five sorted elements as the pivot. |
|
363 * This value is inexpensive approximation of the median. |
|
364 */ |
|
365 int pivot = a[e3]; |
|
366 |
|
367 /* |
|
368 * The first element to be sorted is moved to the |
|
369 * location formerly occupied by the pivot. After |
|
370 * completion of partitioning the pivot is swapped |
|
371 * back into its final position, and excluded from |
|
372 * the next subsequent sorting. |
|
373 */ |
|
374 a[e3] = a[lower]; |
|
375 |
|
376 /* |
|
377 * Traditional 3-way (Dutch National Flag) partitioning |
|
378 * |
|
379 * left part central part right part |
|
380 * +------------------------------------------------------+ |
|
381 * | < pivot | ? | == pivot | > pivot | |
|
382 * +------------------------------------------------------+ |
|
383 * ^ ^ ^ |
|
384 * | | | |
|
385 * lower k upper |
|
386 * |
|
387 * Invariants: |
|
388 * |
|
389 * all in (low, lower] < pivot |
|
390 * all in (k, upper) == pivot |
|
391 * all in [upper, end] > pivot |
|
392 * |
|
393 * Pointer k is the last index of ?-part |
|
394 */ |
|
395 for (int k = ++upper; --k > lower; ) { |
|
396 int ak = a[k]; |
|
397 |
|
398 if (ak != pivot) { |
|
399 a[k] = pivot; |
|
400 |
|
401 if (ak < pivot) { // Move a[k] to the left side |
|
402 while (a[++lower] < pivot); |
|
403 |
|
404 if (a[lower] > pivot) { |
|
405 a[--upper] = a[lower]; |
|
406 } |
|
407 a[lower] = ak; |
|
408 } else { // ak > pivot - Move a[k] to the right side |
|
409 a[--upper] = ak; |
477 } |
410 } |
478 a[great] = ak; |
411 } |
479 --great; |
412 } |
480 } |
413 |
481 } |
414 /* |
482 } |
415 * Swap the pivot into its final position. |
483 |
416 */ |
484 // Sort center part recursively |
417 a[low] = a[lower]; a[lower] = pivot; |
485 sort(a, less, great, false); |
418 |
486 |
419 /* |
487 } else { // Partitioning with one pivot |
420 * Sort the right part (possibly in parallel), excluding |
488 /* |
421 * known pivot. All elements from the central part are |
489 * Use the third of the five sorted elements as pivot. |
422 * equal and therefore already sorted. |
490 * This value is inexpensive approximation of the median. |
423 */ |
491 */ |
424 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
492 int pivot = a[e3]; |
425 sorter.forkSorter(bits | 1, upper, high); |
493 |
426 } else { |
494 /* |
427 sort(sorter, a, bits | 1, upper, high); |
495 * Partitioning degenerates to the traditional 3-way |
428 } |
496 * (or "Dutch National Flag") schema: |
429 } |
|
430 high = lower; // Iterate along the left part |
|
431 } |
|
432 } |
|
433 |
|
434 /** |
|
435 * Sorts the specified range of the array using mixed insertion sort. |
|
436 * |
|
437 * Mixed insertion sort is combination of simple insertion sort, |
|
438 * pin insertion sort and pair insertion sort. |
|
439 * |
|
440 * In the context of Dual-Pivot Quicksort, the pivot element |
|
441 * from the left part plays the role of sentinel, because it |
|
442 * is less than any elements from the given part. Therefore, |
|
443 * expensive check of the left range can be skipped on each |
|
444 * iteration unless it is the leftmost call. |
|
445 * |
|
446 * @param a the array to be sorted |
|
447 * @param low the index of the first element, inclusive, to be sorted |
|
448 * @param end the index of the last element for simple insertion sort |
|
449 * @param high the index of the last element, exclusive, to be sorted |
|
450 */ |
|
451 private static void mixedInsertionSort(int[] a, int low, int end, int high) { |
|
452 if (end == high) { |
|
453 |
|
454 /* |
|
455 * Invoke simple insertion sort on tiny array. |
|
456 */ |
|
457 for (int i; ++low < end; ) { |
|
458 int ai = a[i = low]; |
|
459 |
|
460 while (ai < a[--i]) { |
|
461 a[i + 1] = a[i]; |
|
462 } |
|
463 a[i + 1] = ai; |
|
464 } |
|
465 } else { |
|
466 |
|
467 /* |
|
468 * Start with pin insertion sort on small part. |
497 * |
469 * |
498 * left part center part right part |
470 * Pin insertion sort is extended simple insertion sort. |
499 * +-------------------------------------------------+ |
471 * The main idea of this sort is to put elements larger |
500 * | < pivot | == pivot | ? | > pivot | |
472 * than an element called pin to the end of array (the |
501 * +-------------------------------------------------+ |
473 * proper area for such elements). It avoids expensive |
502 * ^ ^ ^ |
474 * movements of these elements through the whole array. |
503 * | | | |
475 */ |
504 * less k great |
476 int pin = a[end]; |
|
477 |
|
478 for (int i, p = high; ++low < end; ) { |
|
479 int ai = a[i = low]; |
|
480 |
|
481 if (ai < a[i - 1]) { // Small element |
|
482 |
|
483 /* |
|
484 * Insert small element into sorted part. |
|
485 */ |
|
486 a[i] = a[--i]; |
|
487 |
|
488 while (ai < a[--i]) { |
|
489 a[i + 1] = a[i]; |
|
490 } |
|
491 a[i + 1] = ai; |
|
492 |
|
493 } else if (p > i && ai > pin) { // Large element |
|
494 |
|
495 /* |
|
496 * Find element smaller than pin. |
|
497 */ |
|
498 while (a[--p] > pin); |
|
499 |
|
500 /* |
|
501 * Swap it with large element. |
|
502 */ |
|
503 if (p > i) { |
|
504 ai = a[p]; |
|
505 a[p] = a[i]; |
|
506 } |
|
507 |
|
508 /* |
|
509 * Insert small element into sorted part. |
|
510 */ |
|
511 while (ai < a[--i]) { |
|
512 a[i + 1] = a[i]; |
|
513 } |
|
514 a[i + 1] = ai; |
|
515 } |
|
516 } |
|
517 |
|
518 /* |
|
519 * Continue with pair insertion sort on remain part. |
|
520 */ |
|
521 for (int i; low < high; ++low) { |
|
522 int a1 = a[i = low], a2 = a[++low]; |
|
523 |
|
524 /* |
|
525 * Insert two elements per iteration: at first, insert the |
|
526 * larger element and then insert the smaller element, but |
|
527 * from the position where the larger element was inserted. |
|
528 */ |
|
529 if (a1 > a2) { |
|
530 |
|
531 while (a1 < a[--i]) { |
|
532 a[i + 2] = a[i]; |
|
533 } |
|
534 a[++i + 1] = a1; |
|
535 |
|
536 while (a2 < a[--i]) { |
|
537 a[i + 1] = a[i]; |
|
538 } |
|
539 a[i + 1] = a2; |
|
540 |
|
541 } else if (a1 < a[i - 1]) { |
|
542 |
|
543 while (a2 < a[--i]) { |
|
544 a[i + 2] = a[i]; |
|
545 } |
|
546 a[++i + 1] = a2; |
|
547 |
|
548 while (a1 < a[--i]) { |
|
549 a[i + 1] = a[i]; |
|
550 } |
|
551 a[i + 1] = a1; |
|
552 } |
|
553 } |
|
554 } |
|
555 } |
|
556 |
|
557 /** |
|
558 * Sorts the specified range of the array using insertion sort. |
|
559 * |
|
560 * @param a the array to be sorted |
|
561 * @param low the index of the first element, inclusive, to be sorted |
|
562 * @param high the index of the last element, exclusive, to be sorted |
|
563 */ |
|
564 private static void insertionSort(int[] a, int low, int high) { |
|
565 for (int i, k = low; ++k < high; ) { |
|
566 int ai = a[i = k]; |
|
567 |
|
568 if (ai < a[i - 1]) { |
|
569 while (--i >= low && ai < a[i]) { |
|
570 a[i + 1] = a[i]; |
|
571 } |
|
572 a[i + 1] = ai; |
|
573 } |
|
574 } |
|
575 } |
|
576 |
|
577 /** |
|
578 * Sorts the specified range of the array using heap sort. |
|
579 * |
|
580 * @param a the array to be sorted |
|
581 * @param low the index of the first element, inclusive, to be sorted |
|
582 * @param high the index of the last element, exclusive, to be sorted |
|
583 */ |
|
584 private static void heapSort(int[] a, int low, int high) { |
|
585 for (int k = (low + high) >>> 1; k > low; ) { |
|
586 pushDown(a, --k, a[k], low, high); |
|
587 } |
|
588 while (--high > low) { |
|
589 int max = a[low]; |
|
590 pushDown(a, low, a[high], low, high); |
|
591 a[high] = max; |
|
592 } |
|
593 } |
|
594 |
|
595 /** |
|
596 * Pushes specified element down during heap sort. |
|
597 * |
|
598 * @param a the given array |
|
599 * @param p the start index |
|
600 * @param value the given element |
|
601 * @param low the index of the first element, inclusive, to be sorted |
|
602 * @param high the index of the last element, exclusive, to be sorted |
|
603 */ |
|
604 private static void pushDown(int[] a, int p, int value, int low, int high) { |
|
605 for (int k ;; a[p] = a[p = k]) { |
|
606 k = (p << 1) - low + 2; // Index of the right child |
|
607 |
|
608 if (k > high) { |
|
609 break; |
|
610 } |
|
611 if (k == high || a[k] < a[k - 1]) { |
|
612 --k; |
|
613 } |
|
614 if (a[k] <= value) { |
|
615 break; |
|
616 } |
|
617 } |
|
618 a[p] = value; |
|
619 } |
|
620 |
|
621 /** |
|
622 * Tries to sort the specified range of the array. |
|
623 * |
|
624 * @param sorter parallel context |
|
625 * @param a the array to be sorted |
|
626 * @param low the index of the first element to be sorted |
|
627 * @param size the array size |
|
628 * @return true if finally sorted, false otherwise |
|
629 */ |
|
630 private static boolean tryMergeRuns(Sorter sorter, int[] a, int low, int size) { |
|
631 |
|
632 /* |
|
633 * The run array is constructed only if initial runs are |
|
634 * long enough to continue, run[i] then holds start index |
|
635 * of the i-th sequence of elements in non-descending order. |
|
636 */ |
|
637 int[] run = null; |
|
638 int high = low + size; |
|
639 int count = 1, last = low; |
|
640 |
|
641 /* |
|
642 * Identify all possible runs. |
|
643 */ |
|
644 for (int k = low + 1; k < high; ) { |
|
645 |
|
646 /* |
|
647 * Find the end index of the current run. |
|
648 */ |
|
649 if (a[k - 1] < a[k]) { |
|
650 |
|
651 // Identify ascending sequence |
|
652 while (++k < high && a[k - 1] <= a[k]); |
|
653 |
|
654 } else if (a[k - 1] > a[k]) { |
|
655 |
|
656 // Identify descending sequence |
|
657 while (++k < high && a[k - 1] >= a[k]); |
|
658 |
|
659 // Reverse into ascending order |
|
660 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
661 int ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
662 } |
|
663 } else { // Identify constant sequence |
|
664 for (int ak = a[k]; ++k < high && ak == a[k]; ); |
|
665 |
|
666 if (k < high) { |
|
667 continue; |
|
668 } |
|
669 } |
|
670 |
|
671 /* |
|
672 * Check special cases. |
|
673 */ |
|
674 if (run == null) { |
|
675 if (k == high) { |
|
676 |
|
677 /* |
|
678 * The array is monotonous sequence, |
|
679 * and therefore already sorted. |
|
680 */ |
|
681 return true; |
|
682 } |
|
683 |
|
684 if (k - low < MIN_FIRST_RUN_SIZE) { |
|
685 |
|
686 /* |
|
687 * The first run is too small |
|
688 * to proceed with scanning. |
|
689 */ |
|
690 return false; |
|
691 } |
|
692 |
|
693 run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
|
694 run[0] = low; |
|
695 |
|
696 } else if (a[last - 1] > a[last]) { |
|
697 |
|
698 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
|
699 |
|
700 /* |
|
701 * The first runs are not long |
|
702 * enough to continue scanning. |
|
703 */ |
|
704 return false; |
|
705 } |
|
706 |
|
707 if (++count == MAX_RUN_CAPACITY) { |
|
708 |
|
709 /* |
|
710 * Array is not highly structured. |
|
711 */ |
|
712 return false; |
|
713 } |
|
714 |
|
715 if (count == run.length) { |
|
716 |
|
717 /* |
|
718 * Increase capacity of index array. |
|
719 */ |
|
720 run = Arrays.copyOf(run, count << 1); |
|
721 } |
|
722 } |
|
723 run[count] = (last = k); |
|
724 } |
|
725 |
|
726 /* |
|
727 * Merge runs of highly structured array. |
|
728 */ |
|
729 if (count > 1) { |
|
730 int[] b; int offset = low; |
|
731 |
|
732 if (sorter == null || (b = (int[]) sorter.b) == null) { |
|
733 b = new int[size]; |
|
734 } else { |
|
735 offset = sorter.offset; |
|
736 } |
|
737 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
738 } |
|
739 return true; |
|
740 } |
|
741 |
|
742 /** |
|
743 * Merges the specified runs. |
|
744 * |
|
745 * @param a the source array |
|
746 * @param b the temporary buffer used in merging |
|
747 * @param offset the start index in the source, inclusive |
|
748 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
749 * @param parallel indicates whether merging is performed in parallel |
|
750 * @param run the start indexes of the runs, inclusive |
|
751 * @param lo the start index of the first run, inclusive |
|
752 * @param hi the start index of the last run, inclusive |
|
753 * @return the destination where runs are merged |
|
754 */ |
|
755 private static int[] mergeRuns(int[] a, int[] b, int offset, |
|
756 int aim, boolean parallel, int[] run, int lo, int hi) { |
|
757 |
|
758 if (hi - lo == 1) { |
|
759 if (aim >= 0) { |
|
760 return a; |
|
761 } |
|
762 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
763 b[--j] = a[--i] |
|
764 ); |
|
765 return b; |
|
766 } |
|
767 |
|
768 /* |
|
769 * Split into approximately equal parts. |
|
770 */ |
|
771 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
772 while (run[++mi + 1] <= rmi); |
|
773 |
|
774 /* |
|
775 * Merge the left and right parts. |
|
776 */ |
|
777 int[] a1, a2; |
|
778 |
|
779 if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
780 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
781 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
782 a2 = (int[]) merger.getDestination(); |
|
783 } else { |
|
784 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
785 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
786 } |
|
787 |
|
788 int[] dst = a1 == a ? b : a; |
|
789 |
|
790 int k = a1 == a ? run[lo] - offset : run[lo]; |
|
791 int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
792 int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
793 int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
794 int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
795 |
|
796 if (parallel) { |
|
797 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
798 } else { |
|
799 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
800 } |
|
801 return dst; |
|
802 } |
|
803 |
|
804 /** |
|
805 * Merges the sorted parts. |
|
806 * |
|
807 * @param merger parallel context |
|
808 * @param dst the destination where parts are merged |
|
809 * @param k the start index of the destination, inclusive |
|
810 * @param a1 the first part |
|
811 * @param lo1 the start index of the first part, inclusive |
|
812 * @param hi1 the end index of the first part, exclusive |
|
813 * @param a2 the second part |
|
814 * @param lo2 the start index of the second part, inclusive |
|
815 * @param hi2 the end index of the second part, exclusive |
|
816 */ |
|
817 private static void mergeParts(Merger merger, int[] dst, int k, |
|
818 int[] a1, int lo1, int hi1, int[] a2, int lo2, int hi2) { |
|
819 |
|
820 if (merger != null && a1 == a2) { |
|
821 |
|
822 while (true) { |
|
823 |
|
824 /* |
|
825 * The first part must be larger. |
|
826 */ |
|
827 if (hi1 - lo1 < hi2 - lo2) { |
|
828 int lo = lo1; lo1 = lo2; lo2 = lo; |
|
829 int hi = hi1; hi1 = hi2; hi2 = hi; |
|
830 } |
|
831 |
|
832 /* |
|
833 * Small parts will be merged sequentially. |
|
834 */ |
|
835 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
836 break; |
|
837 } |
|
838 |
|
839 /* |
|
840 * Find the median of the larger part. |
|
841 */ |
|
842 int mi1 = (lo1 + hi1) >>> 1; |
|
843 int key = a1[mi1]; |
|
844 int mi2 = hi2; |
|
845 |
|
846 /* |
|
847 * Partition the smaller part. |
|
848 */ |
|
849 for (int loo = lo2; loo < mi2; ) { |
|
850 int t = (loo + mi2) >>> 1; |
|
851 |
|
852 if (key > a2[t]) { |
|
853 loo = t + 1; |
|
854 } else { |
|
855 mi2 = t; |
|
856 } |
|
857 } |
|
858 |
|
859 int d = mi2 - lo2 + mi1 - lo1; |
|
860 |
|
861 /* |
|
862 * Merge the right sub-parts in parallel. |
|
863 */ |
|
864 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
865 |
|
866 /* |
|
867 * Process the sub-left parts. |
|
868 */ |
|
869 hi1 = mi1; |
|
870 hi2 = mi2; |
|
871 } |
|
872 } |
|
873 |
|
874 /* |
|
875 * Merge small parts sequentially. |
|
876 */ |
|
877 while (lo1 < hi1 && lo2 < hi2) { |
|
878 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
879 } |
|
880 if (dst != a1 || k < lo1) { |
|
881 while (lo1 < hi1) { |
|
882 dst[k++] = a1[lo1++]; |
|
883 } |
|
884 } |
|
885 if (dst != a2 || k < lo2) { |
|
886 while (lo2 < hi2) { |
|
887 dst[k++] = a2[lo2++]; |
|
888 } |
|
889 } |
|
890 } |
|
891 |
|
892 // [long] |
|
893 |
|
894 /** |
|
895 * Sorts the specified range of the array using parallel merge |
|
896 * sort and/or Dual-Pivot Quicksort. |
|
897 * |
|
898 * To balance the faster splitting and parallelism of merge sort |
|
899 * with the faster element partitioning of Quicksort, ranges are |
|
900 * subdivided in tiers such that, if there is enough parallelism, |
|
901 * the four-way parallel merge is started, still ensuring enough |
|
902 * parallelism to process the partitions. |
|
903 * |
|
904 * @param a the array to be sorted |
|
905 * @param parallelism the parallelism level |
|
906 * @param low the index of the first element, inclusive, to be sorted |
|
907 * @param high the index of the last element, exclusive, to be sorted |
|
908 */ |
|
909 static void sort(long[] a, int parallelism, int low, int high) { |
|
910 int size = high - low; |
|
911 |
|
912 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
913 int depth = getDepth(parallelism, size >> 12); |
|
914 long[] b = depth == 0 ? null : new long[size]; |
|
915 new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
916 } else { |
|
917 sort(null, a, 0, low, high); |
|
918 } |
|
919 } |
|
920 |
|
921 /** |
|
922 * Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
923 * other sorts in special-cases, possibly with parallel partitions. |
|
924 * |
|
925 * @param sorter parallel context |
|
926 * @param a the array to be sorted |
|
927 * @param bits the combination of recursion depth and bit flag, where |
|
928 * the right bit "0" indicates that array is the leftmost part |
|
929 * @param low the index of the first element, inclusive, to be sorted |
|
930 * @param high the index of the last element, exclusive, to be sorted |
|
931 */ |
|
932 static void sort(Sorter sorter, long[] a, int bits, int low, int high) { |
|
933 while (true) { |
|
934 int end = high - 1, size = high - low; |
|
935 |
|
936 /* |
|
937 * Run mixed insertion sort on small non-leftmost parts. |
|
938 */ |
|
939 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
940 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
941 return; |
|
942 } |
|
943 |
|
944 /* |
|
945 * Invoke insertion sort on small leftmost part. |
|
946 */ |
|
947 if (size < MAX_INSERTION_SORT_SIZE) { |
|
948 insertionSort(a, low, high); |
|
949 return; |
|
950 } |
|
951 |
|
952 /* |
|
953 * Check if the whole array or large non-leftmost |
|
954 * parts are nearly sorted and then merge runs. |
|
955 */ |
|
956 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
957 && tryMergeRuns(sorter, a, low, size)) { |
|
958 return; |
|
959 } |
|
960 |
|
961 /* |
|
962 * Switch to heap sort if execution |
|
963 * time is becoming quadratic. |
|
964 */ |
|
965 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
966 heapSort(a, low, high); |
|
967 return; |
|
968 } |
|
969 |
|
970 /* |
|
971 * Use an inexpensive approximation of the golden ratio |
|
972 * to select five sample elements and determine pivots. |
|
973 */ |
|
974 int step = (size >> 3) * 3 + 3; |
|
975 |
|
976 /* |
|
977 * Five elements around (and including) the central element |
|
978 * will be used for pivot selection as described below. The |
|
979 * unequal choice of spacing these elements was empirically |
|
980 * determined to work well on a wide variety of inputs. |
|
981 */ |
|
982 int e1 = low + step; |
|
983 int e5 = end - step; |
|
984 int e3 = (e1 + e5) >>> 1; |
|
985 int e2 = (e1 + e3) >>> 1; |
|
986 int e4 = (e3 + e5) >>> 1; |
|
987 long a3 = a[e3]; |
|
988 |
|
989 /* |
|
990 * Sort these elements in place by the combination |
|
991 * of 4-element sorting network and insertion sort. |
505 * |
992 * |
506 * Invariants: |
993 * 5 ------o-----------o------------ |
507 * |
994 * | | |
508 * all in (left, less) < pivot |
995 * 4 ------|-----o-----o-----o------ |
509 * all in [less, k) == pivot |
996 * | | | |
510 * all in (great, right) > pivot |
997 * 2 ------o-----|-----o-----o------ |
511 * |
998 * | | |
512 * Pointer k is the first index of ?-part. |
999 * 1 ------------o-----o------------ |
513 */ |
1000 */ |
514 for (int k = less; k <= great; ++k) { |
1001 if (a[e5] < a[e2]) { long t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
515 if (a[k] == pivot) { |
1002 if (a[e4] < a[e1]) { long t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
516 continue; |
1003 if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
517 } |
1004 if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
518 int ak = a[k]; |
1005 if (a[e4] < a[e2]) { long t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
519 if (ak < pivot) { // Move a[k] to left part |
1006 |
520 a[k] = a[less]; |
1007 if (a3 < a[e2]) { |
521 a[less] = ak; |
1008 if (a3 < a[e1]) { |
522 ++less; |
1009 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
523 } else { // a[k] > pivot - Move a[k] to right part |
1010 } else { |
524 while (a[great] > pivot) { |
1011 a[e3] = a[e2]; a[e2] = a3; |
525 --great; |
1012 } |
526 } |
1013 } else if (a3 > a[e4]) { |
527 if (a[great] < pivot) { // a[great] <= pivot |
1014 if (a3 > a[e5]) { |
528 a[k] = a[less]; |
1015 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
529 a[less] = a[great]; |
1016 } else { |
530 ++less; |
1017 a[e3] = a[e4]; a[e4] = a3; |
531 } else { // a[great] == pivot |
1018 } |
532 /* |
1019 } |
533 * Even though a[great] equals to pivot, the |
1020 |
534 * assignment a[k] = pivot may be incorrect, |
1021 // Pointers |
535 * if a[great] and pivot are floating-point |
1022 int lower = low; // The index of the last element of the left part |
536 * zeros of different signs. Therefore in float |
1023 int upper = end; // The index of the first element of the right part |
537 * and double sorting methods we have to use |
1024 |
538 * more accurate assignment a[k] = a[great]. |
1025 /* |
539 */ |
1026 * Partitioning with 2 pivots in case of different elements. |
540 a[k] = pivot; |
1027 */ |
541 } |
1028 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
542 a[great] = ak; |
1029 |
543 --great; |
1030 /* |
544 } |
1031 * Use the first and fifth of the five sorted elements as |
545 } |
1032 * the pivots. These values are inexpensive approximation |
546 |
1033 * of tertiles. Note, that pivot1 < pivot2. |
547 /* |
1034 */ |
548 * Sort left and right parts recursively. |
1035 long pivot1 = a[e1]; |
549 * All elements from center part are equal |
1036 long pivot2 = a[e5]; |
550 * and, therefore, already sorted. |
1037 |
551 */ |
1038 /* |
552 sort(a, left, less - 1, leftmost); |
1039 * The first and the last elements to be sorted are moved |
553 sort(a, great + 1, right, false); |
1040 * to the locations formerly occupied by the pivots. When |
554 } |
1041 * partitioning is completed, the pivots are swapped back |
555 } |
1042 * into their final positions, and excluded from the next |
556 |
1043 * subsequent sorting. |
557 /** |
1044 */ |
558 * Sorts the specified range of the array using the given |
1045 a[e1] = a[lower]; |
559 * workspace array slice if possible for merging |
1046 a[e5] = a[upper]; |
560 * |
1047 |
561 * @param a the array to be sorted |
1048 /* |
562 * @param left the index of the first element, inclusive, to be sorted |
1049 * Skip elements, which are less or greater than the pivots. |
563 * @param right the index of the last element, inclusive, to be sorted |
1050 */ |
564 * @param work a workspace array (slice) |
1051 while (a[++lower] < pivot1); |
565 * @param workBase origin of usable space in work array |
1052 while (a[--upper] > pivot2); |
566 * @param workLen usable size of work array |
1053 |
567 */ |
1054 /* |
568 static void sort(long[] a, int left, int right, |
1055 * Backward 3-interval partitioning |
569 long[] work, int workBase, int workLen) { |
1056 * |
570 // Use Quicksort on small arrays |
1057 * left part central part right part |
571 if (right - left < QUICKSORT_THRESHOLD) { |
1058 * +------------------------------------------------------------+ |
572 sort(a, left, right, true); |
1059 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
573 return; |
1060 * +------------------------------------------------------------+ |
574 } |
1061 * ^ ^ ^ |
575 |
1062 * | | | |
576 /* |
1063 * lower k upper |
577 * Index run[i] is the start of i-th run |
|
578 * (ascending or descending sequence). |
|
579 */ |
|
580 int[] run = new int[MAX_RUN_COUNT + 1]; |
|
581 int count = 0; run[0] = left; |
|
582 |
|
583 // Check if the array is nearly sorted |
|
584 for (int k = left; k < right; run[count] = k) { |
|
585 // Equal items in the beginning of the sequence |
|
586 while (k < right && a[k] == a[k + 1]) |
|
587 k++; |
|
588 if (k == right) break; // Sequence finishes with equal items |
|
589 if (a[k] < a[k + 1]) { // ascending |
|
590 while (++k <= right && a[k - 1] <= a[k]); |
|
591 } else if (a[k] > a[k + 1]) { // descending |
|
592 while (++k <= right && a[k - 1] >= a[k]); |
|
593 // Transform into an ascending sequence |
|
594 for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) { |
|
595 long t = a[lo]; a[lo] = a[hi]; a[hi] = t; |
|
596 } |
|
597 } |
|
598 |
|
599 // Merge a transformed descending sequence followed by an |
|
600 // ascending sequence |
|
601 if (run[count] > left && a[run[count]] >= a[run[count] - 1]) { |
|
602 count--; |
|
603 } |
|
604 |
|
605 /* |
|
606 * The array is not highly structured, |
|
607 * use Quicksort instead of merge sort. |
|
608 */ |
|
609 if (++count == MAX_RUN_COUNT) { |
|
610 sort(a, left, right, true); |
|
611 return; |
|
612 } |
|
613 } |
|
614 |
|
615 // These invariants should hold true: |
|
616 // run[0] = 0 |
|
617 // run[<last>] = right + 1; (terminator) |
|
618 |
|
619 if (count == 0) { |
|
620 // A single equal run |
|
621 return; |
|
622 } else if (count == 1 && run[count] > right) { |
|
623 // Either a single ascending or a transformed descending run. |
|
624 // Always check that a final run is a proper terminator, otherwise |
|
625 // we have an unterminated trailing run, to handle downstream. |
|
626 return; |
|
627 } |
|
628 right++; |
|
629 if (run[count] < right) { |
|
630 // Corner case: the final run is not a terminator. This may happen |
|
631 // if a final run is an equals run, or there is a single-element run |
|
632 // at the end. Fix up by adding a proper terminator at the end. |
|
633 // Note that we terminate with (right + 1), incremented earlier. |
|
634 run[++count] = right; |
|
635 } |
|
636 |
|
637 // Determine alternation base for merge |
|
638 byte odd = 0; |
|
639 for (int n = 1; (n <<= 1) < count; odd ^= 1); |
|
640 |
|
641 // Use or create temporary array b for merging |
|
642 long[] b; // temp array; alternates with a |
|
643 int ao, bo; // array offsets from 'left' |
|
644 int blen = right - left; // space needed for b |
|
645 if (work == null || workLen < blen || workBase + blen > work.length) { |
|
646 work = new long[blen]; |
|
647 workBase = 0; |
|
648 } |
|
649 if (odd == 0) { |
|
650 System.arraycopy(a, left, work, workBase, blen); |
|
651 b = a; |
|
652 bo = 0; |
|
653 a = work; |
|
654 ao = workBase - left; |
|
655 } else { |
|
656 b = work; |
|
657 ao = 0; |
|
658 bo = workBase - left; |
|
659 } |
|
660 |
|
661 // Merging |
|
662 for (int last; count > 1; count = last) { |
|
663 for (int k = (last = 0) + 2; k <= count; k += 2) { |
|
664 int hi = run[k], mi = run[k - 1]; |
|
665 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) { |
|
666 if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) { |
|
667 b[i + bo] = a[p++ + ao]; |
|
668 } else { |
|
669 b[i + bo] = a[q++ + ao]; |
|
670 } |
|
671 } |
|
672 run[++last] = hi; |
|
673 } |
|
674 if ((count & 1) != 0) { |
|
675 for (int i = right, lo = run[count - 1]; --i >= lo; |
|
676 b[i + bo] = a[i + ao] |
|
677 ); |
|
678 run[++last] = right; |
|
679 } |
|
680 long[] t = a; a = b; b = t; |
|
681 int o = ao; ao = bo; bo = o; |
|
682 } |
|
683 } |
|
684 |
|
685 /** |
|
686 * Sorts the specified range of the array by Dual-Pivot Quicksort. |
|
687 * |
|
688 * @param a the array to be sorted |
|
689 * @param left the index of the first element, inclusive, to be sorted |
|
690 * @param right the index of the last element, inclusive, to be sorted |
|
691 * @param leftmost indicates if this part is the leftmost in the range |
|
692 */ |
|
693 private static void sort(long[] a, int left, int right, boolean leftmost) { |
|
694 int length = right - left + 1; |
|
695 |
|
696 // Use insertion sort on tiny arrays |
|
697 if (length < INSERTION_SORT_THRESHOLD) { |
|
698 if (leftmost) { |
|
699 /* |
|
700 * Traditional (without sentinel) insertion sort, |
|
701 * optimized for server VM, is used in case of |
|
702 * the leftmost part. |
|
703 */ |
|
704 for (int i = left, j = i; i < right; j = ++i) { |
|
705 long ai = a[i + 1]; |
|
706 while (ai < a[j]) { |
|
707 a[j + 1] = a[j]; |
|
708 if (j-- == left) { |
|
709 break; |
|
710 } |
|
711 } |
|
712 a[j + 1] = ai; |
|
713 } |
|
714 } else { |
|
715 /* |
|
716 * Skip the longest ascending sequence. |
|
717 */ |
|
718 do { |
|
719 if (left >= right) { |
|
720 return; |
|
721 } |
|
722 } while (a[++left] >= a[left - 1]); |
|
723 |
|
724 /* |
|
725 * Every element from adjoining part plays the role |
|
726 * of sentinel, therefore this allows us to avoid the |
|
727 * left range check on each iteration. Moreover, we use |
|
728 * the more optimized algorithm, so called pair insertion |
|
729 * sort, which is faster (in the context of Quicksort) |
|
730 * than traditional implementation of insertion sort. |
|
731 */ |
|
732 for (int k = left; ++left <= right; k = ++left) { |
|
733 long a1 = a[k], a2 = a[left]; |
|
734 |
|
735 if (a1 < a2) { |
|
736 a2 = a1; a1 = a[left]; |
|
737 } |
|
738 while (a1 < a[--k]) { |
|
739 a[k + 2] = a[k]; |
|
740 } |
|
741 a[++k + 1] = a1; |
|
742 |
|
743 while (a2 < a[--k]) { |
|
744 a[k + 1] = a[k]; |
|
745 } |
|
746 a[k + 1] = a2; |
|
747 } |
|
748 long last = a[right]; |
|
749 |
|
750 while (last < a[--right]) { |
|
751 a[right + 1] = a[right]; |
|
752 } |
|
753 a[right + 1] = last; |
|
754 } |
|
755 return; |
|
756 } |
|
757 |
|
758 // Inexpensive approximation of length / 7 |
|
759 int seventh = (length >> 3) + (length >> 6) + 1; |
|
760 |
|
761 /* |
|
762 * Sort five evenly spaced elements around (and including) the |
|
763 * center element in the range. These elements will be used for |
|
764 * pivot selection as described below. The choice for spacing |
|
765 * these elements was empirically determined to work well on |
|
766 * a wide variety of inputs. |
|
767 */ |
|
768 int e3 = (left + right) >>> 1; // The midpoint |
|
769 int e2 = e3 - seventh; |
|
770 int e1 = e2 - seventh; |
|
771 int e4 = e3 + seventh; |
|
772 int e5 = e4 + seventh; |
|
773 |
|
774 // Sort these elements using insertion sort |
|
775 if (a[e2] < a[e1]) { long t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
776 |
|
777 if (a[e3] < a[e2]) { long t = a[e3]; a[e3] = a[e2]; a[e2] = t; |
|
778 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
779 } |
|
780 if (a[e4] < a[e3]) { long t = a[e4]; a[e4] = a[e3]; a[e3] = t; |
|
781 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
782 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
783 } |
|
784 } |
|
785 if (a[e5] < a[e4]) { long t = a[e5]; a[e5] = a[e4]; a[e4] = t; |
|
786 if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; |
|
787 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
788 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
789 } |
|
790 } |
|
791 } |
|
792 |
|
793 // Pointers |
|
794 int less = left; // The index of the first element of center part |
|
795 int great = right; // The index before the first element of right part |
|
796 |
|
797 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { |
|
798 /* |
|
799 * Use the second and fourth of the five sorted elements as pivots. |
|
800 * These values are inexpensive approximations of the first and |
|
801 * second terciles of the array. Note that pivot1 <= pivot2. |
|
802 */ |
|
803 long pivot1 = a[e2]; |
|
804 long pivot2 = a[e4]; |
|
805 |
|
806 /* |
|
807 * The first and the last elements to be sorted are moved to the |
|
808 * locations formerly occupied by the pivots. When partitioning |
|
809 * is complete, the pivots are swapped back into their final |
|
810 * positions, and excluded from subsequent sorting. |
|
811 */ |
|
812 a[e2] = a[left]; |
|
813 a[e4] = a[right]; |
|
814 |
|
815 /* |
|
816 * Skip elements, which are less or greater than pivot values. |
|
817 */ |
|
818 while (a[++less] < pivot1); |
|
819 while (a[--great] > pivot2); |
|
820 |
|
821 /* |
|
822 * Partitioning: |
|
823 * |
|
824 * left part center part right part |
|
825 * +--------------------------------------------------------------+ |
|
826 * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | |
|
827 * +--------------------------------------------------------------+ |
|
828 * ^ ^ ^ |
|
829 * | | | |
|
830 * less k great |
|
831 * |
|
832 * Invariants: |
|
833 * |
|
834 * all in (left, less) < pivot1 |
|
835 * pivot1 <= all in [less, k) <= pivot2 |
|
836 * all in (great, right) > pivot2 |
|
837 * |
|
838 * Pointer k is the first index of ?-part. |
|
839 */ |
|
840 outer: |
|
841 for (int k = less - 1; ++k <= great; ) { |
|
842 long ak = a[k]; |
|
843 if (ak < pivot1) { // Move a[k] to left part |
|
844 a[k] = a[less]; |
|
845 /* |
|
846 * Here and below we use "a[i] = b; i++;" instead |
|
847 * of "a[i++] = b;" due to performance issue. |
|
848 */ |
|
849 a[less] = ak; |
|
850 ++less; |
|
851 } else if (ak > pivot2) { // Move a[k] to right part |
|
852 while (a[great] > pivot2) { |
|
853 if (great-- == k) { |
|
854 break outer; |
|
855 } |
|
856 } |
|
857 if (a[great] < pivot1) { // a[great] <= pivot2 |
|
858 a[k] = a[less]; |
|
859 a[less] = a[great]; |
|
860 ++less; |
|
861 } else { // pivot1 <= a[great] <= pivot2 |
|
862 a[k] = a[great]; |
|
863 } |
|
864 /* |
|
865 * Here and below we use "a[i] = b; i--;" instead |
|
866 * of "a[i--] = b;" due to performance issue. |
|
867 */ |
|
868 a[great] = ak; |
|
869 --great; |
|
870 } |
|
871 } |
|
872 |
|
873 // Swap pivots into their final positions |
|
874 a[left] = a[less - 1]; a[less - 1] = pivot1; |
|
875 a[right] = a[great + 1]; a[great + 1] = pivot2; |
|
876 |
|
877 // Sort left and right parts recursively, excluding known pivots |
|
878 sort(a, left, less - 2, leftmost); |
|
879 sort(a, great + 2, right, false); |
|
880 |
|
881 /* |
|
882 * If center part is too large (comprises > 4/7 of the array), |
|
883 * swap internal pivot values to ends. |
|
884 */ |
|
885 if (less < e1 && e5 < great) { |
|
886 /* |
|
887 * Skip elements, which are equal to pivot values. |
|
888 */ |
|
889 while (a[less] == pivot1) { |
|
890 ++less; |
|
891 } |
|
892 |
|
893 while (a[great] == pivot2) { |
|
894 --great; |
|
895 } |
|
896 |
|
897 /* |
|
898 * Partitioning: |
|
899 * |
|
900 * left part center part right part |
|
901 * +----------------------------------------------------------+ |
|
902 * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | |
|
903 * +----------------------------------------------------------+ |
|
904 * ^ ^ ^ |
|
905 * | | | |
|
906 * less k great |
|
907 * |
1064 * |
908 * Invariants: |
1065 * Invariants: |
909 * |
1066 * |
910 * all in (*, less) == pivot1 |
1067 * all in (low, lower] < pivot1 |
911 * pivot1 < all in [less, k) < pivot2 |
1068 * pivot1 <= all in (k, upper) <= pivot2 |
912 * all in (great, *) == pivot2 |
1069 * all in [upper, end) > pivot2 |
913 * |
1070 * |
914 * Pointer k is the first index of ?-part. |
1071 * Pointer k is the last index of ?-part |
915 */ |
1072 */ |
916 outer: |
1073 for (int unused = --lower, k = ++upper; --k > lower; ) { |
917 for (int k = less - 1; ++k <= great; ) { |
|
918 long ak = a[k]; |
1074 long ak = a[k]; |
919 if (ak == pivot1) { // Move a[k] to left part |
1075 |
920 a[k] = a[less]; |
1076 if (ak < pivot1) { // Move a[k] to the left side |
921 a[less] = ak; |
1077 while (lower < k) { |
922 ++less; |
1078 if (a[++lower] >= pivot1) { |
923 } else if (ak == pivot2) { // Move a[k] to right part |
1079 if (a[lower] > pivot2) { |
924 while (a[great] == pivot2) { |
1080 a[k] = a[--upper]; |
925 if (great-- == k) { |
1081 a[upper] = a[lower]; |
926 break outer; |
1082 } else { |
|
1083 a[k] = a[lower]; |
|
1084 } |
|
1085 a[lower] = ak; |
|
1086 break; |
927 } |
1087 } |
928 } |
1088 } |
929 if (a[great] == pivot1) { // a[great] < pivot2 |
1089 } else if (ak > pivot2) { // Move a[k] to the right side |
930 a[k] = a[less]; |
1090 a[k] = a[--upper]; |
931 /* |
1091 a[upper] = ak; |
932 * Even though a[great] equals to pivot1, the |
1092 } |
933 * assignment a[less] = pivot1 may be incorrect, |
1093 } |
934 * if a[great] and pivot1 are floating-point zeros |
1094 |
935 * of different signs. Therefore in float and |
1095 /* |
936 * double sorting methods we have to use more |
1096 * Swap the pivots into their final positions. |
937 * accurate assignment a[less] = a[great]. |
1097 */ |
938 */ |
1098 a[low] = a[lower]; a[lower] = pivot1; |
939 a[less] = pivot1; |
1099 a[end] = a[upper]; a[upper] = pivot2; |
940 ++less; |
1100 |
941 } else { // pivot1 < a[great] < pivot2 |
1101 /* |
942 a[k] = a[great]; |
1102 * Sort non-left parts recursively (possibly in parallel), |
|
1103 * excluding known pivots. |
|
1104 */ |
|
1105 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
1106 sorter.forkSorter(bits | 1, lower + 1, upper); |
|
1107 sorter.forkSorter(bits | 1, upper + 1, high); |
|
1108 } else { |
|
1109 sort(sorter, a, bits | 1, lower + 1, upper); |
|
1110 sort(sorter, a, bits | 1, upper + 1, high); |
|
1111 } |
|
1112 |
|
1113 } else { // Use single pivot in case of many equal elements |
|
1114 |
|
1115 /* |
|
1116 * Use the third of the five sorted elements as the pivot. |
|
1117 * This value is inexpensive approximation of the median. |
|
1118 */ |
|
1119 long pivot = a[e3]; |
|
1120 |
|
1121 /* |
|
1122 * The first element to be sorted is moved to the |
|
1123 * location formerly occupied by the pivot. After |
|
1124 * completion of partitioning the pivot is swapped |
|
1125 * back into its final position, and excluded from |
|
1126 * the next subsequent sorting. |
|
1127 */ |
|
1128 a[e3] = a[lower]; |
|
1129 |
|
1130 /* |
|
1131 * Traditional 3-way (Dutch National Flag) partitioning |
|
1132 * |
|
1133 * left part central part right part |
|
1134 * +------------------------------------------------------+ |
|
1135 * | < pivot | ? | == pivot | > pivot | |
|
1136 * +------------------------------------------------------+ |
|
1137 * ^ ^ ^ |
|
1138 * | | | |
|
1139 * lower k upper |
|
1140 * |
|
1141 * Invariants: |
|
1142 * |
|
1143 * all in (low, lower] < pivot |
|
1144 * all in (k, upper) == pivot |
|
1145 * all in [upper, end] > pivot |
|
1146 * |
|
1147 * Pointer k is the last index of ?-part |
|
1148 */ |
|
1149 for (int k = ++upper; --k > lower; ) { |
|
1150 long ak = a[k]; |
|
1151 |
|
1152 if (ak != pivot) { |
|
1153 a[k] = pivot; |
|
1154 |
|
1155 if (ak < pivot) { // Move a[k] to the left side |
|
1156 while (a[++lower] < pivot); |
|
1157 |
|
1158 if (a[lower] > pivot) { |
|
1159 a[--upper] = a[lower]; |
|
1160 } |
|
1161 a[lower] = ak; |
|
1162 } else { // ak > pivot - Move a[k] to the right side |
|
1163 a[--upper] = ak; |
943 } |
1164 } |
944 a[great] = ak; |
1165 } |
945 --great; |
1166 } |
946 } |
1167 |
947 } |
1168 /* |
948 } |
1169 * Swap the pivot into its final position. |
949 |
1170 */ |
950 // Sort center part recursively |
1171 a[low] = a[lower]; a[lower] = pivot; |
951 sort(a, less, great, false); |
1172 |
952 |
1173 /* |
953 } else { // Partitioning with one pivot |
1174 * Sort the right part (possibly in parallel), excluding |
954 /* |
1175 * known pivot. All elements from the central part are |
955 * Use the third of the five sorted elements as pivot. |
1176 * equal and therefore already sorted. |
956 * This value is inexpensive approximation of the median. |
1177 */ |
957 */ |
1178 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
958 long pivot = a[e3]; |
1179 sorter.forkSorter(bits | 1, upper, high); |
959 |
1180 } else { |
960 /* |
1181 sort(sorter, a, bits | 1, upper, high); |
961 * Partitioning degenerates to the traditional 3-way |
1182 } |
962 * (or "Dutch National Flag") schema: |
1183 } |
|
1184 high = lower; // Iterate along the left part |
|
1185 } |
|
1186 } |
|
1187 |
|
1188 /** |
|
1189 * Sorts the specified range of the array using mixed insertion sort. |
|
1190 * |
|
1191 * Mixed insertion sort is a combination of simple insertion sort, |
|
1192 * pin insertion sort and pair insertion sort. |
|
1193 * |
|
1194 * In the context of Dual-Pivot Quicksort, the pivot element |
|
1195 * from the left part plays the role of sentinel, because it |
|
1196 * is less than any elements from the given part. Therefore, |
|
1197 * expensive check of the left range can be skipped on each |
|
1198 * iteration unless it is the leftmost call. |
|
1199 * |
|
1200 * @param a the array to be sorted |
|
1201 * @param low the index of the first element, inclusive, to be sorted |
|
1202 * @param end the index of the last element for simple insertion sort |
|
1203 * @param high the index of the last element, exclusive, to be sorted |
|
1204 */ |
|
1205 private static void mixedInsertionSort(long[] a, int low, int end, int high) { |
|
1206 if (end == high) { |
|
1207 |
|
1208 /* |
|
1209 * Invoke simple insertion sort on tiny array. |
|
1210 */ |
|
1211 for (int i; ++low < end; ) { |
|
1212 long ai = a[i = low]; |
|
1213 |
|
1214 while (ai < a[--i]) { |
|
1215 a[i + 1] = a[i]; |
|
1216 } |
|
1217 a[i + 1] = ai; |
|
1218 } |
|
1219 } else { |
|
1220 |
|
1221 /* |
|
1222 * Start with pin insertion sort on small part. |
963 * |
1223 * |
964 * left part center part right part |
1224 * Pin insertion sort is extended simple insertion sort. |
965 * +-------------------------------------------------+ |
1225 * The main idea of this sort is to put elements larger |
966 * | < pivot | == pivot | ? | > pivot | |
1226 * than an element called pin to the end of array (the |
967 * +-------------------------------------------------+ |
1227 * proper area for such elements). It avoids expensive |
968 * ^ ^ ^ |
1228 * movements of these elements through the whole array. |
969 * | | | |
1229 */ |
970 * less k great |
1230 long pin = a[end]; |
|
1231 |
|
1232 for (int i, p = high; ++low < end; ) { |
|
1233 long ai = a[i = low]; |
|
1234 |
|
1235 if (ai < a[i - 1]) { // Small element |
|
1236 |
|
1237 /* |
|
1238 * Insert small element into sorted part. |
|
1239 */ |
|
1240 a[i] = a[--i]; |
|
1241 |
|
1242 while (ai < a[--i]) { |
|
1243 a[i + 1] = a[i]; |
|
1244 } |
|
1245 a[i + 1] = ai; |
|
1246 |
|
1247 } else if (p > i && ai > pin) { // Large element |
|
1248 |
|
1249 /* |
|
1250 * Find element smaller than pin. |
|
1251 */ |
|
1252 while (a[--p] > pin); |
|
1253 |
|
1254 /* |
|
1255 * Swap it with large element. |
|
1256 */ |
|
1257 if (p > i) { |
|
1258 ai = a[p]; |
|
1259 a[p] = a[i]; |
|
1260 } |
|
1261 |
|
1262 /* |
|
1263 * Insert small element into sorted part. |
|
1264 */ |
|
1265 while (ai < a[--i]) { |
|
1266 a[i + 1] = a[i]; |
|
1267 } |
|
1268 a[i + 1] = ai; |
|
1269 } |
|
1270 } |
|
1271 |
|
1272 /* |
|
1273 * Continue with pair insertion sort on the remaining part. |
|
1274 */ |
|
1275 for (int i; low < high; ++low) { |
|
1276 long a1 = a[i = low], a2 = a[++low]; |
|
1277 |
|
1278 /* |
|
1279 * Insert two elements per iteration: at first, insert the |
|
1280 * larger element and then insert the smaller element, but |
|
1281 * from the position where the larger element was inserted. |
|
1282 */ |
|
1283 if (a1 > a2) { |
|
1284 |
|
1285 while (a1 < a[--i]) { |
|
1286 a[i + 2] = a[i]; |
|
1287 } |
|
1288 a[++i + 1] = a1; |
|
1289 |
|
1290 while (a2 < a[--i]) { |
|
1291 a[i + 1] = a[i]; |
|
1292 } |
|
1293 a[i + 1] = a2; |
|
1294 |
|
1295 } else if (a1 < a[i - 1]) { |
|
1296 |
|
1297 while (a2 < a[--i]) { |
|
1298 a[i + 2] = a[i]; |
|
1299 } |
|
1300 a[++i + 1] = a2; |
|
1301 |
|
1302 while (a1 < a[--i]) { |
|
1303 a[i + 1] = a[i]; |
|
1304 } |
|
1305 a[i + 1] = a1; |
|
1306 } |
|
1307 } |
|
1308 } |
|
1309 } |
|
1310 |
|
1311 /** |
|
1312 * Sorts the specified range of the array using insertion sort. |
|
1313 * |
|
1314 * @param a the array to be sorted |
|
1315 * @param low the index of the first element, inclusive, to be sorted |
|
1316 * @param high the index of the last element, exclusive, to be sorted |
|
1317 */ |
|
1318 private static void insertionSort(long[] a, int low, int high) { |
|
1319 for (int i, k = low; ++k < high; ) { |
|
1320 long ai = a[i = k]; |
|
1321 |
|
1322 if (ai < a[i - 1]) { |
|
1323 while (--i >= low && ai < a[i]) { |
|
1324 a[i + 1] = a[i]; |
|
1325 } |
|
1326 a[i + 1] = ai; |
|
1327 } |
|
1328 } |
|
1329 } |
|
1330 |
|
1331 /** |
|
1332 * Sorts the specified range of the array using heap sort. |
|
1333 * |
|
1334 * @param a the array to be sorted |
|
1335 * @param low the index of the first element, inclusive, to be sorted |
|
1336 * @param high the index of the last element, exclusive, to be sorted |
|
1337 */ |
|
1338 private static void heapSort(long[] a, int low, int high) { |
|
1339 for (int k = (low + high) >>> 1; k > low; ) { |
|
1340 pushDown(a, --k, a[k], low, high); |
|
1341 } |
|
1342 while (--high > low) { |
|
1343 long max = a[low]; |
|
1344 pushDown(a, low, a[high], low, high); |
|
1345 a[high] = max; |
|
1346 } |
|
1347 } |
|
1348 |
|
1349 /** |
|
1350 * Pushes specified element down during heap sort. |
|
1351 * |
|
1352 * @param a the given array |
|
1353 * @param p the start index |
|
1354 * @param value the given element |
|
1355 * @param low the index of the first element, inclusive, to be sorted |
|
1356 * @param high the index of the last element, exclusive, to be sorted |
|
1357 */ |
|
1358 private static void pushDown(long[] a, int p, long value, int low, int high) { |
|
1359 for (int k ;; a[p] = a[p = k]) { |
|
1360 k = (p << 1) - low + 2; // Index of the right child |
|
1361 |
|
1362 if (k > high) { |
|
1363 break; |
|
1364 } |
|
1365 if (k == high || a[k] < a[k - 1]) { |
|
1366 --k; |
|
1367 } |
|
1368 if (a[k] <= value) { |
|
1369 break; |
|
1370 } |
|
1371 } |
|
1372 a[p] = value; |
|
1373 } |
|
1374 |
|
/**
 * Tries to sort the specified range of the array by detecting
 * already-ordered runs and merging them.
 *
 * The range is scanned once from left to right. Descending runs are
 * reversed in place as they are found, so the array may be modified
 * even when this method gives up and returns {@code false}.
 *
 * The method gives up when the first run is shorter than
 * {@code MIN_FIRST_RUN_SIZE}, when the number of runs grows too fast
 * relative to the scanned length (see {@code MIN_FIRST_RUNS_FACTOR}),
 * or when the run count reaches {@code MAX_RUN_CAPACITY}.
 *
 * @param sorter parallel context, may be {@code null}
 * @param a the array to be sorted
 * @param low the index of the first element to be sorted
 * @param size the array size
 * @return true if finally sorted, false otherwise
 */
private static boolean tryMergeRuns(Sorter sorter, long[] a, int low, int size) {

    /*
     * The run array is constructed only if initial runs are
     * long enough to continue, run[i] then holds start index
     * of the i-th sequence of elements in non-descending order.
     */
    int[] run = null;
    int high = low + size;
    int count = 1, last = low; // last is the start index of the current run

    /*
     * Identify all possible runs.
     */
    for (int k = low + 1; k < high; ) {

        /*
         * Find the end index of the current run.
         */
        if (a[k - 1] < a[k]) {

            // Identify ascending sequence
            while (++k < high && a[k - 1] <= a[k]);

        } else if (a[k - 1] > a[k]) {

            // Identify descending sequence
            while (++k < high && a[k - 1] >= a[k]);

            // Reverse into ascending order; swapping stops early once
            // the two ends are no longer out of order (equal elements)
            for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) {
                long ai = a[i]; a[i] = a[j]; a[j] = ai;
            }
        } else { // Identify constant sequence
            for (long ak = a[k]; ++k < high && ak == a[k]; );

            // A constant prefix does not fix the direction of the run
            // yet: keep scanning without closing the current run
            if (k < high) {
                continue;
            }
        }

        /*
         * Check special cases.
         */
        if (run == null) {
            if (k == high) {

                /*
                 * The array is monotonous sequence,
                 * and therefore already sorted.
                 */
                return true;
            }

            if (k - low < MIN_FIRST_RUN_SIZE) {

                /*
                 * The first run is too small
                 * to proceed with scanning.
                 */
                return false;
            }

            // Initial capacity lies between 127 and 1023, growing with size
            run = new int[((size >> 10) | 0x7F) & 0x3FF];
            run[0] = low;

        } else if (a[last - 1] > a[last]) {

            // The run just scanned cannot be appended to the previous
            // one, so it is counted as a new run

            if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) {

                /*
                 * The first runs are not long
                 * enough to continue scanning.
                 */
                return false;
            }

            if (++count == MAX_RUN_CAPACITY) {

                /*
                 * Array is not highly structured.
                 */
                return false;
            }

            if (count == run.length) {

                /*
                 * Increase capacity of index array.
                 */
                run = Arrays.copyOf(run, count << 1);
            }
        }
        // Record the end of the current run (the start of the next one);
        // when count was not incremented, this extends the previous run
        run[count] = (last = k);
    }

    /*
     * Merge runs of highly structured array.
     */
    if (count > 1) {
        long[] b; int offset = low;

        // Reuse the buffer of the parallel context when it provides one,
        // otherwise allocate a temporary buffer of the range size
        if (sorter == null || (b = (long[]) sorter.b) == null) {
            b = new long[size];
        } else {
            offset = sorter.offset;
        }
        // aim == 1 requests that the merged result end up in the source array
        mergeRuns(a, b, offset, 1, sorter != null, run, 0, count);
    }
    return true;
}
|
1495 |
|
/**
 * Merges the specified runs.
 *
 * The runs {@code run[lo] .. run[hi]} are split into a left and a
 * right group, each group is merged recursively, and the two results
 * are merged into whichever of {@code a} / {@code b} does not hold
 * the left result. Positions in {@code b} are the positions in
 * {@code a} shifted down by {@code offset}.
 *
 * @param a the source array
 * @param b the temporary buffer used in merging
 * @param offset the start index in the source, inclusive
 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0)
 * @param parallel indicates whether merging is performed in parallel
 * @param run the start indexes of the runs, inclusive
 * @param lo the start index of the first run, inclusive
 * @param hi the start index of the last run, inclusive
 * @return the destination where runs are merged
 */
private static long[] mergeRuns(long[] a, long[] b, int offset,
        int aim, boolean parallel, int[] run, int lo, int hi) {

    // A single run is already sorted
    if (hi - lo == 1) {
        if (aim >= 0) {
            // Source (or any) destination: leave the run where it is
            return a;
        }
        // Buffer destination: copy the run into b, back to front
        for (int i = run[hi], j = i - offset, low = run[lo]; i > low;
            b[--j] = a[--i]
        );
        return b;
    }

    /*
     * Split into approximately equal parts.
     */
    int mi = lo, rmi = (run[lo] + run[hi]) >>> 1;
    // Advance mi while the run after it still starts at or before the midpoint
    while (run[++mi + 1] <= rmi);

    /*
     * Merge the left and right parts.
     */
    long[] a1, a2;

    if (parallel && hi - lo > MIN_RUN_COUNT) {
        // Fork the right part, merge the left part in this thread,
        // then obtain the destination of the forked right part
        RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe();
        a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi);
        a2 = (long[]) merger.getDestination();
    } else {
        // The left part aims at the opposite array, so that merging
        // it out again lands in the requested one
        a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi);
        a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi);
    }

    // The destination is the array that does not hold the left part
    long[] dst = a1 == a ? b : a;

    // Translate run positions into indexes of the array each part lives in
    int k = a1 == a ? run[lo] - offset : run[lo];
    int lo1 = a1 == b ? run[lo] - offset : run[lo];
    int hi1 = a1 == b ? run[mi] - offset : run[mi];
    int lo2 = a2 == b ? run[mi] - offset : run[mi];
    int hi2 = a2 == b ? run[hi] - offset : run[hi];

    if (parallel) {
        new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke();
    } else {
        mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2);
    }
    return dst;
}
|
1557 |
|
/**
 * Merges the sorted parts.
 *
 * When a parallel context is given and both parts live in the same
 * array, large parts are split around the median of the larger part
 * and the right halves are handed to {@code merger.forkMerger};
 * whatever remains is merged sequentially in this thread.
 *
 * @param merger parallel context, or {@code null} for sequential merging
 * @param dst the destination where parts are merged
 * @param k the start index of the destination, inclusive
 * @param a1 the first part
 * @param lo1 the start index of the first part, inclusive
 * @param hi1 the end index of the first part, exclusive
 * @param a2 the second part
 * @param lo2 the start index of the second part, inclusive
 * @param hi2 the end index of the second part, exclusive
 */
private static void mergeParts(Merger merger, long[] dst, int k,
        long[] a1, int lo1, int hi1, long[] a2, int lo2, int hi2) {

    if (merger != null && a1 == a2) {

        while (true) {

            /*
             * The first part must be larger.
             */
            if (hi1 - lo1 < hi2 - lo2) {
                // Only the bounds are swapped: a1 and a2 are the same array here
                int lo = lo1; lo1 = lo2; lo2 = lo;
                int hi = hi1; hi1 = hi2; hi2 = hi;
            }

            /*
             * Small parts will be merged sequentially.
             */
            if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) {
                break;
            }

            /*
             * Find the median of the larger part.
             */
            int mi1 = (lo1 + hi1) >>> 1;
            long key = a1[mi1];
            int mi2 = hi2;

            /*
             * Partition the smaller part: binary search for the first
             * index whose element is not less than the key.
             */
            for (int loo = lo2; loo < mi2; ) {
                int t = (loo + mi2) >>> 1;

                if (key > a2[t]) {
                    loo = t + 1;
                } else {
                    mi2 = t;
                }
            }

            // Number of elements in both left sub-parts
            int d = mi2 - lo2 + mi1 - lo1;

            /*
             * Merge the right sub-parts in parallel.
             */
            merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2);

            /*
             * Process the sub-left parts.
             */
            hi1 = mi1;
            hi2 = mi2;
        }
    }

    /*
     * Merge small parts sequentially.
     */
    while (lo1 < hi1 && lo2 < hi2) {
        // On equal elements the one from the second part is taken
        dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++];
    }
    // Copy the rest of the first part, unless it already sits at its
    // final position in the destination (same array and k == lo1)
    if (dst != a1 || k < lo1) {
        while (lo1 < hi1) {
            dst[k++] = a1[lo1++];
        }
    }
    // Same for the rest of the second part
    if (dst != a2 || k < lo2) {
        while (lo2 < hi2) {
            dst[k++] = a2[lo2++];
        }
    }
}
|
1645 |
|
1646 // [byte] |
|
1647 |
|
1648 /** |
|
1649 * Sorts the specified range of the array using |
|
1650 * counting sort or insertion sort. |
|
1651 * |
|
1652 * @param a the array to be sorted |
|
1653 * @param low the index of the first element, inclusive, to be sorted |
|
1654 * @param high the index of the last element, exclusive, to be sorted |
|
1655 */ |
|
1656 static void sort(byte[] a, int low, int high) { |
|
1657 if (high - low > MIN_BYTE_COUNTING_SORT_SIZE) { |
|
1658 countingSort(a, low, high); |
|
1659 } else { |
|
1660 insertionSort(a, low, high); |
|
1661 } |
|
1662 } |
|
1663 |
|
1664 /** |
|
1665 * Sorts the specified range of the array using insertion sort. |
|
1666 * |
|
1667 * @param a the array to be sorted |
|
1668 * @param low the index of the first element, inclusive, to be sorted |
|
1669 * @param high the index of the last element, exclusive, to be sorted |
|
1670 */ |
|
1671 private static void insertionSort(byte[] a, int low, int high) { |
|
1672 for (int i, k = low; ++k < high; ) { |
|
1673 byte ai = a[i = k]; |
|
1674 |
|
1675 if (ai < a[i - 1]) { |
|
1676 while (--i >= low && ai < a[i]) { |
|
1677 a[i + 1] = a[i]; |
|
1678 } |
|
1679 a[i + 1] = ai; |
|
1680 } |
|
1681 } |
|
1682 } |
|
1683 |
|
1684 /** |
|
1685 * The number of distinct byte values. |
|
1686 */ |
|
1687 private static final int NUM_BYTE_VALUES = 1 << 8; |
|
1688 |
|
1689 /** |
|
1690 * Max index of byte counter. |
|
1691 */ |
|
1692 private static final int MAX_BYTE_INDEX = Byte.MAX_VALUE + NUM_BYTE_VALUES + 1; |
|
1693 |
|
1694 /** |
|
1695 * Sorts the specified range of the array using counting sort. |
|
1696 * |
|
1697 * @param a the array to be sorted |
|
1698 * @param low the index of the first element, inclusive, to be sorted |
|
1699 * @param high the index of the last element, exclusive, to be sorted |
|
1700 */ |
|
1701 private static void countingSort(byte[] a, int low, int high) { |
|
1702 int[] count = new int[NUM_BYTE_VALUES]; |
|
1703 |
|
1704 /* |
|
1705 * Compute a histogram with the number of each values. |
|
1706 */ |
|
1707 for (int i = high; i > low; ++count[a[--i] & 0xFF]); |
|
1708 |
|
1709 /* |
|
1710 * Place values on their final positions. |
|
1711 */ |
|
1712 if (high - low > NUM_BYTE_VALUES) { |
|
1713 for (int i = MAX_BYTE_INDEX; --i > Byte.MAX_VALUE; ) { |
|
1714 int value = i & 0xFF; |
|
1715 |
|
1716 for (low = high - count[value]; high > low; |
|
1717 a[--high] = (byte) value |
|
1718 ); |
|
1719 } |
|
1720 } else { |
|
1721 for (int i = MAX_BYTE_INDEX; high > low; ) { |
|
1722 while (count[--i & 0xFF] == 0); |
|
1723 |
|
1724 int value = i & 0xFF; |
|
1725 int c = count[value]; |
|
1726 |
|
1727 do { |
|
1728 a[--high] = (byte) value; |
|
1729 } while (--c > 0); |
|
1730 } |
|
1731 } |
|
1732 } |
|
1733 |
|
1734 // [char] |
|
1735 |
|
1736 /** |
|
1737 * Sorts the specified range of the array using |
|
1738 * counting sort or Dual-Pivot Quicksort. |
|
1739 * |
|
1740 * @param a the array to be sorted |
|
1741 * @param low the index of the first element, inclusive, to be sorted |
|
1742 * @param high the index of the last element, exclusive, to be sorted |
|
1743 */ |
|
1744 static void sort(char[] a, int low, int high) { |
|
1745 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { |
|
1746 countingSort(a, low, high); |
|
1747 } else { |
|
1748 sort(a, 0, low, high); |
|
1749 } |
|
1750 } |
|
1751 |
|
1752 /** |
|
1753 * Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
1754 * other sorts in special-cases, possibly with parallel partitions. |
|
1755 * |
|
1756 * @param a the array to be sorted |
|
1757 * @param bits the combination of recursion depth and bit flag, where |
|
1758 * the right bit "0" indicates that array is the leftmost part |
|
1759 * @param low the index of the first element, inclusive, to be sorted |
|
1760 * @param high the index of the last element, exclusive, to be sorted |
|
1761 */ |
|
1762 static void sort(char[] a, int bits, int low, int high) { |
|
1763 while (true) { |
|
1764 int end = high - 1, size = high - low; |
|
1765 |
|
1766 /* |
|
1767 * Invoke insertion sort on small leftmost part. |
|
1768 */ |
|
1769 if (size < MAX_INSERTION_SORT_SIZE) { |
|
1770 insertionSort(a, low, high); |
|
1771 return; |
|
1772 } |
|
1773 |
|
1774 /* |
|
1775 * Switch to counting sort if execution |
|
1776 * time is becoming quadratic. |
|
1777 */ |
|
1778 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
1779 countingSort(a, low, high); |
|
1780 return; |
|
1781 } |
|
1782 |
|
1783 /* |
|
1784 * Use an inexpensive approximation of the golden ratio |
|
1785 * to select five sample elements and determine pivots. |
|
1786 */ |
|
1787 int step = (size >> 3) * 3 + 3; |
|
1788 |
|
1789 /* |
|
1790 * Five elements around (and including) the central element |
|
1791 * will be used for pivot selection as described below. The |
|
1792 * unequal choice of spacing these elements was empirically |
|
1793 * determined to work well on a wide variety of inputs. |
|
1794 */ |
|
1795 int e1 = low + step; |
|
1796 int e5 = end - step; |
|
1797 int e3 = (e1 + e5) >>> 1; |
|
1798 int e2 = (e1 + e3) >>> 1; |
|
1799 int e4 = (e3 + e5) >>> 1; |
|
1800 char a3 = a[e3]; |
|
1801 |
|
1802 /* |
|
1803 * Sort these elements in place by the combination |
|
1804 * of 4-element sorting network and insertion sort. |
971 * |
1805 * |
972 * Invariants: |
1806 * 5 ------o-----------o------------ |
973 * |
1807 * | | |
974 * all in (left, less) < pivot |
1808 * 4 ------|-----o-----o-----o------ |
975 * all in [less, k) == pivot |
1809 * | | | |
976 * all in (great, right) > pivot |
1810 * 2 ------o-----|-----o-----o------ |
977 * |
1811 * | | |
978 * Pointer k is the first index of ?-part. |
1812 * 1 ------------o-----o------------ |
979 */ |
1813 */ |
980 for (int k = less; k <= great; ++k) { |
1814 if (a[e5] < a[e2]) { char t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
981 if (a[k] == pivot) { |
1815 if (a[e4] < a[e1]) { char t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
982 continue; |
1816 if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
983 } |
1817 if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
984 long ak = a[k]; |
1818 if (a[e4] < a[e2]) { char t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
985 if (ak < pivot) { // Move a[k] to left part |
1819 |
986 a[k] = a[less]; |
1820 if (a3 < a[e2]) { |
987 a[less] = ak; |
1821 if (a3 < a[e1]) { |
988 ++less; |
1822 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
989 } else { // a[k] > pivot - Move a[k] to right part |
1823 } else { |
990 while (a[great] > pivot) { |
1824 a[e3] = a[e2]; a[e2] = a3; |
991 --great; |
1825 } |
992 } |
1826 } else if (a3 > a[e4]) { |
993 if (a[great] < pivot) { // a[great] <= pivot |
1827 if (a3 > a[e5]) { |
994 a[k] = a[less]; |
1828 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
995 a[less] = a[great]; |
1829 } else { |
996 ++less; |
1830 a[e3] = a[e4]; a[e4] = a3; |
997 } else { // a[great] == pivot |
1831 } |
998 /* |
1832 } |
999 * Even though a[great] equals to pivot, the |
1833 |
1000 * assignment a[k] = pivot may be incorrect, |
1834 // Pointers |
1001 * if a[great] and pivot are floating-point |
1835 int lower = low; // The index of the last element of the left part |
1002 * zeros of different signs. Therefore in float |
1836 int upper = end; // The index of the first element of the right part |
1003 * and double sorting methods we have to use |
1837 |
1004 * more accurate assignment a[k] = a[great]. |
1838 /* |
1005 */ |
1839 * Partitioning with 2 pivots in case of different elements. |
1006 a[k] = pivot; |
1840 */ |
1007 } |
1841 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
1008 a[great] = ak; |
1842 |
1009 --great; |
1843 /* |
1010 } |
1844 * Use the first and fifth of the five sorted elements as |
1011 } |
1845 * the pivots. These values are inexpensive approximation |
1012 |
1846 * of tertiles. Note, that pivot1 < pivot2. |
1013 /* |
1847 */ |
1014 * Sort left and right parts recursively. |
1848 char pivot1 = a[e1]; |
1015 * All elements from center part are equal |
1849 char pivot2 = a[e5]; |
1016 * and, therefore, already sorted. |
1850 |
1017 */ |
1851 /* |
1018 sort(a, left, less - 1, leftmost); |
1852 * The first and the last elements to be sorted are moved |
1019 sort(a, great + 1, right, false); |
1853 * to the locations formerly occupied by the pivots. When |
1020 } |
1854 * partitioning is completed, the pivots are swapped back |
1021 } |
1855 * into their final positions, and excluded from the next |
1022 |
1856 * subsequent sorting. |
1023 /** |
1857 */ |
1024 * Sorts the specified range of the array using the given |
1858 a[e1] = a[lower]; |
1025 * workspace array slice if possible for merging |
1859 a[e5] = a[upper]; |
1026 * |
1860 |
1027 * @param a the array to be sorted |
1861 /* |
1028 * @param left the index of the first element, inclusive, to be sorted |
1862 * Skip elements, which are less or greater than the pivots. |
1029 * @param right the index of the last element, inclusive, to be sorted |
1863 */ |
1030 * @param work a workspace array (slice) |
1864 while (a[++lower] < pivot1); |
1031 * @param workBase origin of usable space in work array |
1865 while (a[--upper] > pivot2); |
1032 * @param workLen usable size of work array |
1866 |
1033 */ |
1867 /* |
1034 static void sort(short[] a, int left, int right, |
1868 * Backward 3-interval partitioning |
1035 short[] work, int workBase, int workLen) { |
1869 * |
1036 // Use counting sort on large arrays |
1870 * left part central part right part |
1037 if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) { |
1871 * +------------------------------------------------------------+ |
1038 int[] count = new int[NUM_SHORT_VALUES]; |
1872 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
1039 |
1873 * +------------------------------------------------------------+ |
1040 for (int i = left - 1; ++i <= right; |
1874 * ^ ^ ^ |
1041 count[a[i] - Short.MIN_VALUE]++ |
1875 * | | | |
1042 ); |
1876 * lower k upper |
1043 for (int i = NUM_SHORT_VALUES, k = right + 1; k > left; ) { |
|
1044 while (count[--i] == 0); |
|
1045 short value = (short) (i + Short.MIN_VALUE); |
|
1046 int s = count[i]; |
|
1047 |
|
1048 do { |
|
1049 a[--k] = value; |
|
1050 } while (--s > 0); |
|
1051 } |
|
1052 } else { // Use Dual-Pivot Quicksort on small arrays |
|
1053 doSort(a, left, right, work, workBase, workLen); |
|
1054 } |
|
1055 } |
|
1056 |
|
/** The number of distinct short values (2^16). */
private static final int NUM_SHORT_VALUES = 1 << 16;
|
1059 |
|
/**
 * Sorts the specified range of the array.
 *
 * Ranges shorter than {@code QUICKSORT_THRESHOLD} go straight to
 * Quicksort. Otherwise the range is scanned for ascending and
 * descending runs (descending ones are reversed in place); if fewer
 * than {@code MAX_RUN_COUNT} runs are found they are merged bottom-up,
 * alternating between the array and a workspace, otherwise the
 * method falls back to Quicksort.
 *
 * @param a the array to be sorted
 * @param left the index of the first element, inclusive, to be sorted
 * @param right the index of the last element, inclusive, to be sorted
 * @param work a workspace array (slice); a new one is allocated when
 *        it is {@code null} or too small
 * @param workBase origin of usable space in work array
 * @param workLen usable size of work array
 */
private static void doSort(short[] a, int left, int right,
                           short[] work, int workBase, int workLen) {
    // Use Quicksort on small arrays
    if (right - left < QUICKSORT_THRESHOLD) {
        sort(a, left, right, true);
        return;
    }

    /*
     * Index run[i] is the start of i-th run
     * (ascending or descending sequence).
     */
    int[] run = new int[MAX_RUN_COUNT + 1];
    int count = 0; run[0] = left;

    // Check if the array is nearly sorted
    for (int k = left; k < right; run[count] = k) {
        // Equal items in the beginning of the sequence
        while (k < right && a[k] == a[k + 1])
            k++;
        if (k == right) break;  // Sequence finishes with equal items
        if (a[k] < a[k + 1]) { // ascending
            while (++k <= right && a[k - 1] <= a[k]);
        } else if (a[k] > a[k + 1]) { // descending
            while (++k <= right && a[k - 1] >= a[k]);
            // Transform into an ascending sequence
            for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) {
                short t = a[lo]; a[lo] = a[hi]; a[hi] = t;
            }
        }

        // Merge a transformed descending sequence followed by an
        // ascending sequence: the current run joins the previous one
        // when its first element is not less than the element before it
        if (run[count] > left && a[run[count]] >= a[run[count] - 1]) {
            count--;
        }

        /*
         * The array is not highly structured,
         * use Quicksort instead of merge sort.
         */
        if (++count == MAX_RUN_COUNT) {
            sort(a, left, right, true);
            return;
        }
    }

    // These invariants should hold true:
    //    run[0] == left
    //    run[<last>] == right + 1 (terminator)

    if (count == 0) {
        // A single equal run
        return;
    } else if (count == 1 && run[count] > right) {
        // Either a single ascending or a transformed descending run.
        // Always check that a final run is a proper terminator, otherwise
        // we have an unterminated trailing run, to handle downstream.
        return;
    }
    right++; // From here on right is exclusive
    if (run[count] < right) {
        // Corner case: the final run is not a terminator. This may happen
        // if a final run is an equals run, or there is a single-element run
        // at the end. Fix up by adding a proper terminator at the end.
        // Note that we terminate with (right + 1), incremented earlier.
        run[++count] = right;
    }

    // Determine alternation base for merge: odd is the parity of the
    // number of doublings needed to reach count
    byte odd = 0;
    for (int n = 1; (n <<= 1) < count; odd ^= 1);

    // Use or create temporary array b for merging
    short[] b;                 // temp array; alternates with a
    int ao, bo;                // array offsets from 'left'
    int blen = right - left;   // space needed for b
    if (work == null || workLen < blen || workBase + blen > work.length) {
        work = new short[blen];
        workBase = 0;
    }
    if (odd == 0) {
        // Start from a copy in the workspace and merge into the
        // original array first
        System.arraycopy(a, left, work, workBase, blen);
        b = a;
        bo = 0;
        a = work;
        ao = workBase - left;
    } else {
        // Start from the original array and merge into the workspace first
        b = work;
        ao = 0;
        bo = workBase - left;
    }

    // Merging: every pass merges neighbouring pairs of runs from a into b,
    // then the roles of the two arrays are swapped
    for (int last; count > 1; count = last) {
        for (int k = (last = 0) + 2; k <= count; k += 2) {
            int hi = run[k], mi = run[k - 1];
            for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) {
                // Take from the left run when the right one is exhausted
                // or the left element is not greater (keeps merge stable)
                if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) {
                    b[i + bo] = a[p++ + ao];
                } else {
                    b[i + bo] = a[q++ + ao];
                }
            }
            run[++last] = hi;
        }
        if ((count & 1) != 0) {
            // Odd number of runs: the last one has no partner, copy it as is
            for (int i = right, lo = run[count - 1]; --i >= lo;
                b[i + bo] = a[i + ao]
            );
            run[++last] = right;
        }
        short[] t = a; a = b; b = t;
        int o = ao; ao = bo; bo = o;
    }
}
|
1186 |
|
1187 /** |
|
1188 * Sorts the specified range of the array by Dual-Pivot Quicksort. |
|
1189 * |
|
1190 * @param a the array to be sorted |
|
1191 * @param left the index of the first element, inclusive, to be sorted |
|
1192 * @param right the index of the last element, inclusive, to be sorted |
|
1193 * @param leftmost indicates if this part is the leftmost in the range |
|
1194 */ |
|
1195 private static void sort(short[] a, int left, int right, boolean leftmost) { |
|
1196 int length = right - left + 1; |
|
1197 |
|
1198 // Use insertion sort on tiny arrays |
|
1199 if (length < INSERTION_SORT_THRESHOLD) { |
|
1200 if (leftmost) { |
|
1201 /* |
|
1202 * Traditional (without sentinel) insertion sort, |
|
1203 * optimized for server VM, is used in case of |
|
1204 * the leftmost part. |
|
1205 */ |
|
1206 for (int i = left, j = i; i < right; j = ++i) { |
|
1207 short ai = a[i + 1]; |
|
1208 while (ai < a[j]) { |
|
1209 a[j + 1] = a[j]; |
|
1210 if (j-- == left) { |
|
1211 break; |
|
1212 } |
|
1213 } |
|
1214 a[j + 1] = ai; |
|
1215 } |
|
1216 } else { |
|
1217 /* |
|
1218 * Skip the longest ascending sequence. |
|
1219 */ |
|
1220 do { |
|
1221 if (left >= right) { |
|
1222 return; |
|
1223 } |
|
1224 } while (a[++left] >= a[left - 1]); |
|
1225 |
|
1226 /* |
|
1227 * Every element from adjoining part plays the role |
|
1228 * of sentinel, therefore this allows us to avoid the |
|
1229 * left range check on each iteration. Moreover, we use |
|
1230 * the more optimized algorithm, so called pair insertion |
|
1231 * sort, which is faster (in the context of Quicksort) |
|
1232 * than traditional implementation of insertion sort. |
|
1233 */ |
|
1234 for (int k = left; ++left <= right; k = ++left) { |
|
1235 short a1 = a[k], a2 = a[left]; |
|
1236 |
|
1237 if (a1 < a2) { |
|
1238 a2 = a1; a1 = a[left]; |
|
1239 } |
|
1240 while (a1 < a[--k]) { |
|
1241 a[k + 2] = a[k]; |
|
1242 } |
|
1243 a[++k + 1] = a1; |
|
1244 |
|
1245 while (a2 < a[--k]) { |
|
1246 a[k + 1] = a[k]; |
|
1247 } |
|
1248 a[k + 1] = a2; |
|
1249 } |
|
1250 short last = a[right]; |
|
1251 |
|
1252 while (last < a[--right]) { |
|
1253 a[right + 1] = a[right]; |
|
1254 } |
|
1255 a[right + 1] = last; |
|
1256 } |
|
1257 return; |
|
1258 } |
|
1259 |
|
1260 // Inexpensive approximation of length / 7 |
|
1261 int seventh = (length >> 3) + (length >> 6) + 1; |
|
1262 |
|
1263 /* |
|
1264 * Sort five evenly spaced elements around (and including) the |
|
1265 * center element in the range. These elements will be used for |
|
1266 * pivot selection as described below. The choice for spacing |
|
1267 * these elements was empirically determined to work well on |
|
1268 * a wide variety of inputs. |
|
1269 */ |
|
1270 int e3 = (left + right) >>> 1; // The midpoint |
|
1271 int e2 = e3 - seventh; |
|
1272 int e1 = e2 - seventh; |
|
1273 int e4 = e3 + seventh; |
|
1274 int e5 = e4 + seventh; |
|
1275 |
|
1276 // Sort these elements using insertion sort |
|
1277 if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
1278 |
|
1279 if (a[e3] < a[e2]) { short t = a[e3]; a[e3] = a[e2]; a[e2] = t; |
|
1280 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
1281 } |
|
1282 if (a[e4] < a[e3]) { short t = a[e4]; a[e4] = a[e3]; a[e3] = t; |
|
1283 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
1284 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
1285 } |
|
1286 } |
|
1287 if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; |
|
1288 if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; |
|
1289 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
1290 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
1291 } |
|
1292 } |
|
1293 } |
|
1294 |
|
1295 // Pointers |
|
1296 int less = left; // The index of the first element of center part |
|
1297 int great = right; // The index before the first element of right part |
|
1298 |
|
1299 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { |
|
1300 /* |
|
1301 * Use the second and fourth of the five sorted elements as pivots. |
|
1302 * These values are inexpensive approximations of the first and |
|
1303 * second terciles of the array. Note that pivot1 <= pivot2. |
|
1304 */ |
|
1305 short pivot1 = a[e2]; |
|
1306 short pivot2 = a[e4]; |
|
1307 |
|
1308 /* |
|
1309 * The first and the last elements to be sorted are moved to the |
|
1310 * locations formerly occupied by the pivots. When partitioning |
|
1311 * is complete, the pivots are swapped back into their final |
|
1312 * positions, and excluded from subsequent sorting. |
|
1313 */ |
|
1314 a[e2] = a[left]; |
|
1315 a[e4] = a[right]; |
|
1316 |
|
1317 /* |
|
1318 * Skip elements, which are less or greater than pivot values. |
|
1319 */ |
|
1320 while (a[++less] < pivot1); |
|
1321 while (a[--great] > pivot2); |
|
1322 |
|
1323 /* |
|
1324 * Partitioning: |
|
1325 * |
|
1326 * left part center part right part |
|
1327 * +--------------------------------------------------------------+ |
|
1328 * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | |
|
1329 * +--------------------------------------------------------------+ |
|
1330 * ^ ^ ^ |
|
1331 * | | | |
|
1332 * less k great |
|
1333 * |
|
1334 * Invariants: |
|
1335 * |
|
1336 * all in (left, less) < pivot1 |
|
1337 * pivot1 <= all in [less, k) <= pivot2 |
|
1338 * all in (great, right) > pivot2 |
|
1339 * |
|
1340 * Pointer k is the first index of ?-part. |
|
1341 */ |
|
1342 outer: |
|
1343 for (int k = less - 1; ++k <= great; ) { |
|
1344 short ak = a[k]; |
|
1345 if (ak < pivot1) { // Move a[k] to left part |
|
1346 a[k] = a[less]; |
|
1347 /* |
|
1348 * Here and below we use "a[i] = b; i++;" instead |
|
1349 * of "a[i++] = b;" due to performance issue. |
|
1350 */ |
|
1351 a[less] = ak; |
|
1352 ++less; |
|
1353 } else if (ak > pivot2) { // Move a[k] to right part |
|
1354 while (a[great] > pivot2) { |
|
1355 if (great-- == k) { |
|
1356 break outer; |
|
1357 } |
|
1358 } |
|
1359 if (a[great] < pivot1) { // a[great] <= pivot2 |
|
1360 a[k] = a[less]; |
|
1361 a[less] = a[great]; |
|
1362 ++less; |
|
1363 } else { // pivot1 <= a[great] <= pivot2 |
|
1364 a[k] = a[great]; |
|
1365 } |
|
1366 /* |
|
1367 * Here and below we use "a[i] = b; i--;" instead |
|
1368 * of "a[i--] = b;" due to performance issue. |
|
1369 */ |
|
1370 a[great] = ak; |
|
1371 --great; |
|
1372 } |
|
1373 } |
|
1374 |
|
1375 // Swap pivots into their final positions |
|
1376 a[left] = a[less - 1]; a[less - 1] = pivot1; |
|
1377 a[right] = a[great + 1]; a[great + 1] = pivot2; |
|
1378 |
|
1379 // Sort left and right parts recursively, excluding known pivots |
|
1380 sort(a, left, less - 2, leftmost); |
|
1381 sort(a, great + 2, right, false); |
|
1382 |
|
1383 /* |
|
1384 * If center part is too large (comprises > 4/7 of the array), |
|
1385 * swap internal pivot values to ends. |
|
1386 */ |
|
1387 if (less < e1 && e5 < great) { |
|
1388 /* |
|
1389 * Skip elements, which are equal to pivot values. |
|
1390 */ |
|
1391 while (a[less] == pivot1) { |
|
1392 ++less; |
|
1393 } |
|
1394 |
|
1395 while (a[great] == pivot2) { |
|
1396 --great; |
|
1397 } |
|
1398 |
|
1399 /* |
|
1400 * Partitioning: |
|
1401 * |
|
1402 * left part center part right part |
|
1403 * +----------------------------------------------------------+ |
|
1404 * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | |
|
1405 * +----------------------------------------------------------+ |
|
1406 * ^ ^ ^ |
|
1407 * | | | |
|
1408 * less k great |
|
1409 * |
1877 * |
1410 * Invariants: |
1878 * Invariants: |
1411 * |
1879 * |
1412 * all in (*, less) == pivot1 |
1880 * all in (low, lower] < pivot1 |
1413 * pivot1 < all in [less, k) < pivot2 |
1881 * pivot1 <= all in (k, upper) <= pivot2 |
1414 * all in (great, *) == pivot2 |
1882 * all in [upper, end) > pivot2 |
1415 * |
1883 * |
1416 * Pointer k is the first index of ?-part. |
1884 * Pointer k is the last index of ?-part |
1417 */ |
1885 */ |
1418 outer: |
1886 for (int unused = --lower, k = ++upper; --k > lower; ) { |
1419 for (int k = less - 1; ++k <= great; ) { |
1887 char ak = a[k]; |
1420 short ak = a[k]; |
1888 |
1421 if (ak == pivot1) { // Move a[k] to left part |
1889 if (ak < pivot1) { // Move a[k] to the left side |
1422 a[k] = a[less]; |
1890 while (lower < k) { |
1423 a[less] = ak; |
1891 if (a[++lower] >= pivot1) { |
1424 ++less; |
1892 if (a[lower] > pivot2) { |
1425 } else if (ak == pivot2) { // Move a[k] to right part |
1893 a[k] = a[--upper]; |
1426 while (a[great] == pivot2) { |
1894 a[upper] = a[lower]; |
1427 if (great-- == k) { |
1895 } else { |
1428 break outer; |
1896 a[k] = a[lower]; |
|
1897 } |
|
1898 a[lower] = ak; |
|
1899 break; |
1429 } |
1900 } |
1430 } |
1901 } |
1431 if (a[great] == pivot1) { // a[great] < pivot2 |
1902 } else if (ak > pivot2) { // Move a[k] to the right side |
1432 a[k] = a[less]; |
1903 a[k] = a[--upper]; |
1433 /* |
1904 a[upper] = ak; |
1434 * Even though a[great] equals to pivot1, the |
1905 } |
1435 * assignment a[less] = pivot1 may be incorrect, |
1906 } |
1436 * if a[great] and pivot1 are floating-point zeros |
1907 |
1437 * of different signs. Therefore in float and |
1908 /* |
1438 * double sorting methods we have to use more |
1909 * Swap the pivots into their final positions. |
1439 * accurate assignment a[less] = a[great]. |
1910 */ |
1440 */ |
1911 a[low] = a[lower]; a[lower] = pivot1; |
1441 a[less] = pivot1; |
1912 a[end] = a[upper]; a[upper] = pivot2; |
1442 ++less; |
1913 |
1443 } else { // pivot1 < a[great] < pivot2 |
1914 /* |
1444 a[k] = a[great]; |
1915 * Sort non-left parts recursively, |
|
1916 * excluding known pivots. |
|
1917 */ |
|
1918 sort(a, bits | 1, lower + 1, upper); |
|
1919 sort(a, bits | 1, upper + 1, high); |
|
1920 |
|
1921 } else { // Use single pivot in case of many equal elements |
|
1922 |
|
1923 /* |
|
1924 * Use the third of the five sorted elements as the pivot. |
|
1925 * This value is inexpensive approximation of the median. |
|
1926 */ |
|
1927 char pivot = a[e3]; |
|
1928 |
|
1929 /* |
|
1930 * The first element to be sorted is moved to the |
|
1931 * location formerly occupied by the pivot. After |
|
1932 * completion of partitioning the pivot is swapped |
|
1933 * back into its final position, and excluded from |
|
1934 * the next subsequent sorting. |
|
1935 */ |
|
1936 a[e3] = a[lower]; |
|
1937 |
|
1938 /* |
|
1939 * Traditional 3-way (Dutch National Flag) partitioning |
|
1940 * |
|
1941 * left part central part right part |
|
1942 * +------------------------------------------------------+ |
|
1943 * | < pivot | ? | == pivot | > pivot | |
|
1944 * +------------------------------------------------------+ |
|
1945 * ^ ^ ^ |
|
1946 * | | | |
|
1947 * lower k upper |
|
1948 * |
|
1949 * Invariants: |
|
1950 * |
|
1951 * all in (low, lower] < pivot |
|
1952 * all in (k, upper) == pivot |
|
1953 * all in [upper, end] > pivot |
|
1954 * |
|
1955 * Pointer k is the last index of ?-part |
|
1956 */ |
|
1957 for (int k = ++upper; --k > lower; ) { |
|
1958 char ak = a[k]; |
|
1959 |
|
1960 if (ak != pivot) { |
|
1961 a[k] = pivot; |
|
1962 |
|
1963 if (ak < pivot) { // Move a[k] to the left side |
|
1964 while (a[++lower] < pivot); |
|
1965 |
|
1966 if (a[lower] > pivot) { |
|
1967 a[--upper] = a[lower]; |
|
1968 } |
|
1969 a[lower] = ak; |
|
1970 } else { // ak > pivot - Move a[k] to the right side |
|
1971 a[--upper] = ak; |
1445 } |
1972 } |
1446 a[great] = ak; |
1973 } |
1447 --great; |
1974 } |
1448 } |
1975 |
1449 } |
1976 /* |
1450 } |
1977 * Swap the pivot into its final position. |
1451 |
1978 */ |
1452 // Sort center part recursively |
1979 a[low] = a[lower]; a[lower] = pivot; |
1453 sort(a, less, great, false); |
1980 |
1454 |
1981 /* |
1455 } else { // Partitioning with one pivot |
1982 * Sort the right part, excluding known pivot. |
1456 /* |
1983 * All elements from the central part are |
1457 * Use the third of the five sorted elements as pivot. |
1984 * equal and therefore already sorted. |
1458 * This value is inexpensive approximation of the median. |
1985 */ |
1459 */ |
1986 sort(a, bits | 1, upper, high); |
1460 short pivot = a[e3]; |
1987 } |
1461 |
1988 high = lower; // Iterate along the left part |
1462 /* |
1989 } |
1463 * Partitioning degenerates to the traditional 3-way |
1990 } |
1464 * (or "Dutch National Flag") schema: |
1991 |
|
1992 /** |
|
1993 * Sorts the specified range of the array using insertion sort. |
|
1994 * |
|
1995 * @param a the array to be sorted |
|
1996 * @param low the index of the first element, inclusive, to be sorted |
|
1997 * @param high the index of the last element, exclusive, to be sorted |
|
1998 */ |
|
1999 private static void insertionSort(char[] a, int low, int high) { |
|
2000 for (int i, k = low; ++k < high; ) { |
|
2001 char ai = a[i = k]; |
|
2002 |
|
2003 if (ai < a[i - 1]) { |
|
2004 while (--i >= low && ai < a[i]) { |
|
2005 a[i + 1] = a[i]; |
|
2006 } |
|
2007 a[i + 1] = ai; |
|
2008 } |
|
2009 } |
|
2010 } |
|
2011 |
|
2012 /** |

2013 * The number of distinct char values. |
     * Evaluates to 1 << 16 (65536). countingSort(char[], int, int) uses it
     * as the length of its per-value counter array and as the upper bound
     * from which it walks char values downwards.

2014 */ |

2015 private static final int NUM_CHAR_VALUES = 1 << 16; |
|
2016 |
|
2017 /** |
|
2018 * Sorts the specified range of the array using counting sort. |
|
2019 * |
|
2020 * @param a the array to be sorted |
|
2021 * @param low the index of the first element, inclusive, to be sorted |
|
2022 * @param high the index of the last element, exclusive, to be sorted |
|
2023 */ |
|
2024 private static void countingSort(char[] a, int low, int high) { |
|
2025 int[] count = new int[NUM_CHAR_VALUES]; |
|
2026 |
|
2027 /* |
|
2028 * Compute a histogram with the number of each values. |
|
2029 */ |
|
2030 for (int i = high; i > low; ++count[a[--i]]); |
|
2031 |
|
2032 /* |
|
2033 * Place values on their final positions. |
|
2034 */ |
|
2035 if (high - low > NUM_CHAR_VALUES) { |
|
2036 for (int i = NUM_CHAR_VALUES; i > 0; ) { |
|
2037 for (low = high - count[--i]; high > low; |
|
2038 a[--high] = (char) i |
|
2039 ); |
|
2040 } |
|
2041 } else { |
|
2042 for (int i = NUM_CHAR_VALUES; high > low; ) { |
|
2043 while (count[--i] == 0); |
|
2044 int c = count[i]; |
|
2045 |
|
2046 do { |
|
2047 a[--high] = (char) i; |
|
2048 } while (--c > 0); |
|
2049 } |
|
2050 } |
|
2051 } |
|
2052 |
|
2053 // [short] |
|
2054 |
|
2055 /** |
|
2056 * Sorts the specified range of the array using |
|
2057 * counting sort or Dual-Pivot Quicksort. |
|
2058 * |
|
2059 * @param a the array to be sorted |
|
2060 * @param low the index of the first element, inclusive, to be sorted |
|
2061 * @param high the index of the last element, exclusive, to be sorted |
|
2062 */ |
|
2063 static void sort(short[] a, int low, int high) { |
|
2064 if (high - low > MIN_SHORT_OR_CHAR_COUNTING_SORT_SIZE) { |
|
2065 countingSort(a, low, high); |
|
2066 } else { |
|
2067 sort(a, 0, low, high); |
|
2068 } |
|
2069 } |
|
2070 |
|
2071 /** |
|
2072 * Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
2073 * other sorts in special-cases, possibly with parallel partitions. |
|
2074 * |
|
2075 * @param a the array to be sorted |
|
2076 * @param bits the combination of recursion depth and bit flag, where |
|
2077 * the right bit "0" indicates that array is the leftmost part |
|
2078 * @param low the index of the first element, inclusive, to be sorted |
|
2079 * @param high the index of the last element, exclusive, to be sorted |
|
2080 */ |
|
2081 static void sort(short[] a, int bits, int low, int high) { |
|
2082 while (true) { |
|
2083 int end = high - 1, size = high - low; |
|
2084 |
|
2085 /* |
|
2086 * Invoke insertion sort on small leftmost part. |
|
2087 */ |
|
2088 if (size < MAX_INSERTION_SORT_SIZE) { |
|
2089 insertionSort(a, low, high); |
|
2090 return; |
|
2091 } |
|
2092 |
|
2093 /* |
|
2094 * Switch to counting sort if execution |
|
2095 * time is becoming quadratic. |
|
2096 */ |
|
2097 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
2098 countingSort(a, low, high); |
|
2099 return; |
|
2100 } |
|
2101 |
|
2102 /* |
|
2103 * Use an inexpensive approximation of the golden ratio |
|
2104 * to select five sample elements and determine pivots. |
|
2105 */ |
|
2106 int step = (size >> 3) * 3 + 3; |
|
2107 |
|
2108 /* |
|
2109 * Five elements around (and including) the central element |
|
2110 * will be used for pivot selection as described below. The |
|
2111 * unequal choice of spacing these elements was empirically |
|
2112 * determined to work well on a wide variety of inputs. |
|
2113 */ |
|
2114 int e1 = low + step; |
|
2115 int e5 = end - step; |
|
2116 int e3 = (e1 + e5) >>> 1; |
|
2117 int e2 = (e1 + e3) >>> 1; |
|
2118 int e4 = (e3 + e5) >>> 1; |
|
2119 short a3 = a[e3]; |
|
2120 |
|
2121 /* |
|
2122 * Sort these elements in place by the combination |
|
2123 * of 4-element sorting network and insertion sort. |
1465 * |
2124 * |
1466 * left part center part right part |
2125 * 5 ------o-----------o------------ |
1467 * +-------------------------------------------------+ |
2126 * | | |
1468 * | < pivot | == pivot | ? | > pivot | |
2127 * 4 ------|-----o-----o-----o------ |
1469 * +-------------------------------------------------+ |
2128 * | | | |
1470 * ^ ^ ^ |
2129 * 2 ------o-----|-----o-----o------ |
1471 * | | | |
2130 * | | |
1472 * less k great |
2131 * 1 ------------o-----o------------ |
1473 * |
2132 */ |
1474 * Invariants: |
2133 if (a[e5] < a[e2]) { short t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
1475 * |
2134 if (a[e4] < a[e1]) { short t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
1476 * all in (left, less) < pivot |
2135 if (a[e5] < a[e4]) { short t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
1477 * all in [less, k) == pivot |
2136 if (a[e2] < a[e1]) { short t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
1478 * all in (great, right) > pivot |
2137 if (a[e4] < a[e2]) { short t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
1479 * |
2138 |
1480 * Pointer k is the first index of ?-part. |
2139 if (a3 < a[e2]) { |
1481 */ |
2140 if (a3 < a[e1]) { |
1482 for (int k = less; k <= great; ++k) { |
2141 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
1483 if (a[k] == pivot) { |
2142 } else { |
1484 continue; |
2143 a[e3] = a[e2]; a[e2] = a3; |
1485 } |
2144 } |
1486 short ak = a[k]; |
2145 } else if (a3 > a[e4]) { |
1487 if (ak < pivot) { // Move a[k] to left part |
2146 if (a3 > a[e5]) { |
1488 a[k] = a[less]; |
2147 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
1489 a[less] = ak; |
2148 } else { |
1490 ++less; |
2149 a[e3] = a[e4]; a[e4] = a3; |
1491 } else { // a[k] > pivot - Move a[k] to right part |
2150 } |
1492 while (a[great] > pivot) { |
2151 } |
1493 --great; |
2152 |
1494 } |
2153 // Pointers |
1495 if (a[great] < pivot) { // a[great] <= pivot |
2154 int lower = low; // The index of the last element of the left part |
1496 a[k] = a[less]; |
2155 int upper = end; // The index of the first element of the right part |
1497 a[less] = a[great]; |
2156 |
1498 ++less; |
2157 /* |
1499 } else { // a[great] == pivot |
2158 * Partitioning with 2 pivots in case of different elements. |
1500 /* |
2159 */ |
1501 * Even though a[great] equals to pivot, the |
2160 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
1502 * assignment a[k] = pivot may be incorrect, |
2161 |
1503 * if a[great] and pivot are floating-point |
2162 /* |
1504 * zeros of different signs. Therefore in float |
2163 * Use the first and fifth of the five sorted elements as |
1505 * and double sorting methods we have to use |
2164 * the pivots. These values are inexpensive approximation |
1506 * more accurate assignment a[k] = a[great]. |
2165 * of tertiles. Note, that pivot1 < pivot2. |
1507 */ |
2166 */ |
1508 a[k] = pivot; |
2167 short pivot1 = a[e1]; |
1509 } |
2168 short pivot2 = a[e5]; |
1510 a[great] = ak; |
2169 |
1511 --great; |
2170 /* |
1512 } |
2171 * The first and the last elements to be sorted are moved |
1513 } |
2172 * to the locations formerly occupied by the pivots. When |
1514 |
2173 * partitioning is completed, the pivots are swapped back |
1515 /* |
2174 * into their final positions, and excluded from the next |
1516 * Sort left and right parts recursively. |
2175 * subsequent sorting. |
1517 * All elements from center part are equal |
2176 */ |
1518 * and, therefore, already sorted. |
2177 a[e1] = a[lower]; |
1519 */ |
2178 a[e5] = a[upper]; |
1520 sort(a, left, less - 1, leftmost); |
2179 |
1521 sort(a, great + 1, right, false); |
2180 /* |
1522 } |
2181 * Skip elements, which are less or greater than the pivots. |
1523 } |
2182 */ |
1524 |
2183 while (a[++lower] < pivot1); |
1525 /** |
2184 while (a[--upper] > pivot2); |
1526 * Sorts the specified range of the array using the given |
2185 |
1527 * workspace array slice if possible for merging |
2186 /* |
1528 * |
2187 * Backward 3-interval partitioning |
1529 * @param a the array to be sorted |
2188 * |
1530 * @param left the index of the first element, inclusive, to be sorted |
2189 * left part central part right part |
1531 * @param right the index of the last element, inclusive, to be sorted |
2190 * +------------------------------------------------------------+ |
1532 * @param work a workspace array (slice) |
2191 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
1533 * @param workBase origin of usable space in work array |
2192 * +------------------------------------------------------------+ |
1534 * @param workLen usable size of work array |
2193 * ^ ^ ^ |
1535 */ |
2194 * | | | |
1536 static void sort(char[] a, int left, int right, |
2195 * lower k upper |
1537 char[] work, int workBase, int workLen) { |
     // Dispatches on the size of the inclusive range [left, right]:
     // when right - left exceeds COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR
     // the range is counting-sorted in place and the work parameters are
     // not used; otherwise everything is forwarded to doSort.

1538 // Use counting sort on large arrays |

1539 if (right - left > COUNTING_SORT_THRESHOLD_FOR_SHORT_OR_CHAR) { |

1540 int[] count = new int[NUM_CHAR_VALUES]; |

1541 |

     // Histogram pass: count[c] becomes the number of occurrences of
     // char c in a[left..right].
1542 for (int i = left - 1; ++i <= right; |

1543 count[a[i]]++ |

1544 ); |

     // Rewrite pass: i walks char values from the largest downwards,
     // k is one past the next slot to fill and moves from right + 1
     // down to left; the loop ends once the whole range is rewritten.
1545 for (int i = NUM_CHAR_VALUES, k = right + 1; k > left; ) { |

     // Skip values that do not occur in the range.
1546 while (count[--i] == 0); |

1547 char value = (char) i; |

1548 int s = count[i]; |

1549 |

     // Emit s copies of the value (s is at least 1 here).
1550 do { |

1551 a[--k] = value; |

1552 } while (--s > 0); |

1553 } |

1554 } else { // Use Dual-Pivot Quicksort on small arrays |

1555 doSort(a, left, right, work, workBase, workLen); |

1556 } |

1557 } |
|
1558 |
|
1559 /** The number of distinct char values. */ |
     // Evaluates to 1 << 16 (65536). The counting-sort branch of
     // sort(char[], int, int, char[], int, int) sizes its count[] array with
     // it and starts its descending value loop from it.

1560 private static final int NUM_CHAR_VALUES = 1 << 16; |
|
1561 |
|
1562 /** |

1563 * Sorts the specified range of the array. |
     *
     * Ranges with right - left < QUICKSORT_THRESHOLD are passed straight to
     * sort(a, left, right, true). Otherwise the range is scanned for
     * ascending and descending runs (descending runs are reversed in place).
     * If MAX_RUN_COUNT runs are reached, the method also falls back to
     * sort(a, left, right, true); if not, the runs are merged pairwise,
     * pass by pass, alternating between the array and a work buffer.

1564 * |

1565 * @param a the array to be sorted |

1566 * @param left the index of the first element, inclusive, to be sorted |

1567 * @param right the index of the last element, inclusive, to be sorted |

1568 * @param work a workspace array (slice) |
     *        may be null; a new buffer of the needed length is allocated when
     *        it is null or the usable slice is too small

1569 * @param workBase origin of usable space in work array |

1570 * @param workLen usable size of work array |

1571 */ |

1572 private static void doSort(char[] a, int left, int right, |

1573 char[] work, int workBase, int workLen) { |

1574 // Use Quicksort on small arrays |

1575 if (right - left < QUICKSORT_THRESHOLD) { |

1576 sort(a, left, right, true); |

1577 return; |

1578 } |

1579 |

1580 /* |

1581 * Index run[i] is the start of i-th run |

1582 * (ascending or descending sequence). |

1583 */ |

1584 int[] run = new int[MAX_RUN_COUNT + 1]; |

1585 int count = 0; run[0] = left; |

1586 |

1587 // Check if the array is nearly sorted |
     // Each iteration consumes one run starting at run[count]; the loop
     // update stores k, the index just past that run, as the start of the
     // next run.

1588 for (int k = left; k < right; run[count] = k) { |

1589 // Equal items in the beginning of the sequence |

1590 while (k < right && a[k] == a[k + 1]) |

1591 k++; |

1592 if (k == right) break; // Sequence finishes with equal items |

1593 if (a[k] < a[k + 1]) { // ascending |

1594 while (++k <= right && a[k - 1] <= a[k]); |

1595 } else if (a[k] > a[k + 1]) { // descending |

1596 while (++k <= right && a[k - 1] >= a[k]); |

1597 // Transform into an ascending sequence |
     // Reverses a[run[count] .. k - 1] by swapping from both ends inwards.

1598 for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) { |

1599 char t = a[lo]; a[lo] = a[hi]; a[hi] = t; |

1600 } |

1601 } |

1602 |

1603 // Merge a transformed descending sequence followed by an |

1604 // ascending sequence |
     // If the current run's first element is not smaller than the element
     // just before it, the run simply extends the previous one, so the run
     // counter is stepped back (and re-incremented below).

1605 if (run[count] > left && a[run[count]] >= a[run[count] - 1]) { |

1606 count--; |

1607 } |

1608 |

1609 /* |

1610 * The array is not highly structured, |

1611 * use Quicksort instead of merge sort. |

1612 */ |

1613 if (++count == MAX_RUN_COUNT) { |

1614 sort(a, left, right, true); |

1615 return; |

1616 } |

1617 } |

1618 |

1619 // These invariants should hold true: |

1620 // run[0] = left |

1621 // run[<last>] = right + 1; (terminator) |

1622 |

1623 if (count == 0) { |

1624 // A single equal run |

1625 return; |

1626 } else if (count == 1 && run[count] > right) { |

1627 // Either a single ascending or a transformed descending run. |

1628 // Always check that a final run is a proper terminator, otherwise |

1629 // we have an unterminated trailing run, to handle downstream. |

1630 return; |

1631 } |

     // From here on right is exclusive (one past the last element).
1632 right++; |

1633 if (run[count] < right) { |

1634 // Corner case: the final run is not a terminator. This may happen |

1635 // if a final run is an equals run, or there is a single-element run |

1636 // at the end. Fix up by adding a proper terminator at the end. |

1637 // Note that we terminate with (right + 1), incremented earlier. |

1638 run[++count] = right; |

1639 } |

1640 |

1641 // Determine alternation base for merge |
     // odd is toggled once for every doubling of n that stays below count.
     // It selects which buffer is the merge source of the first pass, so
     // that after the source and destination swap at the end of every pass
     // the final pass writes into the caller's array.

1642 byte odd = 0; |

1643 for (int n = 1; (n <<= 1) < count; odd ^= 1); |

1644 |

1645 // Use or create temporary array b for merging |

1646 char[] b; // temp array; alternates with a |

1647 int ao, bo; // array offsets from 'left' |

1648 int blen = right - left; // space needed for b |

1649 if (work == null || workLen < blen || workBase + blen > work.length) { |

1650 work = new char[blen]; |

1651 workBase = 0; |

1652 } |

     // Logical index i of the range is stored at a[i + ao] in the source
     // and at b[i + bo] in the destination; the offset is 0 for the
     // caller's array and workBase - left for the work buffer.
1653 if (odd == 0) { |

     // First pass merges from the work buffer into the caller's array,
     // so the range is copied into the work buffer first.
1654 System.arraycopy(a, left, work, workBase, blen); |

1655 b = a; |

1656 bo = 0; |

1657 a = work; |

1658 ao = workBase - left; |

1659 } else { |

     // First pass merges from the caller's array into the work buffer.
1660 b = work; |

1661 ao = 0; |

1662 bo = workBase - left; |

1663 } |

1664 |

1665 // Merging |
     // One outer iteration is one merge pass; last counts the runs produced
     // by the pass and becomes the new count.

1666 for (int last; count > 1; count = last) { |

     // k steps over pairs of adjacent runs:
     // [run[k - 2], run[k - 1]) and [run[k - 1], run[k]).
1667 for (int k = (last = 0) + 2; k <= count; k += 2) { |

1668 int hi = run[k], mi = run[k - 1]; |

     // p and q are the read cursors of the left and right run; i is the
     // write position. On ties the element of the left run is taken.
1669 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) { |

1670 if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) { |

1671 b[i + bo] = a[p++ + ao]; |

1672 } else { |

1673 b[i + bo] = a[q++ + ao]; |

1674 } |

1675 } |

1676 run[++last] = hi; |

1677 } |

     // Odd number of runs: the last run has no partner and is copied to
     // the destination unchanged.
1678 if ((count & 1) != 0) { |

1679 for (int i = right, lo = run[count - 1]; --i >= lo; |

1680 b[i + bo] = a[i + ao] |

1681 ); |

1682 run[++last] = right; |

1683 } |

     // Swap source and destination (and their offsets) for the next pass.
1684 char[] t = a; a = b; b = t; |

1685 int o = ao; ao = bo; bo = o; |

1686 } |

1687 } |
|
1688 |
|
1689 /** |
|
1690 * Sorts the specified range of the array by Dual-Pivot Quicksort. |
|
1691 * |
|
1692 * @param a the array to be sorted |
|
1693 * @param left the index of the first element, inclusive, to be sorted |
|
1694 * @param right the index of the last element, inclusive, to be sorted |
|
1695 * @param leftmost indicates if this part is the leftmost in the range |
|
1696 */ |
|
1697 private static void sort(char[] a, int left, int right, boolean leftmost) { |
|
1698 int length = right - left + 1; |
|
1699 |
|
1700 // Use insertion sort on tiny arrays |
|
1701 if (length < INSERTION_SORT_THRESHOLD) { |
|
1702 if (leftmost) { |
|
1703 /* |
|
1704 * Traditional (without sentinel) insertion sort, |
|
1705 * optimized for server VM, is used in case of |
|
1706 * the leftmost part. |
|
1707 */ |
|
1708 for (int i = left, j = i; i < right; j = ++i) { |
|
1709 char ai = a[i + 1]; |
|
1710 while (ai < a[j]) { |
|
1711 a[j + 1] = a[j]; |
|
1712 if (j-- == left) { |
|
1713 break; |
|
1714 } |
|
1715 } |
|
1716 a[j + 1] = ai; |
|
1717 } |
|
1718 } else { |
|
1719 /* |
|
1720 * Skip the longest ascending sequence. |
|
1721 */ |
|
1722 do { |
|
1723 if (left >= right) { |
|
1724 return; |
|
1725 } |
|
1726 } while (a[++left] >= a[left - 1]); |
|
1727 |
|
1728 /* |
|
1729 * Every element from adjoining part plays the role |
|
1730 * of sentinel, therefore this allows us to avoid the |
|
1731 * left range check on each iteration. Moreover, we use |
|
1732 * the more optimized algorithm, so called pair insertion |
|
1733 * sort, which is faster (in the context of Quicksort) |
|
1734 * than traditional implementation of insertion sort. |
|
1735 */ |
|
1736 for (int k = left; ++left <= right; k = ++left) { |
|
1737 char a1 = a[k], a2 = a[left]; |
|
1738 |
|
1739 if (a1 < a2) { |
|
1740 a2 = a1; a1 = a[left]; |
|
1741 } |
|
1742 while (a1 < a[--k]) { |
|
1743 a[k + 2] = a[k]; |
|
1744 } |
|
1745 a[++k + 1] = a1; |
|
1746 |
|
1747 while (a2 < a[--k]) { |
|
1748 a[k + 1] = a[k]; |
|
1749 } |
|
1750 a[k + 1] = a2; |
|
1751 } |
|
1752 char last = a[right]; |
|
1753 |
|
1754 while (last < a[--right]) { |
|
1755 a[right + 1] = a[right]; |
|
1756 } |
|
1757 a[right + 1] = last; |
|
1758 } |
|
1759 return; |
|
1760 } |
|
1761 |
|
1762 // Inexpensive approximation of length / 7 |
|
1763 int seventh = (length >> 3) + (length >> 6) + 1; |
|
1764 |
|
1765 /* |
|
1766 * Sort five evenly spaced elements around (and including) the |
|
1767 * center element in the range. These elements will be used for |
|
1768 * pivot selection as described below. The choice for spacing |
|
1769 * these elements was empirically determined to work well on |
|
1770 * a wide variety of inputs. |
|
1771 */ |
|
1772 int e3 = (left + right) >>> 1; // The midpoint |
|
1773 int e2 = e3 - seventh; |
|
1774 int e1 = e2 - seventh; |
|
1775 int e4 = e3 + seventh; |
|
1776 int e5 = e4 + seventh; |
|
1777 |
|
1778 // Sort these elements using insertion sort |
|
1779 if (a[e2] < a[e1]) { char t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
1780 |
|
1781 if (a[e3] < a[e2]) { char t = a[e3]; a[e3] = a[e2]; a[e2] = t; |
|
1782 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
1783 } |
|
1784 if (a[e4] < a[e3]) { char t = a[e4]; a[e4] = a[e3]; a[e3] = t; |
|
1785 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
1786 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
1787 } |
|
1788 } |
|
1789 if (a[e5] < a[e4]) { char t = a[e5]; a[e5] = a[e4]; a[e4] = t; |
|
1790 if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; |
|
1791 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
1792 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
1793 } |
|
1794 } |
|
1795 } |
|
1796 |
|
1797 // Pointers |
|
1798 int less = left; // The index of the first element of center part |
|
1799 int great = right; // The index before the first element of right part |
|
1800 |
|
1801 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { |
|
1802 /* |
|
1803 * Use the second and fourth of the five sorted elements as pivots. |
|
1804 * These values are inexpensive approximations of the first and |
|
1805 * second terciles of the array. Note that pivot1 <= pivot2. |
|
1806 */ |
|
1807 char pivot1 = a[e2]; |
|
1808 char pivot2 = a[e4]; |
|
1809 |
|
1810 /* |
|
1811 * The first and the last elements to be sorted are moved to the |
|
1812 * locations formerly occupied by the pivots. When partitioning |
|
1813 * is complete, the pivots are swapped back into their final |
|
1814 * positions, and excluded from subsequent sorting. |
|
1815 */ |
|
1816 a[e2] = a[left]; |
|
1817 a[e4] = a[right]; |
|
1818 |
|
1819 /* |
|
1820 * Skip elements, which are less or greater than pivot values. |
|
1821 */ |
|
1822 while (a[++less] < pivot1); |
|
1823 while (a[--great] > pivot2); |
|
1824 |
|
1825 /* |
|
1826 * Partitioning: |
|
1827 * |
|
1828 * left part center part right part |
|
1829 * +--------------------------------------------------------------+ |
|
1830 * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | |
|
1831 * +--------------------------------------------------------------+ |
|
1832 * ^ ^ ^ |
|
1833 * | | | |
|
1834 * less k great |
|
1835 * |
|
1836 * Invariants: |
|
1837 * |
|
1838 * all in (left, less) < pivot1 |
|
1839 * pivot1 <= all in [less, k) <= pivot2 |
|
1840 * all in (great, right) > pivot2 |
|
1841 * |
|
1842 * Pointer k is the first index of ?-part. |
|
1843 */ |
|
1844 outer: |
|
1845 for (int k = less - 1; ++k <= great; ) { |
|
1846 char ak = a[k]; |
|
1847 if (ak < pivot1) { // Move a[k] to left part |
|
1848 a[k] = a[less]; |
|
1849 /* |
|
1850 * Here and below we use "a[i] = b; i++;" instead |
|
1851 * of "a[i++] = b;" due to performance issue. |
|
1852 */ |
|
1853 a[less] = ak; |
|
1854 ++less; |
|
1855 } else if (ak > pivot2) { // Move a[k] to right part |
|
1856 while (a[great] > pivot2) { |
|
1857 if (great-- == k) { |
|
1858 break outer; |
|
1859 } |
|
1860 } |
|
1861 if (a[great] < pivot1) { // a[great] <= pivot2 |
|
1862 a[k] = a[less]; |
|
1863 a[less] = a[great]; |
|
1864 ++less; |
|
1865 } else { // pivot1 <= a[great] <= pivot2 |
|
1866 a[k] = a[great]; |
|
1867 } |
|
1868 /* |
|
1869 * Here and below we use "a[i] = b; i--;" instead |
|
1870 * of "a[i--] = b;" due to performance issue. |
|
1871 */ |
|
1872 a[great] = ak; |
|
1873 --great; |
|
1874 } |
|
1875 } |
|
1876 |
|
1877 // Swap pivots into their final positions |
|
1878 a[left] = a[less - 1]; a[less - 1] = pivot1; |
|
1879 a[right] = a[great + 1]; a[great + 1] = pivot2; |
|
1880 |
|
1881 // Sort left and right parts recursively, excluding known pivots |
|
1882 sort(a, left, less - 2, leftmost); |
|
1883 sort(a, great + 2, right, false); |
|
1884 |
|
1885 /* |
|
1886 * If center part is too large (comprises > 4/7 of the array), |
|
1887 * swap internal pivot values to ends. |
|
1888 */ |
|
1889 if (less < e1 && e5 < great) { |
|
1890 /* |
|
1891 * Skip elements, which are equal to pivot values. |
|
1892 */ |
|
1893 while (a[less] == pivot1) { |
|
1894 ++less; |
|
1895 } |
|
1896 |
|
1897 while (a[great] == pivot2) { |
|
1898 --great; |
|
1899 } |
|
1900 |
|
1901 /* |
|
1902 * Partitioning: |
|
1903 * |
|
1904 * left part center part right part |
|
1905 * +----------------------------------------------------------+ |
|
1906 * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | |
|
1907 * +----------------------------------------------------------+ |
|
1908 * ^ ^ ^ |
|
1909 * | | | |
|
1910 * less k great |
|
1911 * |
2196 * |
1912 * Invariants: |
2197 * Invariants: |
1913 * |
2198 * |
1914 * all in (*, less) == pivot1 |
2199 * all in (low, lower] < pivot1 |
1915 * pivot1 < all in [less, k) < pivot2 |
2200 * pivot1 <= all in (k, upper) <= pivot2 |
1916 * all in (great, *) == pivot2 |
2201 * all in [upper, end) > pivot2 |
1917 * |
2202 * |
1918 * Pointer k is the first index of ?-part. |
2203 * Pointer k is the last index of ?-part |
1919 */ |
2204 */ |
1920 outer: |
2205 for (int unused = --lower, k = ++upper; --k > lower; ) { |
1921 for (int k = less - 1; ++k <= great; ) { |
2206 short ak = a[k]; |
1922 char ak = a[k]; |
2207 |
1923 if (ak == pivot1) { // Move a[k] to left part |
2208 if (ak < pivot1) { // Move a[k] to the left side |
1924 a[k] = a[less]; |
2209 while (lower < k) { |
1925 a[less] = ak; |
2210 if (a[++lower] >= pivot1) { |
1926 ++less; |
2211 if (a[lower] > pivot2) { |
1927 } else if (ak == pivot2) { // Move a[k] to right part |
2212 a[k] = a[--upper]; |
1928 while (a[great] == pivot2) { |
2213 a[upper] = a[lower]; |
1929 if (great-- == k) { |
2214 } else { |
1930 break outer; |
2215 a[k] = a[lower]; |
|
2216 } |
|
2217 a[lower] = ak; |
|
2218 break; |
1931 } |
2219 } |
1932 } |
2220 } |
1933 if (a[great] == pivot1) { // a[great] < pivot2 |
2221 } else if (ak > pivot2) { // Move a[k] to the right side |
1934 a[k] = a[less]; |
2222 a[k] = a[--upper]; |
1935 /* |
2223 a[upper] = ak; |
1936 * Even though a[great] equals to pivot1, the |
2224 } |
1937 * assignment a[less] = pivot1 may be incorrect, |
2225 } |
1938 * if a[great] and pivot1 are floating-point zeros |
2226 |
1939 * of different signs. Therefore in float and |
2227 /* |
1940 * double sorting methods we have to use more |
2228 * Swap the pivots into their final positions. |
1941 * accurate assignment a[less] = a[great]. |
2229 */ |
1942 */ |
2230 a[low] = a[lower]; a[lower] = pivot1; |
1943 a[less] = pivot1; |
2231 a[end] = a[upper]; a[upper] = pivot2; |
1944 ++less; |
2232 |
1945 } else { // pivot1 < a[great] < pivot2 |
2233 /* |
1946 a[k] = a[great]; |
2234 * Sort non-left parts recursively, |
|
2235 * excluding known pivots. |
|
2236 */ |
|
2237 sort(a, bits | 1, lower + 1, upper); |
|
2238 sort(a, bits | 1, upper + 1, high); |
|
2239 |
|
2240 } else { // Use single pivot in case of many equal elements |
|
2241 |
|
2242 /* |
|
2243 * Use the third of the five sorted elements as the pivot. |
|
2244 * This value is inexpensive approximation of the median. |
|
2245 */ |
|
2246 short pivot = a[e3]; |
|
2247 |
|
2248 /* |
|
2249 * The first element to be sorted is moved to the |
|
2250 * location formerly occupied by the pivot. After |
|
2251 * completion of partitioning the pivot is swapped |
|
2252 * back into its final position, and excluded from |
|
2253 * the next subsequent sorting. |
|
2254 */ |
|
2255 a[e3] = a[lower]; |
|
2256 |
|
2257 /* |
|
2258 * Traditional 3-way (Dutch National Flag) partitioning |
|
2259 * |
|
2260 * left part central part right part |
|
2261 * +------------------------------------------------------+ |
|
2262 * | < pivot | ? | == pivot | > pivot | |
|
2263 * +------------------------------------------------------+ |
|
2264 * ^ ^ ^ |
|
2265 * | | | |
|
2266 * lower k upper |
|
2267 * |
|
2268 * Invariants: |
|
2269 * |
|
2270 * all in (low, lower] < pivot |
|
2271 * all in (k, upper) == pivot |
|
2272 * all in [upper, end] > pivot |
|
2273 * |
|
2274 * Pointer k is the last index of ?-part |
|
2275 */ |
|
2276 for (int k = ++upper; --k > lower; ) { |
|
2277 short ak = a[k]; |
|
2278 |
|
2279 if (ak != pivot) { |
|
2280 a[k] = pivot; |
|
2281 |
|
2282 if (ak < pivot) { // Move a[k] to the left side |
|
2283 while (a[++lower] < pivot); |
|
2284 |
|
2285 if (a[lower] > pivot) { |
|
2286 a[--upper] = a[lower]; |
|
2287 } |
|
2288 a[lower] = ak; |
|
2289 } else { // ak > pivot - Move a[k] to the right side |
|
2290 a[--upper] = ak; |
1947 } |
2291 } |
1948 a[great] = ak; |
2292 } |
1949 --great; |
2293 } |
1950 } |
2294 |
1951 } |
2295 /* |
1952 } |
2296 * Swap the pivot into its final position. |
1953 |
2297 */ |
1954 // Sort center part recursively |
2298 a[low] = a[lower]; a[lower] = pivot; |
1955 sort(a, less, great, false); |
2299 |
1956 |
2300 /* |
1957 } else { // Partitioning with one pivot |
2301 * Sort the right part, excluding known pivot. |
1958 /* |
2302 * All elements from the central part are |
1959 * Use the third of the five sorted elements as pivot. |
2303 * equal and therefore already sorted. |
1960 * This value is inexpensive approximation of the median. |
2304 */ |
1961 */ |
2305 sort(a, bits | 1, upper, high); |
1962 char pivot = a[e3]; |
2306 } |
1963 |
2307 high = lower; // Iterate along the left part |
1964 /* |
2308 } |
1965 * Partitioning degenerates to the traditional 3-way |
2309 } |
1966 * (or "Dutch National Flag") schema: |
2310 |
|
2311 /** |
|
2312 * Sorts the specified range of the array using insertion sort. |
|
2313 * |
|
2314 * @param a the array to be sorted |
|
2315 * @param low the index of the first element, inclusive, to be sorted |
|
2316 * @param high the index of the last element, exclusive, to be sorted |
|
2317 */ |
|
2318 private static void insertionSort(short[] a, int low, int high) { |
|
2319 for (int i, k = low; ++k < high; ) { |
|
2320 short ai = a[i = k]; |
|
2321 |
|
2322 if (ai < a[i - 1]) { |
|
2323 while (--i >= low && ai < a[i]) { |
|
2324 a[i + 1] = a[i]; |
|
2325 } |
|
2326 a[i + 1] = ai; |
|
2327 } |
|
2328 } |
|
2329 } |
|
2330 |
|
2331 /** |
|
2332 * The number of distinct short values. |
|
2333 */ |
|
2334 private static final int NUM_SHORT_VALUES = 1 << 16; |
|
2335 |
|
2336 /** |
|
2337 * Max index of short counter. |
|
2338 */ |
|
2339 private static final int MAX_SHORT_INDEX = Short.MAX_VALUE + NUM_SHORT_VALUES + 1; |
|
2340 |
|
2341 /** |
|
2342 * Sorts the specified range of the array using counting sort. |
|
2343 * |
|
2344 * @param a the array to be sorted |
|
2345 * @param low the index of the first element, inclusive, to be sorted |
|
2346 * @param high the index of the last element, exclusive, to be sorted |
|
2347 */ |
|
2348 private static void countingSort(short[] a, int low, int high) { |
|
2349 int[] count = new int[NUM_SHORT_VALUES]; |
|
2350 |
|
2351 /* |
|
2352 * Compute a histogram with the number of each values. |
|
2353 */ |
|
2354 for (int i = high; i > low; ++count[a[--i] & 0xFFFF]); |
|
2355 |
|
2356 /* |
|
2357 * Place values on their final positions. |
|
2358 */ |
|
2359 if (high - low > NUM_SHORT_VALUES) { |
|
2360 for (int i = MAX_SHORT_INDEX; --i > Short.MAX_VALUE; ) { |
|
2361 int value = i & 0xFFFF; |
|
2362 |
|
2363 for (low = high - count[value]; high > low; |
|
2364 a[--high] = (short) value |
|
2365 ); |
|
2366 } |
|
2367 } else { |
|
2368 for (int i = MAX_SHORT_INDEX; high > low; ) { |
|
2369 while (count[--i & 0xFFFF] == 0); |
|
2370 |
|
2371 int value = i & 0xFFFF; |
|
2372 int c = count[value]; |
|
2373 |
|
2374 do { |
|
2375 a[--high] = (short) value; |
|
2376 } while (--c > 0); |
|
2377 } |
|
2378 } |
|
2379 } |
|
2380 |
|
2381 // [float] |
|
2382 |
|
2383 /** |
|
2384 * Sorts the specified range of the array using parallel merge |
|
2385 * sort and/or Dual-Pivot Quicksort. |
|
2386 * |
|
2387 * To balance the faster splitting and parallelism of merge sort |
|
2388 * with the faster element partitioning of Quicksort, ranges are |
|
2389 * subdivided in tiers such that, if there is enough parallelism, |
|
2390 * the four-way parallel merge is started, still ensuring enough |
|
2391 * parallelism to process the partitions. |
|
2392 * |
|
2393 * @param a the array to be sorted |
|
2394 * @param parallelism the parallelism level |
|
2395 * @param low the index of the first element, inclusive, to be sorted |
|
2396 * @param high the index of the last element, exclusive, to be sorted |
|
2397 */ |
|
2398 static void sort(float[] a, int parallelism, int low, int high) { |
|
2399 /* |
|
2400 * Phase 1. Count the number of negative zero -0.0f, |
|
2401 * turn them into positive zero, and move all NaNs |
|
2402 * to the end of the array. |
|
2403 */ |
|
2404 int numNegativeZero = 0; |
|
2405 |
|
2406 for (int k = high; k > low; ) { |
|
2407 float ak = a[--k]; |
|
2408 |
|
2409 if (ak == 0.0f && Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f |
|
2410 numNegativeZero += 1; |
|
2411 a[k] = 0.0f; |
|
2412 } else if (ak != ak) { // ak is NaN |
|
2413 a[k] = a[--high]; |
|
2414 a[high] = ak; |
|
2415 } |
|
2416 } |
|
2417 |
|
2418 /* |
|
2419 * Phase 2. Sort everything except NaNs, |
|
2420 * which are already in place. |
|
2421 */ |
|
2422 int size = high - low; |
|
2423 |
|
2424 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
2425 int depth = getDepth(parallelism, size >> 12); |
|
2426 float[] b = depth == 0 ? null : new float[size]; |
|
2427 new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
2428 } else { |
|
2429 sort(null, a, 0, low, high); |
|
2430 } |
|
2431 |
|
2432 /* |
|
2433 * Phase 3. Turn positive zero 0.0f |
|
2434 * back into negative zero -0.0f. |
|
2435 */ |
|
2436 if (++numNegativeZero == 1) { |
|
2437 return; |
|
2438 } |
|
2439 |
|
2440 /* |
|
2441 * Find the position one less than |
|
2442 * the index of the first zero. |
|
2443 */ |
|
2444 while (low <= high) { |
|
2445 int middle = (low + high) >>> 1; |
|
2446 |
|
2447 if (a[middle] < 0) { |
|
2448 low = middle + 1; |
|
2449 } else { |
|
2450 high = middle - 1; |
|
2451 } |
|
2452 } |
|
2453 |
|
2454 /* |
|
2455 * Replace the required number of 0.0f by -0.0f. |
|
2456 */ |
|
2457 while (--numNegativeZero > 0) { |
|
2458 a[++high] = -0.0f; |
|
2459 } |
|
2460 } |
|
2461 |
|
2462 /** |
|
2463 * Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
2464 * other sorts in special-cases, possibly with parallel partitions. |
|
2465 * |
|
2466 * @param sorter parallel context |
|
2467 * @param a the array to be sorted |
|
2468 * @param bits the combination of recursion depth and bit flag, where |
|
2469 * the right bit "0" indicates that array is the leftmost part |
|
2470 * @param low the index of the first element, inclusive, to be sorted |
|
2471 * @param high the index of the last element, exclusive, to be sorted |
|
2472 */ |
|
2473 static void sort(Sorter sorter, float[] a, int bits, int low, int high) { |
|
2474 while (true) { |
|
2475 int end = high - 1, size = high - low; |
|
2476 |
|
2477 /* |
|
2478 * Run mixed insertion sort on small non-leftmost parts. |
|
2479 */ |
|
2480 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
2481 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
2482 return; |
|
2483 } |
|
2484 |
|
2485 /* |
|
2486 * Invoke insertion sort on small leftmost part. |
|
2487 */ |
|
2488 if (size < MAX_INSERTION_SORT_SIZE) { |
|
2489 insertionSort(a, low, high); |
|
2490 return; |
|
2491 } |
|
2492 |
|
2493 /* |
|
2494 * Check if the whole array or large non-leftmost |
|
2495 * parts are nearly sorted and then merge runs. |
|
2496 */ |
|
2497 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
2498 && tryMergeRuns(sorter, a, low, size)) { |
|
2499 return; |
|
2500 } |
|
2501 |
|
2502 /* |
|
2503 * Switch to heap sort if execution |
|
2504 * time is becoming quadratic. |
|
2505 */ |
|
2506 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
2507 heapSort(a, low, high); |
|
2508 return; |
|
2509 } |
|
2510 |
|
2511 /* |
|
2512 * Use an inexpensive approximation of the golden ratio |
|
2513 * to select five sample elements and determine pivots. |
|
2514 */ |
|
2515 int step = (size >> 3) * 3 + 3; |
|
2516 |
|
2517 /* |
|
2518 * Five elements around (and including) the central element |
|
2519 * will be used for pivot selection as described below. The |
|
2520 * unequal choice of spacing these elements was empirically |
|
2521 * determined to work well on a wide variety of inputs. |
|
2522 */ |
|
2523 int e1 = low + step; |
|
2524 int e5 = end - step; |
|
2525 int e3 = (e1 + e5) >>> 1; |
|
2526 int e2 = (e1 + e3) >>> 1; |
|
2527 int e4 = (e3 + e5) >>> 1; |
|
2528 float a3 = a[e3]; |
|
2529 |
|
2530 /* |
|
2531 * Sort these elements in place by the combination |
|
2532 * of 4-element sorting network and insertion sort. |
1967 * |
2533 * |
1968 * left part center part right part |
2534 * 5 ------o-----------o------------ |
1969 * +-------------------------------------------------+ |
2535 * | | |
1970 * | < pivot | == pivot | ? | > pivot | |
2536 * 4 ------|-----o-----o-----o------ |
1971 * +-------------------------------------------------+ |
2537 * | | | |
1972 * ^ ^ ^ |
2538 * 2 ------o-----|-----o-----o------ |
1973 * | | | |
2539 * | | |
1974 * less k great |
2540 * 1 ------------o-----o------------ |
1975 * |
2541 */ |
1976 * Invariants: |
2542 if (a[e5] < a[e2]) { float t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
1977 * |
2543 if (a[e4] < a[e1]) { float t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
1978 * all in (left, less) < pivot |
2544 if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
1979 * all in [less, k) == pivot |
2545 if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
1980 * all in (great, right) > pivot |
2546 if (a[e4] < a[e2]) { float t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
1981 * |
2547 |
1982 * Pointer k is the first index of ?-part. |
2548 if (a3 < a[e2]) { |
1983 */ |
2549 if (a3 < a[e1]) { |
1984 for (int k = less; k <= great; ++k) { |
2550 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
1985 if (a[k] == pivot) { |
2551 } else { |
1986 continue; |
2552 a[e3] = a[e2]; a[e2] = a3; |
1987 } |
2553 } |
1988 char ak = a[k]; |
2554 } else if (a3 > a[e4]) { |
1989 if (ak < pivot) { // Move a[k] to left part |
2555 if (a3 > a[e5]) { |
1990 a[k] = a[less]; |
2556 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
1991 a[less] = ak; |
2557 } else { |
1992 ++less; |
2558 a[e3] = a[e4]; a[e4] = a3; |
1993 } else { // a[k] > pivot - Move a[k] to right part |
2559 } |
1994 while (a[great] > pivot) { |
2560 } |
1995 --great; |
2561 |
1996 } |
2562 // Pointers |
1997 if (a[great] < pivot) { // a[great] <= pivot |
2563 int lower = low; // The index of the last element of the left part |
1998 a[k] = a[less]; |
2564 int upper = end; // The index of the first element of the right part |
1999 a[less] = a[great]; |
2565 |
2000 ++less; |
2566 /* |
2001 } else { // a[great] == pivot |
2567 * Partitioning with 2 pivots in case of different elements. |
2002 /* |
2568 */ |
2003 * Even though a[great] equals to pivot, the |
2569 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
2004 * assignment a[k] = pivot may be incorrect, |
2570 |
2005 * if a[great] and pivot are floating-point |
2571 /* |
2006 * zeros of different signs. Therefore in float |
2572 * Use the first and fifth of the five sorted elements as |
2007 * and double sorting methods we have to use |
2573 * the pivots. These values are inexpensive approximation |
2008 * more accurate assignment a[k] = a[great]. |
2574 * of tertiles. Note, that pivot1 < pivot2. |
2009 */ |
2575 */ |
2010 a[k] = pivot; |
2576 float pivot1 = a[e1]; |
2011 } |
2577 float pivot2 = a[e5]; |
2012 a[great] = ak; |
2578 |
2013 --great; |
2579 /* |
2014 } |
2580 * The first and the last elements to be sorted are moved |
2015 } |
2581 * to the locations formerly occupied by the pivots. When |
2016 |
2582 * partitioning is completed, the pivots are swapped back |
2017 /* |
2583 * into their final positions, and excluded from the next |
2018 * Sort left and right parts recursively. |
2584 * subsequent sorting. |
2019 * All elements from center part are equal |
2585 */ |
2020 * and, therefore, already sorted. |
2586 a[e1] = a[lower]; |
2021 */ |
2587 a[e5] = a[upper]; |
2022 sort(a, left, less - 1, leftmost); |
2588 |
2023 sort(a, great + 1, right, false); |
2589 /* |
2024 } |
2590 * Skip elements, which are less or greater than the pivots. |
2025 } |
2591 */ |
2026 |
2592 while (a[++lower] < pivot1); |
2027 /** The number of distinct byte values. */ |
2593 while (a[--upper] > pivot2); |
2028 private static final int NUM_BYTE_VALUES = 1 << 8; |
2594 |
2029 |
2595 /* |
2030 /** |
2596 * Backward 3-interval partitioning |
2031 * Sorts the specified range of the array. |
2597 * |
2032 * |
2598 * left part central part right part |
2033 * @param a the array to be sorted |
2599 * +------------------------------------------------------------+ |
2034 * @param left the index of the first element, inclusive, to be sorted |
2600 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
2035 * @param right the index of the last element, inclusive, to be sorted |
2601 * +------------------------------------------------------------+ |
2036 */ |
2602 * ^ ^ ^ |
2037 static void sort(byte[] a, int left, int right) { |
2603 * | | | |
2038 // Use counting sort on large arrays |
2604 * lower k upper |
2039 if (right - left > COUNTING_SORT_THRESHOLD_FOR_BYTE) { |
|
2040 int[] count = new int[NUM_BYTE_VALUES]; |
|
2041 |
|
2042 for (int i = left - 1; ++i <= right; |
|
2043 count[a[i] - Byte.MIN_VALUE]++ |
|
2044 ); |
|
2045 for (int i = NUM_BYTE_VALUES, k = right + 1; k > left; ) { |
|
2046 while (count[--i] == 0); |
|
2047 byte value = (byte) (i + Byte.MIN_VALUE); |
|
2048 int s = count[i]; |
|
2049 |
|
2050 do { |
|
2051 a[--k] = value; |
|
2052 } while (--s > 0); |
|
2053 } |
|
2054 } else { // Use insertion sort on small arrays |
|
2055 for (int i = left, j = i; i < right; j = ++i) { |
|
2056 byte ai = a[i + 1]; |
|
2057 while (ai < a[j]) { |
|
2058 a[j + 1] = a[j]; |
|
2059 if (j-- == left) { |
|
2060 break; |
|
2061 } |
|
2062 } |
|
2063 a[j + 1] = ai; |
|
2064 } |
|
2065 } |
|
2066 } |
|
2067 |
|
2068 /** |
|
2069 * Sorts the specified range of the array using the given |
|
2070 * workspace array slice if possible for merging |
|
2071 * |
|
2072 * @param a the array to be sorted |
|
2073 * @param left the index of the first element, inclusive, to be sorted |
|
2074 * @param right the index of the last element, inclusive, to be sorted |
|
2075 * @param work a workspace array (slice) |
|
2076 * @param workBase origin of usable space in work array |
|
2077 * @param workLen usable size of work array |
|
2078 */ |
|
2079 static void sort(float[] a, int left, int right, |
|
2080 float[] work, int workBase, int workLen) { |
|
2081 /* |
|
2082 * Phase 1: Move NaNs to the end of the array. |
|
2083 */ |
|
2084 while (left <= right && Float.isNaN(a[right])) { |
|
2085 --right; |
|
2086 } |
|
2087 for (int k = right; --k >= left; ) { |
|
2088 float ak = a[k]; |
|
2089 if (ak != ak) { // a[k] is NaN |
|
2090 a[k] = a[right]; |
|
2091 a[right] = ak; |
|
2092 --right; |
|
2093 } |
|
2094 } |
|
2095 |
|
2096 /* |
|
2097 * Phase 2: Sort everything except NaNs (which are already in place). |
|
2098 */ |
|
2099 doSort(a, left, right, work, workBase, workLen); |
|
2100 |
|
2101 /* |
|
2102 * Phase 3: Place negative zeros before positive zeros. |
|
2103 */ |
|
2104 int hi = right; |
|
2105 |
|
2106 /* |
|
2107 * Find the first zero, or first positive, or last negative element. |
|
2108 */ |
|
2109 while (left < hi) { |
|
2110 int middle = (left + hi) >>> 1; |
|
2111 float middleValue = a[middle]; |
|
2112 |
|
2113 if (middleValue < 0.0f) { |
|
2114 left = middle + 1; |
|
2115 } else { |
|
2116 hi = middle; |
|
2117 } |
|
2118 } |
|
2119 |
|
2120 /* |
|
2121 * Skip the last negative value (if any) or all leading negative zeros. |
|
2122 */ |
|
2123 while (left <= right && Float.floatToRawIntBits(a[left]) < 0) { |
|
2124 ++left; |
|
2125 } |
|
2126 |
|
2127 /* |
|
2128 * Move negative zeros to the beginning of the sub-range. |
|
2129 * |
|
2130 * Partitioning: |
|
2131 * |
|
2132 * +----------------------------------------------------+ |
|
2133 * | < 0.0 | -0.0 | 0.0 | ? ( >= 0.0 ) | |
|
2134 * +----------------------------------------------------+ |
|
2135 * ^ ^ ^ |
|
2136 * | | | |
|
2137 * left p k |
|
2138 * |
|
2139 * Invariants: |
|
2140 * |
|
2141 * all in (*, left) < 0.0 |
|
2142 * all in [left, p) == -0.0 |
|
2143 * all in [p, k) == 0.0 |
|
2144 * all in [k, right] >= 0.0 |
|
2145 * |
|
2146 * Pointer k is the first index of ?-part. |
|
2147 */ |
|
2148 for (int k = left, p = left - 1; ++k <= right; ) { |
|
2149 float ak = a[k]; |
|
2150 if (ak != 0.0f) { |
|
2151 break; |
|
2152 } |
|
2153 if (Float.floatToRawIntBits(ak) < 0) { // ak is -0.0f |
|
2154 a[k] = 0.0f; |
|
2155 a[++p] = -0.0f; |
|
2156 } |
|
2157 } |
|
2158 } |
|
2159 |
|
2160 /** |
|
2161 * Sorts the specified range of the array. |
|
2162 * |
|
2163 * @param a the array to be sorted |
|
2164 * @param left the index of the first element, inclusive, to be sorted |
|
2165 * @param right the index of the last element, inclusive, to be sorted |
|
2166 * @param work a workspace array (slice) |
|
2167 * @param workBase origin of usable space in work array |
|
2168 * @param workLen usable size of work array |
|
2169 */ |
|
2170 private static void doSort(float[] a, int left, int right, |
|
2171 float[] work, int workBase, int workLen) { |
|
2172 // Use Quicksort on small arrays |
|
2173 if (right - left < QUICKSORT_THRESHOLD) { |
|
2174 sort(a, left, right, true); |
|
2175 return; |
|
2176 } |
|
2177 |
|
2178 /* |
|
2179 * Index run[i] is the start of i-th run |
|
2180 * (ascending or descending sequence). |
|
2181 */ |
|
2182 int[] run = new int[MAX_RUN_COUNT + 1]; |
|
2183 int count = 0; run[0] = left; |
|
2184 |
|
2185 // Check if the array is nearly sorted |
|
2186 for (int k = left; k < right; run[count] = k) { |
|
2187 // Equal items in the beginning of the sequence |
|
2188 while (k < right && a[k] == a[k + 1]) |
|
2189 k++; |
|
2190 if (k == right) break; // Sequence finishes with equal items |
|
2191 if (a[k] < a[k + 1]) { // ascending |
|
2192 while (++k <= right && a[k - 1] <= a[k]); |
|
2193 } else if (a[k] > a[k + 1]) { // descending |
|
2194 while (++k <= right && a[k - 1] >= a[k]); |
|
2195 // Transform into an ascending sequence |
|
2196 for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) { |
|
2197 float t = a[lo]; a[lo] = a[hi]; a[hi] = t; |
|
2198 } |
|
2199 } |
|
2200 |
|
2201 // Merge a transformed descending sequence followed by an |
|
2202 // ascending sequence |
|
2203 if (run[count] > left && a[run[count]] >= a[run[count] - 1]) { |
|
2204 count--; |
|
2205 } |
|
2206 |
|
2207 /* |
|
2208 * The array is not highly structured, |
|
2209 * use Quicksort instead of merge sort. |
|
2210 */ |
|
2211 if (++count == MAX_RUN_COUNT) { |
|
2212 sort(a, left, right, true); |
|
2213 return; |
|
2214 } |
|
2215 } |
|
2216 |
|
2217 // These invariants should hold true: |
|
2218 // run[0] = 0 |
|
2219 // run[<last>] = right + 1; (terminator) |
|
2220 |
|
2221 if (count == 0) { |
|
2222 // A single equal run |
|
2223 return; |
|
2224 } else if (count == 1 && run[count] > right) { |
|
2225 // Either a single ascending or a transformed descending run. |
|
2226 // Always check that a final run is a proper terminator, otherwise |
|
2227 // we have an unterminated trailing run, to handle downstream. |
|
2228 return; |
|
2229 } |
|
2230 right++; |
|
2231 if (run[count] < right) { |
|
2232 // Corner case: the final run is not a terminator. This may happen |
|
2233 // if a final run is an equals run, or there is a single-element run |
|
2234 // at the end. Fix up by adding a proper terminator at the end. |
|
2235 // Note that we terminate with (right + 1), incremented earlier. |
|
2236 run[++count] = right; |
|
2237 } |
|
2238 |
|
2239 // Determine alternation base for merge |
|
2240 byte odd = 0; |
|
2241 for (int n = 1; (n <<= 1) < count; odd ^= 1); |
|
2242 |
|
2243 // Use or create temporary array b for merging |
|
2244 float[] b; // temp array; alternates with a |
|
2245 int ao, bo; // array offsets from 'left' |
|
2246 int blen = right - left; // space needed for b |
|
2247 if (work == null || workLen < blen || workBase + blen > work.length) { |
|
2248 work = new float[blen]; |
|
2249 workBase = 0; |
|
2250 } |
|
2251 if (odd == 0) { |
|
2252 System.arraycopy(a, left, work, workBase, blen); |
|
2253 b = a; |
|
2254 bo = 0; |
|
2255 a = work; |
|
2256 ao = workBase - left; |
|
2257 } else { |
|
2258 b = work; |
|
2259 ao = 0; |
|
2260 bo = workBase - left; |
|
2261 } |
|
2262 |
|
2263 // Merging |
|
2264 for (int last; count > 1; count = last) { |
|
2265 for (int k = (last = 0) + 2; k <= count; k += 2) { |
|
2266 int hi = run[k], mi = run[k - 1]; |
|
2267 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) { |
|
2268 if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) { |
|
2269 b[i + bo] = a[p++ + ao]; |
|
2270 } else { |
|
2271 b[i + bo] = a[q++ + ao]; |
|
2272 } |
|
2273 } |
|
2274 run[++last] = hi; |
|
2275 } |
|
2276 if ((count & 1) != 0) { |
|
2277 for (int i = right, lo = run[count - 1]; --i >= lo; |
|
2278 b[i + bo] = a[i + ao] |
|
2279 ); |
|
2280 run[++last] = right; |
|
2281 } |
|
2282 float[] t = a; a = b; b = t; |
|
2283 int o = ao; ao = bo; bo = o; |
|
2284 } |
|
2285 } |
|
2286 |
|
2287 /** |
|
2288 * Sorts the specified range of the array by Dual-Pivot Quicksort. |
|
2289 * |
|
2290 * @param a the array to be sorted |
|
2291 * @param left the index of the first element, inclusive, to be sorted |
|
2292 * @param right the index of the last element, inclusive, to be sorted |
|
2293 * @param leftmost indicates if this part is the leftmost in the range |
|
2294 */ |
|
2295 private static void sort(float[] a, int left, int right, boolean leftmost) { |
|
2296 int length = right - left + 1; |
|
2297 |
|
2298 // Use insertion sort on tiny arrays |
|
2299 if (length < INSERTION_SORT_THRESHOLD) { |
|
2300 if (leftmost) { |
|
2301 /* |
|
2302 * Traditional (without sentinel) insertion sort, |
|
2303 * optimized for server VM, is used in case of |
|
2304 * the leftmost part. |
|
2305 */ |
|
2306 for (int i = left, j = i; i < right; j = ++i) { |
|
2307 float ai = a[i + 1]; |
|
2308 while (ai < a[j]) { |
|
2309 a[j + 1] = a[j]; |
|
2310 if (j-- == left) { |
|
2311 break; |
|
2312 } |
|
2313 } |
|
2314 a[j + 1] = ai; |
|
2315 } |
|
2316 } else { |
|
2317 /* |
|
2318 * Skip the longest ascending sequence. |
|
2319 */ |
|
2320 do { |
|
2321 if (left >= right) { |
|
2322 return; |
|
2323 } |
|
2324 } while (a[++left] >= a[left - 1]); |
|
2325 |
|
2326 /* |
|
2327 * Every element from adjoining part plays the role |
|
2328 * of sentinel, therefore this allows us to avoid the |
|
2329 * left range check on each iteration. Moreover, we use |
|
2330 * the more optimized algorithm, so called pair insertion |
|
2331 * sort, which is faster (in the context of Quicksort) |
|
2332 * than traditional implementation of insertion sort. |
|
2333 */ |
|
2334 for (int k = left; ++left <= right; k = ++left) { |
|
2335 float a1 = a[k], a2 = a[left]; |
|
2336 |
|
2337 if (a1 < a2) { |
|
2338 a2 = a1; a1 = a[left]; |
|
2339 } |
|
2340 while (a1 < a[--k]) { |
|
2341 a[k + 2] = a[k]; |
|
2342 } |
|
2343 a[++k + 1] = a1; |
|
2344 |
|
2345 while (a2 < a[--k]) { |
|
2346 a[k + 1] = a[k]; |
|
2347 } |
|
2348 a[k + 1] = a2; |
|
2349 } |
|
2350 float last = a[right]; |
|
2351 |
|
2352 while (last < a[--right]) { |
|
2353 a[right + 1] = a[right]; |
|
2354 } |
|
2355 a[right + 1] = last; |
|
2356 } |
|
2357 return; |
|
2358 } |
|
2359 |
|
2360 // Inexpensive approximation of length / 7 |
|
2361 int seventh = (length >> 3) + (length >> 6) + 1; |
|
2362 |
|
2363 /* |
|
2364 * Sort five evenly spaced elements around (and including) the |
|
2365 * center element in the range. These elements will be used for |
|
2366 * pivot selection as described below. The choice for spacing |
|
2367 * these elements was empirically determined to work well on |
|
2368 * a wide variety of inputs. |
|
2369 */ |
|
2370 int e3 = (left + right) >>> 1; // The midpoint |
|
2371 int e2 = e3 - seventh; |
|
2372 int e1 = e2 - seventh; |
|
2373 int e4 = e3 + seventh; |
|
2374 int e5 = e4 + seventh; |
|
2375 |
|
2376 // Sort these elements using insertion sort |
|
2377 if (a[e2] < a[e1]) { float t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
2378 |
|
2379 if (a[e3] < a[e2]) { float t = a[e3]; a[e3] = a[e2]; a[e2] = t; |
|
2380 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
2381 } |
|
2382 if (a[e4] < a[e3]) { float t = a[e4]; a[e4] = a[e3]; a[e3] = t; |
|
2383 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
2384 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
2385 } |
|
2386 } |
|
2387 if (a[e5] < a[e4]) { float t = a[e5]; a[e5] = a[e4]; a[e4] = t; |
|
2388 if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; |
|
2389 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
2390 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
2391 } |
|
2392 } |
|
2393 } |
|
2394 |
|
2395 // Pointers |
|
2396 int less = left; // The index of the first element of center part |
|
2397 int great = right; // The index before the first element of right part |
|
2398 |
|
2399 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { |
|
2400 /* |
|
2401 * Use the second and fourth of the five sorted elements as pivots. |
|
2402 * These values are inexpensive approximations of the first and |
|
2403 * second terciles of the array. Note that pivot1 <= pivot2. |
|
2404 */ |
|
2405 float pivot1 = a[e2]; |
|
2406 float pivot2 = a[e4]; |
|
2407 |
|
2408 /* |
|
2409 * The first and the last elements to be sorted are moved to the |
|
2410 * locations formerly occupied by the pivots. When partitioning |
|
2411 * is complete, the pivots are swapped back into their final |
|
2412 * positions, and excluded from subsequent sorting. |
|
2413 */ |
|
2414 a[e2] = a[left]; |
|
2415 a[e4] = a[right]; |
|
2416 |
|
2417 /* |
|
2418 * Skip elements, which are less or greater than pivot values. |
|
2419 */ |
|
2420 while (a[++less] < pivot1); |
|
2421 while (a[--great] > pivot2); |
|
2422 |
|
2423 /* |
|
2424 * Partitioning: |
|
2425 * |
|
2426 * left part center part right part |
|
2427 * +--------------------------------------------------------------+ |
|
2428 * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | |
|
2429 * +--------------------------------------------------------------+ |
|
2430 * ^ ^ ^ |
|
2431 * | | | |
|
2432 * less k great |
|
2433 * |
|
2434 * Invariants: |
|
2435 * |
|
2436 * all in (left, less) < pivot1 |
|
2437 * pivot1 <= all in [less, k) <= pivot2 |
|
2438 * all in (great, right) > pivot2 |
|
2439 * |
|
2440 * Pointer k is the first index of ?-part. |
|
2441 */ |
|
2442 outer: |
|
2443 for (int k = less - 1; ++k <= great; ) { |
|
2444 float ak = a[k]; |
|
2445 if (ak < pivot1) { // Move a[k] to left part |
|
2446 a[k] = a[less]; |
|
2447 /* |
|
2448 * Here and below we use "a[i] = b; i++;" instead |
|
2449 * of "a[i++] = b;" due to performance issue. |
|
2450 */ |
|
2451 a[less] = ak; |
|
2452 ++less; |
|
2453 } else if (ak > pivot2) { // Move a[k] to right part |
|
2454 while (a[great] > pivot2) { |
|
2455 if (great-- == k) { |
|
2456 break outer; |
|
2457 } |
|
2458 } |
|
2459 if (a[great] < pivot1) { // a[great] <= pivot2 |
|
2460 a[k] = a[less]; |
|
2461 a[less] = a[great]; |
|
2462 ++less; |
|
2463 } else { // pivot1 <= a[great] <= pivot2 |
|
2464 a[k] = a[great]; |
|
2465 } |
|
2466 /* |
|
2467 * Here and below we use "a[i] = b; i--;" instead |
|
2468 * of "a[i--] = b;" due to performance issue. |
|
2469 */ |
|
2470 a[great] = ak; |
|
2471 --great; |
|
2472 } |
|
2473 } |
|
2474 |
|
2475 // Swap pivots into their final positions |
|
2476 a[left] = a[less - 1]; a[less - 1] = pivot1; |
|
2477 a[right] = a[great + 1]; a[great + 1] = pivot2; |
|
2478 |
|
2479 // Sort left and right parts recursively, excluding known pivots |
|
2480 sort(a, left, less - 2, leftmost); |
|
2481 sort(a, great + 2, right, false); |
|
2482 |
|
2483 /* |
|
2484 * If center part is too large (comprises > 4/7 of the array), |
|
2485 * swap internal pivot values to ends. |
|
2486 */ |
|
2487 if (less < e1 && e5 < great) { |
|
2488 /* |
|
2489 * Skip elements, which are equal to pivot values. |
|
2490 */ |
|
2491 while (a[less] == pivot1) { |
|
2492 ++less; |
|
2493 } |
|
2494 |
|
2495 while (a[great] == pivot2) { |
|
2496 --great; |
|
2497 } |
|
2498 |
|
2499 /* |
|
2500 * Partitioning: |
|
2501 * |
|
2502 * left part center part right part |
|
2503 * +----------------------------------------------------------+ |
|
2504 * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | |
|
2505 * +----------------------------------------------------------+ |
|
2506 * ^ ^ ^ |
|
2507 * | | | |
|
2508 * less k great |
|
2509 * |
2605 * |
2510 * Invariants: |
2606 * Invariants: |
2511 * |
2607 * |
2512 * all in (*, less) == pivot1 |
2608 * all in (low, lower] < pivot1 |
2513 * pivot1 < all in [less, k) < pivot2 |
2609 * pivot1 <= all in (k, upper) <= pivot2 |
2514 * all in (great, *) == pivot2 |
2610 * all in [upper, end) > pivot2 |
2515 * |
2611 * |
2516 * Pointer k is the first index of ?-part. |
2612 * Pointer k is the last index of ?-part |
2517 */ |
2613 */ |
2518 outer: |
2614 for (int unused = --lower, k = ++upper; --k > lower; ) { |
2519 for (int k = less - 1; ++k <= great; ) { |
|
2520 float ak = a[k]; |
2615 float ak = a[k]; |
2521 if (ak == pivot1) { // Move a[k] to left part |
2616 |
2522 a[k] = a[less]; |
2617 if (ak < pivot1) { // Move a[k] to the left side |
2523 a[less] = ak; |
2618 while (lower < k) { |
2524 ++less; |
2619 if (a[++lower] >= pivot1) { |
2525 } else if (ak == pivot2) { // Move a[k] to right part |
2620 if (a[lower] > pivot2) { |
2526 while (a[great] == pivot2) { |
2621 a[k] = a[--upper]; |
2527 if (great-- == k) { |
2622 a[upper] = a[lower]; |
2528 break outer; |
2623 } else { |
|
2624 a[k] = a[lower]; |
|
2625 } |
|
2626 a[lower] = ak; |
|
2627 break; |
2529 } |
2628 } |
2530 } |
2629 } |
2531 if (a[great] == pivot1) { // a[great] < pivot2 |
2630 } else if (ak > pivot2) { // Move a[k] to the right side |
2532 a[k] = a[less]; |
2631 a[k] = a[--upper]; |
2533 /* |
2632 a[upper] = ak; |
2534 * Even though a[great] equals to pivot1, the |
2633 } |
2535 * assignment a[less] = pivot1 may be incorrect, |
2634 } |
2536 * if a[great] and pivot1 are floating-point zeros |
2635 |
2537 * of different signs. Therefore in float and |
2636 /* |
2538 * double sorting methods we have to use more |
2637 * Swap the pivots into their final positions. |
2539 * accurate assignment a[less] = a[great]. |
2638 */ |
2540 */ |
2639 a[low] = a[lower]; a[lower] = pivot1; |
2541 a[less] = a[great]; |
2640 a[end] = a[upper]; a[upper] = pivot2; |
2542 ++less; |
2641 |
2543 } else { // pivot1 < a[great] < pivot2 |
2642 /* |
2544 a[k] = a[great]; |
2643 * Sort non-left parts recursively (possibly in parallel), |
|
2644 * excluding known pivots. |
|
2645 */ |
|
2646 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
2647 sorter.forkSorter(bits | 1, lower + 1, upper); |
|
2648 sorter.forkSorter(bits | 1, upper + 1, high); |
|
2649 } else { |
|
2650 sort(sorter, a, bits | 1, lower + 1, upper); |
|
2651 sort(sorter, a, bits | 1, upper + 1, high); |
|
2652 } |
|
2653 |
|
2654 } else { // Use single pivot in case of many equal elements |
|
2655 |
|
2656 /* |
|
2657 * Use the third of the five sorted elements as the pivot. |
|
2658 * This value is inexpensive approximation of the median. |
|
2659 */ |
|
2660 float pivot = a[e3]; |
|
2661 |
|
2662 /* |
|
2663 * The first element to be sorted is moved to the |
|
2664 * location formerly occupied by the pivot. After |
|
2665 * completion of partitioning the pivot is swapped |
|
2666 * back into its final position, and excluded from |
|
2667 * the next subsequent sorting. |
|
2668 */ |
|
2669 a[e3] = a[lower]; |
|
2670 |
|
2671 /* |
|
2672 * Traditional 3-way (Dutch National Flag) partitioning |
|
2673 * |
|
2674 * left part central part right part |
|
2675 * +------------------------------------------------------+ |
|
2676 * | < pivot | ? | == pivot | > pivot | |
|
2677 * +------------------------------------------------------+ |
|
2678 * ^ ^ ^ |
|
2679 * | | | |
|
2680 * lower k upper |
|
2681 * |
|
2682 * Invariants: |
|
2683 * |
|
2684 * all in (low, lower] < pivot |
|
2685 * all in (k, upper) == pivot |
|
2686 * all in [upper, end] > pivot |
|
2687 * |
|
2688 * Pointer k is the last index of ?-part |
|
2689 */ |
|
2690 for (int k = ++upper; --k > lower; ) { |
|
2691 float ak = a[k]; |
|
2692 |
|
2693 if (ak != pivot) { |
|
2694 a[k] = pivot; |
|
2695 |
|
2696 if (ak < pivot) { // Move a[k] to the left side |
|
2697 while (a[++lower] < pivot); |
|
2698 |
|
2699 if (a[lower] > pivot) { |
|
2700 a[--upper] = a[lower]; |
|
2701 } |
|
2702 a[lower] = ak; |
|
2703 } else { // ak > pivot - Move a[k] to the right side |
|
2704 a[--upper] = ak; |
2545 } |
2705 } |
2546 a[great] = ak; |
2706 } |
2547 --great; |
2707 } |
2548 } |
2708 |
2549 } |
2709 /* |
2550 } |
2710 * Swap the pivot into its final position. |
2551 |
2711 */ |
2552 // Sort center part recursively |
2712 a[low] = a[lower]; a[lower] = pivot; |
2553 sort(a, less, great, false); |
2713 |
2554 |
2714 /* |
2555 } else { // Partitioning with one pivot |
2715 * Sort the right part (possibly in parallel), excluding |
2556 /* |
2716 * known pivot. All elements from the central part are |
2557 * Use the third of the five sorted elements as pivot. |
2717 * equal and therefore already sorted. |
2558 * This value is inexpensive approximation of the median. |
2718 */ |
2559 */ |
2719 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
2560 float pivot = a[e3]; |
2720 sorter.forkSorter(bits | 1, upper, high); |
2561 |
2721 } else { |
2562 /* |
2722 sort(sorter, a, bits | 1, upper, high); |
2563 * Partitioning degenerates to the traditional 3-way |
2723 } |
2564 * (or "Dutch National Flag") schema: |
2724 } |
|
2725 high = lower; // Iterate along the left part |
|
2726 } |
|
2727 } |
|
2728 |
|
2729 /** |
|
2730 * Sorts the specified range of the array using mixed insertion sort. |
|
2731 * |
|
2732 * Mixed insertion sort is combination of simple insertion sort, |
|
2733 * pin insertion sort and pair insertion sort. |
|
2734 * |
|
2735 * In the context of Dual-Pivot Quicksort, the pivot element |
|
2736 * from the left part plays the role of sentinel, because it |
|
2737 * is less than any elements from the given part. Therefore, |
|
2738 * expensive check of the left range can be skipped on each |
|
2739 * iteration unless it is the leftmost call. |
|
2740 * |
|
2741 * @param a the array to be sorted |
|
2742 * @param low the index of the first element, inclusive, to be sorted |
|
2743 * @param end the index of the last element for simple insertion sort |
|
2744 * @param high the index of the last element, exclusive, to be sorted |
|
2745 */ |
|
2746 private static void mixedInsertionSort(float[] a, int low, int end, int high) { |
|
2747 if (end == high) { |
|
2748 |
|
2749 /* |
|
2750 * Invoke simple insertion sort on tiny array. |
|
2751 */ |
|
2752 for (int i; ++low < end; ) { |
|
2753 float ai = a[i = low]; |
|
2754 |
|
2755 while (ai < a[--i]) { |
|
2756 a[i + 1] = a[i]; |
|
2757 } |
|
2758 a[i + 1] = ai; |
|
2759 } |
|
2760 } else { |
|
2761 |
|
2762 /* |
|
2763 * Start with pin insertion sort on small part. |
2565 * |
2764 * |
2566 * left part center part right part |
2765 * Pin insertion sort is extended simple insertion sort. |
2567 * +-------------------------------------------------+ |
2766 * The main idea of this sort is to put elements larger |
2568 * | < pivot | == pivot | ? | > pivot | |
2767 * than an element called pin to the end of array (the |
2569 * +-------------------------------------------------+ |
2768 * proper area for such elements). It avoids expensive |
2570 * ^ ^ ^ |
2769 * movements of these elements through the whole array. |
2571 * | | | |
2770 */ |
2572 * less k great |
2771 float pin = a[end]; |
|
2772 |
|
2773 for (int i, p = high; ++low < end; ) { |
|
2774 float ai = a[i = low]; |
|
2775 |
|
2776 if (ai < a[i - 1]) { // Small element |
|
2777 |
|
2778 /* |
|
2779 * Insert small element into sorted part. |
|
2780 */ |
|
2781 a[i] = a[--i]; |
|
2782 |
|
2783 while (ai < a[--i]) { |
|
2784 a[i + 1] = a[i]; |
|
2785 } |
|
2786 a[i + 1] = ai; |
|
2787 |
|
2788 } else if (p > i && ai > pin) { // Large element |
|
2789 |
|
2790 /* |
|
2791 * Find element smaller than pin. |
|
2792 */ |
|
2793 while (a[--p] > pin); |
|
2794 |
|
2795 /* |
|
2796 * Swap it with large element. |
|
2797 */ |
|
2798 if (p > i) { |
|
2799 ai = a[p]; |
|
2800 a[p] = a[i]; |
|
2801 } |
|
2802 |
|
2803 /* |
|
2804 * Insert small element into sorted part. |
|
2805 */ |
|
2806 while (ai < a[--i]) { |
|
2807 a[i + 1] = a[i]; |
|
2808 } |
|
2809 a[i + 1] = ai; |
|
2810 } |
|
2811 } |
|
2812 |
|
2813 /* |
|
2814 * Continue with pair insertion sort on remain part. |
|
2815 */ |
|
2816 for (int i; low < high; ++low) { |
|
2817 float a1 = a[i = low], a2 = a[++low]; |
|
2818 |
|
2819 /* |
|
2820 * Insert two elements per iteration: at first, insert the |
|
2821 * larger element and then insert the smaller element, but |
|
2822 * from the position where the larger element was inserted. |
|
2823 */ |
|
2824 if (a1 > a2) { |
|
2825 |
|
2826 while (a1 < a[--i]) { |
|
2827 a[i + 2] = a[i]; |
|
2828 } |
|
2829 a[++i + 1] = a1; |
|
2830 |
|
2831 while (a2 < a[--i]) { |
|
2832 a[i + 1] = a[i]; |
|
2833 } |
|
2834 a[i + 1] = a2; |
|
2835 |
|
2836 } else if (a1 < a[i - 1]) { |
|
2837 |
|
2838 while (a2 < a[--i]) { |
|
2839 a[i + 2] = a[i]; |
|
2840 } |
|
2841 a[++i + 1] = a2; |
|
2842 |
|
2843 while (a1 < a[--i]) { |
|
2844 a[i + 1] = a[i]; |
|
2845 } |
|
2846 a[i + 1] = a1; |
|
2847 } |
|
2848 } |
|
2849 } |
|
2850 } |
|
2851 |
|
2852 /** |
|
2853 * Sorts the specified range of the array using insertion sort. |
|
2854 * |
|
2855 * @param a the array to be sorted |
|
2856 * @param low the index of the first element, inclusive, to be sorted |
|
2857 * @param high the index of the last element, exclusive, to be sorted |
|
2858 */ |
|
2859 private static void insertionSort(float[] a, int low, int high) { |
|
2860 for (int i, k = low; ++k < high; ) { |
|
2861 float ai = a[i = k]; |
|
2862 |
|
2863 if (ai < a[i - 1]) { |
|
2864 while (--i >= low && ai < a[i]) { |
|
2865 a[i + 1] = a[i]; |
|
2866 } |
|
2867 a[i + 1] = ai; |
|
2868 } |
|
2869 } |
|
2870 } |
|
2871 |
|
2872 /** |
|
2873 * Sorts the specified range of the array using heap sort. |
|
2874 * |
|
2875 * @param a the array to be sorted |
|
2876 * @param low the index of the first element, inclusive, to be sorted |
|
2877 * @param high the index of the last element, exclusive, to be sorted |
|
2878 */ |
|
2879 private static void heapSort(float[] a, int low, int high) { |
|
2880 for (int k = (low + high) >>> 1; k > low; ) { |
|
2881 pushDown(a, --k, a[k], low, high); |
|
2882 } |
|
2883 while (--high > low) { |
|
2884 float max = a[low]; |
|
2885 pushDown(a, low, a[high], low, high); |
|
2886 a[high] = max; |
|
2887 } |
|
2888 } |
|
2889 |
|
2890 /** |
|
2891 * Pushes specified element down during heap sort. |
|
2892 * |
|
2893 * @param a the given array |
|
2894 * @param p the start index |
|
2895 * @param value the given element |
|
2896 * @param low the index of the first element, inclusive, to be sorted |
|
2897 * @param high the index of the last element, exclusive, to be sorted |
|
2898 */ |
|
2899 private static void pushDown(float[] a, int p, float value, int low, int high) { |
|
2900 for (int k ;; a[p] = a[p = k]) { |
|
2901 k = (p << 1) - low + 2; // Index of the right child |
|
2902 |
|
2903 if (k > high) { |
|
2904 break; |
|
2905 } |
|
2906 if (k == high || a[k] < a[k - 1]) { |
|
2907 --k; |
|
2908 } |
|
2909 if (a[k] <= value) { |
|
2910 break; |
|
2911 } |
|
2912 } |
|
2913 a[p] = value; |
|
2914 } |
|
2915 |
|
2916 /** |
|
2917 * Tries to sort the specified range of the array. |
|
2918 * |
|
2919 * @param sorter parallel context |
|
2920 * @param a the array to be sorted |
|
2921 * @param low the index of the first element to be sorted |
|
2922 * @param size the array size |
|
2923 * @return true if finally sorted, false otherwise |
|
2924 */ |
|
2925 private static boolean tryMergeRuns(Sorter sorter, float[] a, int low, int size) { |
|
2926 |
|
2927 /* |
|
2928 * The run array is constructed only if initial runs are |
|
2929 * long enough to continue, run[i] then holds start index |
|
2930 * of the i-th sequence of elements in non-descending order. |
|
2931 */ |
|
2932 int[] run = null; |
|
2933 int high = low + size; |
|
2934 int count = 1, last = low; |
|
2935 |
|
2936 /* |
|
2937 * Identify all possible runs. |
|
2938 */ |
|
2939 for (int k = low + 1; k < high; ) { |
|
2940 |
|
2941 /* |
|
2942 * Find the end index of the current run. |
|
2943 */ |
|
2944 if (a[k - 1] < a[k]) { |
|
2945 |
|
2946 // Identify ascending sequence |
|
2947 while (++k < high && a[k - 1] <= a[k]); |
|
2948 |
|
2949 } else if (a[k - 1] > a[k]) { |
|
2950 |
|
2951 // Identify descending sequence |
|
2952 while (++k < high && a[k - 1] >= a[k]); |
|
2953 |
|
2954 // Reverse into ascending order |
|
2955 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
2956 float ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
2957 } |
|
2958 } else { // Identify constant sequence |
|
2959 for (float ak = a[k]; ++k < high && ak == a[k]; ); |
|
2960 |
|
2961 if (k < high) { |
|
2962 continue; |
|
2963 } |
|
2964 } |
|
2965 |
|
2966 /* |
|
2967 * Check special cases. |
|
2968 */ |
|
2969 if (run == null) { |
|
2970 if (k == high) { |
|
2971 |
|
2972 /* |
|
2973 * The array is monotonous sequence, |
|
2974 * and therefore already sorted. |
|
2975 */ |
|
2976 return true; |
|
2977 } |
|
2978 |
|
2979 if (k - low < MIN_FIRST_RUN_SIZE) { |
|
2980 |
|
2981 /* |
|
2982 * The first run is too small |
|
2983 * to proceed with scanning. |
|
2984 */ |
|
2985 return false; |
|
2986 } |
|
2987 |
|
2988 run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
|
2989 run[0] = low; |
|
2990 |
|
2991 } else if (a[last - 1] > a[last]) { |
|
2992 |
|
2993 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
|
2994 |
|
2995 /* |
|
2996 * The first runs are not long |
|
2997 * enough to continue scanning. |
|
2998 */ |
|
2999 return false; |
|
3000 } |
|
3001 |
|
3002 if (++count == MAX_RUN_CAPACITY) { |
|
3003 |
|
3004 /* |
|
3005 * Array is not highly structured. |
|
3006 */ |
|
3007 return false; |
|
3008 } |
|
3009 |
|
3010 if (count == run.length) { |
|
3011 |
|
3012 /* |
|
3013 * Increase capacity of index array. |
|
3014 */ |
|
3015 run = Arrays.copyOf(run, count << 1); |
|
3016 } |
|
3017 } |
|
3018 run[count] = (last = k); |
|
3019 } |
|
3020 |
|
3021 /* |
|
3022 * Merge runs of highly structured array. |
|
3023 */ |
|
3024 if (count > 1) { |
|
3025 float[] b; int offset = low; |
|
3026 |
|
3027 if (sorter == null || (b = (float[]) sorter.b) == null) { |
|
3028 b = new float[size]; |
|
3029 } else { |
|
3030 offset = sorter.offset; |
|
3031 } |
|
3032 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
3033 } |
|
3034 return true; |
|
3035 } |
|
3036 |
|
3037 /** |
|
3038 * Merges the specified runs. |
|
3039 * |
|
3040 * @param a the source array |
|
3041 * @param b the temporary buffer used in merging |
|
3042 * @param offset the start index in the source, inclusive |
|
3043 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
3044 * @param parallel indicates whether merging is performed in parallel |
|
3045 * @param run the start indexes of the runs, inclusive |
|
3046 * @param lo the start index of the first run, inclusive |
|
3047 * @param hi the start index of the last run, inclusive |
|
3048 * @return the destination where runs are merged |
|
3049 */ |
|
3050 private static float[] mergeRuns(float[] a, float[] b, int offset, |
|
3051 int aim, boolean parallel, int[] run, int lo, int hi) { |
|
3052 |
|
3053 if (hi - lo == 1) { |
|
3054 if (aim >= 0) { |
|
3055 return a; |
|
3056 } |
|
3057 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
3058 b[--j] = a[--i] |
|
3059 ); |
|
3060 return b; |
|
3061 } |
|
3062 |
|
3063 /* |
|
3064 * Split into approximately equal parts. |
|
3065 */ |
|
3066 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
3067 while (run[++mi + 1] <= rmi); |
|
3068 |
|
3069 /* |
|
3070 * Merge the left and right parts. |
|
3071 */ |
|
3072 float[] a1, a2; |
|
3073 |
|
3074 if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
3075 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
3076 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
3077 a2 = (float[]) merger.getDestination(); |
|
3078 } else { |
|
3079 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
3080 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
3081 } |
|
3082 |
|
3083 float[] dst = a1 == a ? b : a; |
|
3084 |
|
3085 int k = a1 == a ? run[lo] - offset : run[lo]; |
|
3086 int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
3087 int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
3088 int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
3089 int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
3090 |
|
3091 if (parallel) { |
|
3092 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
3093 } else { |
|
3094 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
3095 } |
|
3096 return dst; |
|
3097 } |
|
3098 |
|
3099 /** |
|
3100 * Merges the sorted parts. |
|
3101 * |
|
3102 * @param merger parallel context |
|
3103 * @param dst the destination where parts are merged |
|
3104 * @param k the start index of the destination, inclusive |
|
3105 * @param a1 the first part |
|
3106 * @param lo1 the start index of the first part, inclusive |
|
3107 * @param hi1 the end index of the first part, exclusive |
|
3108 * @param a2 the second part |
|
3109 * @param lo2 the start index of the second part, inclusive |
|
3110 * @param hi2 the end index of the second part, exclusive |
|
3111 */ |
|
3112 private static void mergeParts(Merger merger, float[] dst, int k, |
|
3113 float[] a1, int lo1, int hi1, float[] a2, int lo2, int hi2) { |
|
3114 |
|
3115 if (merger != null && a1 == a2) { |
|
3116 |
|
3117 while (true) { |
|
3118 |
|
3119 /* |
|
3120 * The first part must be larger. |
|
3121 */ |
|
3122 if (hi1 - lo1 < hi2 - lo2) { |
|
3123 int lo = lo1; lo1 = lo2; lo2 = lo; |
|
3124 int hi = hi1; hi1 = hi2; hi2 = hi; |
|
3125 } |
|
3126 |
|
3127 /* |
|
3128 * Small parts will be merged sequentially. |
|
3129 */ |
|
3130 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
3131 break; |
|
3132 } |
|
3133 |
|
3134 /* |
|
3135 * Find the median of the larger part. |
|
3136 */ |
|
3137 int mi1 = (lo1 + hi1) >>> 1; |
|
3138 float key = a1[mi1]; |
|
3139 int mi2 = hi2; |
|
3140 |
|
3141 /* |
|
3142 * Partition the smaller part. |
|
3143 */ |
|
3144 for (int loo = lo2; loo < mi2; ) { |
|
3145 int t = (loo + mi2) >>> 1; |
|
3146 |
|
3147 if (key > a2[t]) { |
|
3148 loo = t + 1; |
|
3149 } else { |
|
3150 mi2 = t; |
|
3151 } |
|
3152 } |
|
3153 |
|
3154 int d = mi2 - lo2 + mi1 - lo1; |
|
3155 |
|
3156 /* |
|
3157 * Merge the right sub-parts in parallel. |
|
3158 */ |
|
3159 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
3160 |
|
3161 /* |
|
3162 * Process the sub-left parts. |
|
3163 */ |
|
3164 hi1 = mi1; |
|
3165 hi2 = mi2; |
|
3166 } |
|
3167 } |
|
3168 |
|
3169 /* |
|
3170 * Merge small parts sequentially. |
|
3171 */ |
|
3172 while (lo1 < hi1 && lo2 < hi2) { |
|
3173 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
3174 } |
|
3175 if (dst != a1 || k < lo1) { |
|
3176 while (lo1 < hi1) { |
|
3177 dst[k++] = a1[lo1++]; |
|
3178 } |
|
3179 } |
|
3180 if (dst != a2 || k < lo2) { |
|
3181 while (lo2 < hi2) { |
|
3182 dst[k++] = a2[lo2++]; |
|
3183 } |
|
3184 } |
|
3185 } |
|
3186 |
|
3187 // [double] |
|
3188 |
|
3189 /** |
|
3190 * Sorts the specified range of the array using parallel merge |
|
3191 * sort and/or Dual-Pivot Quicksort. |
|
3192 * |
|
3193 * To balance the faster splitting and parallelism of merge sort |
|
3194 * with the faster element partitioning of Quicksort, ranges are |
|
3195 * subdivided in tiers such that, if there is enough parallelism, |
|
3196 * the four-way parallel merge is started, still ensuring enough |
|
3197 * parallelism to process the partitions. |
|
3198 * |
|
3199 * @param a the array to be sorted |
|
3200 * @param parallelism the parallelism level |
|
3201 * @param low the index of the first element, inclusive, to be sorted |
|
3202 * @param high the index of the last element, exclusive, to be sorted |
|
3203 */ |
|
3204 static void sort(double[] a, int parallelism, int low, int high) { |
|
3205 /* |
|
3206 * Phase 1. Count the number of negative zero -0.0d, |
|
3207 * turn them into positive zero, and move all NaNs |
|
3208 * to the end of the array. |
|
3209 */ |
|
3210 int numNegativeZero = 0; |
|
3211 |
|
3212 for (int k = high; k > low; ) { |
|
3213 double ak = a[--k]; |
|
3214 |
|
3215 if (ak == 0.0d && Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d |
|
3216 numNegativeZero += 1; |
|
3217 a[k] = 0.0d; |
|
3218 } else if (ak != ak) { // ak is NaN |
|
3219 a[k] = a[--high]; |
|
3220 a[high] = ak; |
|
3221 } |
|
3222 } |
|
3223 |
|
3224 /* |
|
3225 * Phase 2. Sort everything except NaNs, |
|
3226 * which are already in place. |
|
3227 */ |
|
3228 int size = high - low; |
|
3229 |
|
3230 if (parallelism > 1 && size > MIN_PARALLEL_SORT_SIZE) { |
|
3231 int depth = getDepth(parallelism, size >> 12); |
|
3232 double[] b = depth == 0 ? null : new double[size]; |
|
3233 new Sorter(null, a, b, low, size, low, depth).invoke(); |
|
3234 } else { |
|
3235 sort(null, a, 0, low, high); |
|
3236 } |
|
3237 |
|
3238 /* |
|
3239 * Phase 3. Turn positive zero 0.0d |
|
3240 * back into negative zero -0.0d. |
|
3241 */ |
|
3242 if (++numNegativeZero == 1) { |
|
3243 return; |
|
3244 } |
|
3245 |
|
3246 /* |
|
3247 * Find the position one less than |
|
3248 * the index of the first zero. |
|
3249 */ |
|
3250 while (low <= high) { |
|
3251 int middle = (low + high) >>> 1; |
|
3252 |
|
3253 if (a[middle] < 0) { |
|
3254 low = middle + 1; |
|
3255 } else { |
|
3256 high = middle - 1; |
|
3257 } |
|
3258 } |
|
3259 |
|
3260 /* |
|
3261 * Replace the required number of 0.0d by -0.0d. |
|
3262 */ |
|
3263 while (--numNegativeZero > 0) { |
|
3264 a[++high] = -0.0d; |
|
3265 } |
|
3266 } |
|
3267 |
|
3268 /** |
|
3269 * Sorts the specified array using the Dual-Pivot Quicksort and/or |
|
3270 * other sorts in special-cases, possibly with parallel partitions. |
|
3271 * |
|
3272 * @param sorter parallel context |
|
3273 * @param a the array to be sorted |
|
3274 * @param bits the combination of recursion depth and bit flag, where |
|
3275 * the right bit "0" indicates that array is the leftmost part |
|
3276 * @param low the index of the first element, inclusive, to be sorted |
|
3277 * @param high the index of the last element, exclusive, to be sorted |
|
3278 */ |
|
3279 static void sort(Sorter sorter, double[] a, int bits, int low, int high) { |
|
3280 while (true) { |
|
3281 int end = high - 1, size = high - low; |
|
3282 |
|
3283 /* |
|
3284 * Run mixed insertion sort on small non-leftmost parts. |
|
3285 */ |
|
3286 if (size < MAX_MIXED_INSERTION_SORT_SIZE + bits && (bits & 1) > 0) { |
|
3287 mixedInsertionSort(a, low, high - 3 * ((size >> 5) << 3), high); |
|
3288 return; |
|
3289 } |
|
3290 |
|
3291 /* |
|
3292 * Invoke insertion sort on small leftmost part. |
|
3293 */ |
|
3294 if (size < MAX_INSERTION_SORT_SIZE) { |
|
3295 insertionSort(a, low, high); |
|
3296 return; |
|
3297 } |
|
3298 |
|
3299 /* |
|
3300 * Check if the whole array or large non-leftmost |
|
3301 * parts are nearly sorted and then merge runs. |
|
3302 */ |
|
3303 if ((bits == 0 || size > MIN_TRY_MERGE_SIZE && (bits & 1) > 0) |
|
3304 && tryMergeRuns(sorter, a, low, size)) { |
|
3305 return; |
|
3306 } |
|
3307 |
|
3308 /* |
|
3309 * Switch to heap sort if execution |
|
3310 * time is becoming quadratic. |
|
3311 */ |
|
3312 if ((bits += DELTA) > MAX_RECURSION_DEPTH) { |
|
3313 heapSort(a, low, high); |
|
3314 return; |
|
3315 } |
|
3316 |
|
3317 /* |
|
3318 * Use an inexpensive approximation of the golden ratio |
|
3319 * to select five sample elements and determine pivots. |
|
3320 */ |
|
3321 int step = (size >> 3) * 3 + 3; |
|
3322 |
|
3323 /* |
|
3324 * Five elements around (and including) the central element |
|
3325 * will be used for pivot selection as described below. The |
|
3326 * unequal choice of spacing these elements was empirically |
|
3327 * determined to work well on a wide variety of inputs. |
|
3328 */ |
|
3329 int e1 = low + step; |
|
3330 int e5 = end - step; |
|
3331 int e3 = (e1 + e5) >>> 1; |
|
3332 int e2 = (e1 + e3) >>> 1; |
|
3333 int e4 = (e3 + e5) >>> 1; |
|
3334 double a3 = a[e3]; |
|
3335 |
|
3336 /* |
|
3337 * Sort these elements in place by the combination |
|
3338 * of 4-element sorting network and insertion sort. |
2573 * |
3339 * |
2574 * Invariants: |
3340 * 5 ------o-----------o------------ |
2575 * |
3341 * | | |
2576 * all in (left, less) < pivot |
3342 * 4 ------|-----o-----o-----o------ |
2577 * all in [less, k) == pivot |
3343 * | | | |
2578 * all in (great, right) > pivot |
3344 * 2 ------o-----|-----o-----o------ |
2579 * |
3345 * | | |
2580 * Pointer k is the first index of ?-part. |
3346 * 1 ------------o-----o------------ |
2581 */ |
3347 */ |
2582 for (int k = less; k <= great; ++k) { |
3348 if (a[e5] < a[e2]) { double t = a[e5]; a[e5] = a[e2]; a[e2] = t; } |
2583 if (a[k] == pivot) { |
3349 if (a[e4] < a[e1]) { double t = a[e4]; a[e4] = a[e1]; a[e1] = t; } |
2584 continue; |
3350 if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; } |
2585 } |
3351 if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
2586 float ak = a[k]; |
3352 if (a[e4] < a[e2]) { double t = a[e4]; a[e4] = a[e2]; a[e2] = t; } |
2587 if (ak < pivot) { // Move a[k] to left part |
3353 |
2588 a[k] = a[less]; |
3354 if (a3 < a[e2]) { |
2589 a[less] = ak; |
3355 if (a3 < a[e1]) { |
2590 ++less; |
3356 a[e3] = a[e2]; a[e2] = a[e1]; a[e1] = a3; |
2591 } else { // a[k] > pivot - Move a[k] to right part |
3357 } else { |
2592 while (a[great] > pivot) { |
3358 a[e3] = a[e2]; a[e2] = a3; |
2593 --great; |
3359 } |
2594 } |
3360 } else if (a3 > a[e4]) { |
2595 if (a[great] < pivot) { // a[great] <= pivot |
3361 if (a3 > a[e5]) { |
2596 a[k] = a[less]; |
3362 a[e3] = a[e4]; a[e4] = a[e5]; a[e5] = a3; |
2597 a[less] = a[great]; |
3363 } else { |
2598 ++less; |
3364 a[e3] = a[e4]; a[e4] = a3; |
2599 } else { // a[great] == pivot |
3365 } |
2600 /* |
3366 } |
2601 * Even though a[great] equals to pivot, the |
3367 |
2602 * assignment a[k] = pivot may be incorrect, |
3368 // Pointers |
2603 * if a[great] and pivot are floating-point |
3369 int lower = low; // The index of the last element of the left part |
2604 * zeros of different signs. Therefore in float |
3370 int upper = end; // The index of the first element of the right part |
2605 * and double sorting methods we have to use |
3371 |
2606 * more accurate assignment a[k] = a[great]. |
3372 /* |
2607 */ |
3373 * Partitioning with 2 pivots in case of different elements. |
2608 a[k] = a[great]; |
3374 */ |
2609 } |
3375 if (a[e1] < a[e2] && a[e2] < a[e3] && a[e3] < a[e4] && a[e4] < a[e5]) { |
2610 a[great] = ak; |
3376 |
2611 --great; |
3377 /* |
2612 } |
3378 * Use the first and fifth of the five sorted elements as |
2613 } |
3379 * the pivots. These values are inexpensive approximation |
2614 |
3380 * of tertiles. Note, that pivot1 < pivot2. |
2615 /* |
3381 */ |
2616 * Sort left and right parts recursively. |
3382 double pivot1 = a[e1]; |
2617 * All elements from center part are equal |
3383 double pivot2 = a[e5]; |
2618 * and, therefore, already sorted. |
3384 |
2619 */ |
3385 /* |
2620 sort(a, left, less - 1, leftmost); |
3386 * The first and the last elements to be sorted are moved |
2621 sort(a, great + 1, right, false); |
3387 * to the locations formerly occupied by the pivots. When |
2622 } |
3388 * partitioning is completed, the pivots are swapped back |
2623 } |
3389 * into their final positions, and excluded from the next |
2624 |
3390 * subsequent sorting. |
2625 /** |
3391 */ |
2626 * Sorts the specified range of the array using the given |
3392 a[e1] = a[lower]; |
2627 * workspace array slice if possible for merging |
3393 a[e5] = a[upper]; |
2628 * |
3394 |
2629 * @param a the array to be sorted |
3395 /* |
2630 * @param left the index of the first element, inclusive, to be sorted |
3396 * Skip elements, which are less or greater than the pivots. |
2631 * @param right the index of the last element, inclusive, to be sorted |
3397 */ |
2632 * @param work a workspace array (slice) |
3398 while (a[++lower] < pivot1); |
2633 * @param workBase origin of usable space in work array |
3399 while (a[--upper] > pivot2); |
2634 * @param workLen usable size of work array |
3400 |
2635 */ |
3401 /* |
2636 static void sort(double[] a, int left, int right, |
3402 * Backward 3-interval partitioning |
2637 double[] work, int workBase, int workLen) { |
3403 * |
2638 /* |
3404 * left part central part right part |
2639 * Phase 1: Move NaNs to the end of the array. |
3405 * +------------------------------------------------------------+ |
2640 */ |
3406 * | < pivot1 | ? | pivot1 <= && <= pivot2 | > pivot2 | |
2641 while (left <= right && Double.isNaN(a[right])) { |
3407 * +------------------------------------------------------------+ |
2642 --right; |
3408 * ^ ^ ^ |
2643 } |
3409 * | | | |
2644 for (int k = right; --k >= left; ) { |
3410 * lower k upper |
2645 double ak = a[k]; |
|
2646 if (ak != ak) { // a[k] is NaN |
|
2647 a[k] = a[right]; |
|
2648 a[right] = ak; |
|
2649 --right; |
|
2650 } |
|
2651 } |
|
2652 |
|
2653 /* |
|
2654 * Phase 2: Sort everything except NaNs (which are already in place). |
|
2655 */ |
|
2656 doSort(a, left, right, work, workBase, workLen); |
|
2657 |
|
2658 /* |
|
2659 * Phase 3: Place negative zeros before positive zeros. |
|
2660 */ |
|
2661 int hi = right; |
|
2662 |
|
2663 /* |
|
2664 * Find the first zero, or first positive, or last negative element. |
|
2665 */ |
|
2666 while (left < hi) { |
|
2667 int middle = (left + hi) >>> 1; |
|
2668 double middleValue = a[middle]; |
|
2669 |
|
2670 if (middleValue < 0.0d) { |
|
2671 left = middle + 1; |
|
2672 } else { |
|
2673 hi = middle; |
|
2674 } |
|
2675 } |
|
2676 |
|
2677 /* |
|
2678 * Skip the last negative value (if any) or all leading negative zeros. |
|
2679 */ |
|
2680 while (left <= right && Double.doubleToRawLongBits(a[left]) < 0) { |
|
2681 ++left; |
|
2682 } |
|
2683 |
|
2684 /* |
|
2685 * Move negative zeros to the beginning of the sub-range. |
|
2686 * |
|
2687 * Partitioning: |
|
2688 * |
|
2689 * +----------------------------------------------------+ |
|
2690 * | < 0.0 | -0.0 | 0.0 | ? ( >= 0.0 ) | |
|
2691 * +----------------------------------------------------+ |
|
2692 * ^ ^ ^ |
|
2693 * | | | |
|
2694 * left p k |
|
2695 * |
|
2696 * Invariants: |
|
2697 * |
|
2698 * all in (*, left) < 0.0 |
|
2699 * all in [left, p) == -0.0 |
|
2700 * all in [p, k) == 0.0 |
|
2701 * all in [k, right] >= 0.0 |
|
2702 * |
|
2703 * Pointer k is the first index of ?-part. |
|
2704 */ |
|
2705 for (int k = left, p = left - 1; ++k <= right; ) { |
|
2706 double ak = a[k]; |
|
2707 if (ak != 0.0d) { |
|
2708 break; |
|
2709 } |
|
2710 if (Double.doubleToRawLongBits(ak) < 0) { // ak is -0.0d |
|
2711 a[k] = 0.0d; |
|
2712 a[++p] = -0.0d; |
|
2713 } |
|
2714 } |
|
2715 } |
|
2716 |
|
2717 /** |
|
2718 * Sorts the specified range of the array. |
|
2719 * |
|
2720 * @param a the array to be sorted |
|
2721 * @param left the index of the first element, inclusive, to be sorted |
|
2722 * @param right the index of the last element, inclusive, to be sorted |
|
2723 * @param work a workspace array (slice) |
|
2724 * @param workBase origin of usable space in work array |
|
2725 * @param workLen usable size of work array |
|
2726 */ |
|
2727 private static void doSort(double[] a, int left, int right, |
|
2728 double[] work, int workBase, int workLen) { |
|
2729 // Use Quicksort on small arrays |
|
2730 if (right - left < QUICKSORT_THRESHOLD) { |
|
2731 sort(a, left, right, true); |
|
2732 return; |
|
2733 } |
|
2734 |
|
2735 /* |
|
2736 * Index run[i] is the start of i-th run |
|
2737 * (ascending or descending sequence). |
|
2738 */ |
|
2739 int[] run = new int[MAX_RUN_COUNT + 1]; |
|
2740 int count = 0; run[0] = left; |
|
2741 |
|
2742 // Check if the array is nearly sorted |
|
2743 for (int k = left; k < right; run[count] = k) { |
|
2744 // Equal items in the beginning of the sequence |
|
2745 while (k < right && a[k] == a[k + 1]) |
|
2746 k++; |
|
2747 if (k == right) break; // Sequence finishes with equal items |
|
2748 if (a[k] < a[k + 1]) { // ascending |
|
2749 while (++k <= right && a[k - 1] <= a[k]); |
|
2750 } else if (a[k] > a[k + 1]) { // descending |
|
2751 while (++k <= right && a[k - 1] >= a[k]); |
|
2752 // Transform into an ascending sequence |
|
2753 for (int lo = run[count] - 1, hi = k; ++lo < --hi; ) { |
|
2754 double t = a[lo]; a[lo] = a[hi]; a[hi] = t; |
|
2755 } |
|
2756 } |
|
2757 |
|
2758 // Merge a transformed descending sequence followed by an |
|
2759 // ascending sequence |
|
2760 if (run[count] > left && a[run[count]] >= a[run[count] - 1]) { |
|
2761 count--; |
|
2762 } |
|
2763 |
|
2764 /* |
|
2765 * The array is not highly structured, |
|
2766 * use Quicksort instead of merge sort. |
|
2767 */ |
|
2768 if (++count == MAX_RUN_COUNT) { |
|
2769 sort(a, left, right, true); |
|
2770 return; |
|
2771 } |
|
2772 } |
|
2773 |
|
2774 // These invariants should hold true: |
|
2775 // run[0] = 0 |
|
2776 // run[<last>] = right + 1; (terminator) |
|
2777 |
|
2778 if (count == 0) { |
|
2779 // A single equal run |
|
2780 return; |
|
2781 } else if (count == 1 && run[count] > right) { |
|
2782 // Either a single ascending or a transformed descending run. |
|
2783 // Always check that a final run is a proper terminator, otherwise |
|
2784 // we have an unterminated trailing run, to handle downstream. |
|
2785 return; |
|
2786 } |
|
2787 right++; |
|
2788 if (run[count] < right) { |
|
2789 // Corner case: the final run is not a terminator. This may happen |
|
2790 // if a final run is an equals run, or there is a single-element run |
|
2791 // at the end. Fix up by adding a proper terminator at the end. |
|
2792 // Note that we terminate with (right + 1), incremented earlier. |
|
2793 run[++count] = right; |
|
2794 } |
|
2795 |
|
2796 // Determine alternation base for merge |
|
2797 byte odd = 0; |
|
2798 for (int n = 1; (n <<= 1) < count; odd ^= 1); |
|
2799 |
|
2800 // Use or create temporary array b for merging |
|
2801 double[] b; // temp array; alternates with a |
|
2802 int ao, bo; // array offsets from 'left' |
|
2803 int blen = right - left; // space needed for b |
|
2804 if (work == null || workLen < blen || workBase + blen > work.length) { |
|
2805 work = new double[blen]; |
|
2806 workBase = 0; |
|
2807 } |
|
2808 if (odd == 0) { |
|
2809 System.arraycopy(a, left, work, workBase, blen); |
|
2810 b = a; |
|
2811 bo = 0; |
|
2812 a = work; |
|
2813 ao = workBase - left; |
|
2814 } else { |
|
2815 b = work; |
|
2816 ao = 0; |
|
2817 bo = workBase - left; |
|
2818 } |
|
2819 |
|
2820 // Merging |
|
2821 for (int last; count > 1; count = last) { |
|
2822 for (int k = (last = 0) + 2; k <= count; k += 2) { |
|
2823 int hi = run[k], mi = run[k - 1]; |
|
2824 for (int i = run[k - 2], p = i, q = mi; i < hi; ++i) { |
|
2825 if (q >= hi || p < mi && a[p + ao] <= a[q + ao]) { |
|
2826 b[i + bo] = a[p++ + ao]; |
|
2827 } else { |
|
2828 b[i + bo] = a[q++ + ao]; |
|
2829 } |
|
2830 } |
|
2831 run[++last] = hi; |
|
2832 } |
|
2833 if ((count & 1) != 0) { |
|
2834 for (int i = right, lo = run[count - 1]; --i >= lo; |
|
2835 b[i + bo] = a[i + ao] |
|
2836 ); |
|
2837 run[++last] = right; |
|
2838 } |
|
2839 double[] t = a; a = b; b = t; |
|
2840 int o = ao; ao = bo; bo = o; |
|
2841 } |
|
2842 } |
|
2843 |
|
2844 /** |
|
2845 * Sorts the specified range of the array by Dual-Pivot Quicksort. |
|
2846 * |
|
2847 * @param a the array to be sorted |
|
2848 * @param left the index of the first element, inclusive, to be sorted |
|
2849 * @param right the index of the last element, inclusive, to be sorted |
|
2850 * @param leftmost indicates if this part is the leftmost in the range |
|
2851 */ |
|
2852 private static void sort(double[] a, int left, int right, boolean leftmost) { |
|
2853 int length = right - left + 1; |
|
2854 |
|
2855 // Use insertion sort on tiny arrays |
|
2856 if (length < INSERTION_SORT_THRESHOLD) { |
|
2857 if (leftmost) { |
|
2858 /* |
|
2859 * Traditional (without sentinel) insertion sort, |
|
2860 * optimized for server VM, is used in case of |
|
2861 * the leftmost part. |
|
2862 */ |
|
2863 for (int i = left, j = i; i < right; j = ++i) { |
|
2864 double ai = a[i + 1]; |
|
2865 while (ai < a[j]) { |
|
2866 a[j + 1] = a[j]; |
|
2867 if (j-- == left) { |
|
2868 break; |
|
2869 } |
|
2870 } |
|
2871 a[j + 1] = ai; |
|
2872 } |
|
2873 } else { |
|
2874 /* |
|
2875 * Skip the longest ascending sequence. |
|
2876 */ |
|
2877 do { |
|
2878 if (left >= right) { |
|
2879 return; |
|
2880 } |
|
2881 } while (a[++left] >= a[left - 1]); |
|
2882 |
|
2883 /* |
|
2884 * Every element from adjoining part plays the role |
|
2885 * of sentinel, therefore this allows us to avoid the |
|
2886 * left range check on each iteration. Moreover, we use |
|
2887 * the more optimized algorithm, so called pair insertion |
|
2888 * sort, which is faster (in the context of Quicksort) |
|
2889 * than traditional implementation of insertion sort. |
|
2890 */ |
|
2891 for (int k = left; ++left <= right; k = ++left) { |
|
2892 double a1 = a[k], a2 = a[left]; |
|
2893 |
|
2894 if (a1 < a2) { |
|
2895 a2 = a1; a1 = a[left]; |
|
2896 } |
|
2897 while (a1 < a[--k]) { |
|
2898 a[k + 2] = a[k]; |
|
2899 } |
|
2900 a[++k + 1] = a1; |
|
2901 |
|
2902 while (a2 < a[--k]) { |
|
2903 a[k + 1] = a[k]; |
|
2904 } |
|
2905 a[k + 1] = a2; |
|
2906 } |
|
2907 double last = a[right]; |
|
2908 |
|
2909 while (last < a[--right]) { |
|
2910 a[right + 1] = a[right]; |
|
2911 } |
|
2912 a[right + 1] = last; |
|
2913 } |
|
2914 return; |
|
2915 } |
|
2916 |
|
2917 // Inexpensive approximation of length / 7 |
|
2918 int seventh = (length >> 3) + (length >> 6) + 1; |
|
2919 |
|
2920 /* |
|
2921 * Sort five evenly spaced elements around (and including) the |
|
2922 * center element in the range. These elements will be used for |
|
2923 * pivot selection as described below. The choice for spacing |
|
2924 * these elements was empirically determined to work well on |
|
2925 * a wide variety of inputs. |
|
2926 */ |
|
2927 int e3 = (left + right) >>> 1; // The midpoint |
|
2928 int e2 = e3 - seventh; |
|
2929 int e1 = e2 - seventh; |
|
2930 int e4 = e3 + seventh; |
|
2931 int e5 = e4 + seventh; |
|
2932 |
|
2933 // Sort these elements using insertion sort |
|
2934 if (a[e2] < a[e1]) { double t = a[e2]; a[e2] = a[e1]; a[e1] = t; } |
|
2935 |
|
2936 if (a[e3] < a[e2]) { double t = a[e3]; a[e3] = a[e2]; a[e2] = t; |
|
2937 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
2938 } |
|
2939 if (a[e4] < a[e3]) { double t = a[e4]; a[e4] = a[e3]; a[e3] = t; |
|
2940 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
2941 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
2942 } |
|
2943 } |
|
2944 if (a[e5] < a[e4]) { double t = a[e5]; a[e5] = a[e4]; a[e4] = t; |
|
2945 if (t < a[e3]) { a[e4] = a[e3]; a[e3] = t; |
|
2946 if (t < a[e2]) { a[e3] = a[e2]; a[e2] = t; |
|
2947 if (t < a[e1]) { a[e2] = a[e1]; a[e1] = t; } |
|
2948 } |
|
2949 } |
|
2950 } |
|
2951 |
|
2952 // Pointers |
|
2953 int less = left; // The index of the first element of center part |
|
2954 int great = right; // The index before the first element of right part |
|
2955 |
|
2956 if (a[e1] != a[e2] && a[e2] != a[e3] && a[e3] != a[e4] && a[e4] != a[e5]) { |
|
2957 /* |
|
2958 * Use the second and fourth of the five sorted elements as pivots. |
|
2959 * These values are inexpensive approximations of the first and |
|
2960 * second terciles of the array. Note that pivot1 <= pivot2. |
|
2961 */ |
|
2962 double pivot1 = a[e2]; |
|
2963 double pivot2 = a[e4]; |
|
2964 |
|
2965 /* |
|
2966 * The first and the last elements to be sorted are moved to the |
|
2967 * locations formerly occupied by the pivots. When partitioning |
|
2968 * is complete, the pivots are swapped back into their final |
|
2969 * positions, and excluded from subsequent sorting. |
|
2970 */ |
|
2971 a[e2] = a[left]; |
|
2972 a[e4] = a[right]; |
|
2973 |
|
2974 /* |
|
2975 * Skip elements, which are less or greater than pivot values. |
|
2976 */ |
|
2977 while (a[++less] < pivot1); |
|
2978 while (a[--great] > pivot2); |
|
2979 |
|
2980 /* |
|
2981 * Partitioning: |
|
2982 * |
|
2983 * left part center part right part |
|
2984 * +--------------------------------------------------------------+ |
|
2985 * | < pivot1 | pivot1 <= && <= pivot2 | ? | > pivot2 | |
|
2986 * +--------------------------------------------------------------+ |
|
2987 * ^ ^ ^ |
|
2988 * | | | |
|
2989 * less k great |
|
2990 * |
|
2991 * Invariants: |
|
2992 * |
|
2993 * all in (left, less) < pivot1 |
|
2994 * pivot1 <= all in [less, k) <= pivot2 |
|
2995 * all in (great, right) > pivot2 |
|
2996 * |
|
2997 * Pointer k is the first index of ?-part. |
|
2998 */ |
|
2999 outer: |
|
3000 for (int k = less - 1; ++k <= great; ) { |
|
3001 double ak = a[k]; |
|
3002 if (ak < pivot1) { // Move a[k] to left part |
|
3003 a[k] = a[less]; |
|
3004 /* |
|
3005 * Here and below we use "a[i] = b; i++;" instead |
|
3006 * of "a[i++] = b;" due to performance issue. |
|
3007 */ |
|
3008 a[less] = ak; |
|
3009 ++less; |
|
3010 } else if (ak > pivot2) { // Move a[k] to right part |
|
3011 while (a[great] > pivot2) { |
|
3012 if (great-- == k) { |
|
3013 break outer; |
|
3014 } |
|
3015 } |
|
3016 if (a[great] < pivot1) { // a[great] <= pivot2 |
|
3017 a[k] = a[less]; |
|
3018 a[less] = a[great]; |
|
3019 ++less; |
|
3020 } else { // pivot1 <= a[great] <= pivot2 |
|
3021 a[k] = a[great]; |
|
3022 } |
|
3023 /* |
|
3024 * Here and below we use "a[i] = b; i--;" instead |
|
3025 * of "a[i--] = b;" due to performance issue. |
|
3026 */ |
|
3027 a[great] = ak; |
|
3028 --great; |
|
3029 } |
|
3030 } |
|
3031 |
|
3032 // Swap pivots into their final positions |
|
3033 a[left] = a[less - 1]; a[less - 1] = pivot1; |
|
3034 a[right] = a[great + 1]; a[great + 1] = pivot2; |
|
3035 |
|
3036 // Sort left and right parts recursively, excluding known pivots |
|
3037 sort(a, left, less - 2, leftmost); |
|
3038 sort(a, great + 2, right, false); |
|
3039 |
|
3040 /* |
|
3041 * If center part is too large (comprises > 4/7 of the array), |
|
3042 * swap internal pivot values to ends. |
|
3043 */ |
|
3044 if (less < e1 && e5 < great) { |
|
3045 /* |
|
3046 * Skip elements, which are equal to pivot values. |
|
3047 */ |
|
3048 while (a[less] == pivot1) { |
|
3049 ++less; |
|
3050 } |
|
3051 |
|
3052 while (a[great] == pivot2) { |
|
3053 --great; |
|
3054 } |
|
3055 |
|
3056 /* |
|
3057 * Partitioning: |
|
3058 * |
|
3059 * left part center part right part |
|
3060 * +----------------------------------------------------------+ |
|
3061 * | == pivot1 | pivot1 < && < pivot2 | ? | == pivot2 | |
|
3062 * +----------------------------------------------------------+ |
|
3063 * ^ ^ ^ |
|
3064 * | | | |
|
3065 * less k great |
|
3066 * |
3411 * |
3067 * Invariants: |
3412 * Invariants: |
3068 * |
3413 * |
3069 * all in (*, less) == pivot1 |
3414 * all in (low, lower] < pivot1 |
3070 * pivot1 < all in [less, k) < pivot2 |
3415 * pivot1 <= all in (k, upper) <= pivot2 |
3071 * all in (great, *) == pivot2 |
3416 * all in [upper, end) > pivot2 |
3072 * |
3417 * |
3073 * Pointer k is the first index of ?-part. |
3418 * Pointer k is the last index of ?-part |
3074 */ |
3419 */ |
3075 outer: |
3420 for (int unused = --lower, k = ++upper; --k > lower; ) { |
3076 for (int k = less - 1; ++k <= great; ) { |
|
3077 double ak = a[k]; |
3421 double ak = a[k]; |
3078 if (ak == pivot1) { // Move a[k] to left part |
3422 |
3079 a[k] = a[less]; |
3423 if (ak < pivot1) { // Move a[k] to the left side |
3080 a[less] = ak; |
3424 while (lower < k) { |
3081 ++less; |
3425 if (a[++lower] >= pivot1) { |
3082 } else if (ak == pivot2) { // Move a[k] to right part |
3426 if (a[lower] > pivot2) { |
3083 while (a[great] == pivot2) { |
3427 a[k] = a[--upper]; |
3084 if (great-- == k) { |
3428 a[upper] = a[lower]; |
3085 break outer; |
3429 } else { |
|
3430 a[k] = a[lower]; |
|
3431 } |
|
3432 a[lower] = ak; |
|
3433 break; |
3086 } |
3434 } |
3087 } |
3435 } |
3088 if (a[great] == pivot1) { // a[great] < pivot2 |
3436 } else if (ak > pivot2) { // Move a[k] to the right side |
3089 a[k] = a[less]; |
3437 a[k] = a[--upper]; |
3090 /* |
3438 a[upper] = ak; |
3091 * Even though a[great] equals to pivot1, the |
3439 } |
3092 * assignment a[less] = pivot1 may be incorrect, |
3440 } |
3093 * if a[great] and pivot1 are floating-point zeros |
3441 |
3094 * of different signs. Therefore in float and |
3442 /* |
3095 * double sorting methods we have to use more |
3443 * Swap the pivots into their final positions. |
3096 * accurate assignment a[less] = a[great]. |
3444 */ |
3097 */ |
3445 a[low] = a[lower]; a[lower] = pivot1; |
3098 a[less] = a[great]; |
3446 a[end] = a[upper]; a[upper] = pivot2; |
3099 ++less; |
3447 |
3100 } else { // pivot1 < a[great] < pivot2 |
3448 /* |
3101 a[k] = a[great]; |
3449 * Sort non-left parts recursively (possibly in parallel), |
|
3450 * excluding known pivots. |
|
3451 */ |
|
3452 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
|
3453 sorter.forkSorter(bits | 1, lower + 1, upper); |
|
3454 sorter.forkSorter(bits | 1, upper + 1, high); |
|
3455 } else { |
|
3456 sort(sorter, a, bits | 1, lower + 1, upper); |
|
3457 sort(sorter, a, bits | 1, upper + 1, high); |
|
3458 } |
|
3459 |
|
3460 } else { // Use single pivot in case of many equal elements |
|
3461 |
|
3462 /* |
|
3463 * Use the third of the five sorted elements as the pivot. |
|
3464 * This value is inexpensive approximation of the median. |
|
3465 */ |
|
3466 double pivot = a[e3]; |
|
3467 |
|
3468 /* |
|
3469 * The first element to be sorted is moved to the |
|
3470 * location formerly occupied by the pivot. After |
|
3471 * completion of partitioning the pivot is swapped |
|
3472 * back into its final position, and excluded from |
|
3473 * the next subsequent sorting. |
|
3474 */ |
|
3475 a[e3] = a[lower]; |
|
3476 |
|
3477 /* |
|
3478 * Traditional 3-way (Dutch National Flag) partitioning |
|
3479 * |
|
3480 * left part central part right part |
|
3481 * +------------------------------------------------------+ |
|
3482 * | < pivot | ? | == pivot | > pivot | |
|
3483 * +------------------------------------------------------+ |
|
3484 * ^ ^ ^ |
|
3485 * | | | |
|
3486 * lower k upper |
|
3487 * |
|
3488 * Invariants: |
|
3489 * |
|
3490 * all in (low, lower] < pivot |
|
3491 * all in (k, upper) == pivot |
|
3492 * all in [upper, end] > pivot |
|
3493 * |
|
3494 * Pointer k is the last index of ?-part |
|
3495 */ |
|
3496 for (int k = ++upper; --k > lower; ) { |
|
3497 double ak = a[k]; |
|
3498 |
|
3499 if (ak != pivot) { |
|
3500 a[k] = pivot; |
|
3501 |
|
3502 if (ak < pivot) { // Move a[k] to the left side |
|
3503 while (a[++lower] < pivot); |
|
3504 |
|
3505 if (a[lower] > pivot) { |
|
3506 a[--upper] = a[lower]; |
|
3507 } |
|
3508 a[lower] = ak; |
|
3509 } else { // ak > pivot - Move a[k] to the right side |
|
3510 a[--upper] = ak; |
3102 } |
3511 } |
3103 a[great] = ak; |
3512 } |
3104 --great; |
3513 } |
3105 } |
3514 |
3106 } |
3515 /* |
3107 } |
3516 * Swap the pivot into its final position. |
3108 |
3517 */ |
3109 // Sort center part recursively |
3518 a[low] = a[lower]; a[lower] = pivot; |
3110 sort(a, less, great, false); |
3519 |
3111 |
3520 /* |
3112 } else { // Partitioning with one pivot |
3521 * Sort the right part (possibly in parallel), excluding |
3113 /* |
3522 * known pivot. All elements from the central part are |
3114 * Use the third of the five sorted elements as pivot. |
3523 * equal and therefore already sorted. |
3115 * This value is inexpensive approximation of the median. |
3524 */ |
3116 */ |
3525 if (size > MIN_PARALLEL_SORT_SIZE && sorter != null) { |
3117 double pivot = a[e3]; |
3526 sorter.forkSorter(bits | 1, upper, high); |
3118 |
3527 } else { |
3119 /* |
3528 sort(sorter, a, bits | 1, upper, high); |
3120 * Partitioning degenerates to the traditional 3-way |
3529 } |
3121 * (or "Dutch National Flag") schema: |
3530 } |
|
3531 high = lower; // Iterate along the left part |
|
3532 } |
|
3533 } |
|
3534 |
|
3535 /** |
|
3536 * Sorts the specified range of the array using mixed insertion sort. |
|
3537 * |
|
3538 * Mixed insertion sort is combination of simple insertion sort, |
|
3539 * pin insertion sort and pair insertion sort. |
|
3540 * |
|
3541 * In the context of Dual-Pivot Quicksort, the pivot element |
|
3542 * from the left part plays the role of sentinel, because it |
|
3543 * is less than any elements from the given part. Therefore, |
|
3544 * expensive check of the left range can be skipped on each |
|
3545 * iteration unless it is the leftmost call. |
|
3546 * |
|
3547 * @param a the array to be sorted |
|
3548 * @param low the index of the first element, inclusive, to be sorted |
|
3549 * @param end the index of the last element for simple insertion sort |
|
3550 * @param high the index of the last element, exclusive, to be sorted |
|
3551 */ |
|
3552 private static void mixedInsertionSort(double[] a, int low, int end, int high) { |
|
3553 if (end == high) { |
|
3554 |
|
3555 /* |
|
3556 * Invoke simple insertion sort on tiny array. |
|
3557 */ |
|
3558 for (int i; ++low < end; ) { |
|
3559 double ai = a[i = low]; |
|
3560 |
|
3561 while (ai < a[--i]) { |
|
3562 a[i + 1] = a[i]; |
|
3563 } |
|
3564 a[i + 1] = ai; |
|
3565 } |
|
3566 } else { |
|
3567 |
|
3568 /* |
|
3569 * Start with pin insertion sort on small part. |
3122 * |
3570 * |
3123 * left part center part right part |
3571 * Pin insertion sort is extended simple insertion sort. |
3124 * +-------------------------------------------------+ |
3572 * The main idea of this sort is to put elements larger |
3125 * | < pivot | == pivot | ? | > pivot | |
3573 * than an element called pin to the end of array (the |
3126 * +-------------------------------------------------+ |
3574 * proper area for such elements). It avoids expensive |
3127 * ^ ^ ^ |
3575 * movements of these elements through the whole array. |
3128 * | | | |
3576 */ |
3129 * less k great |
3577 double pin = a[end]; |
3130 * |
3578 |
3131 * Invariants: |
3579 for (int i, p = high; ++low < end; ) { |
3132 * |
3580 double ai = a[i = low]; |
3133 * all in (left, less) < pivot |
3581 |
3134 * all in [less, k) == pivot |
3582 if (ai < a[i - 1]) { // Small element |
3135 * all in (great, right) > pivot |
3583 |
3136 * |
3584 /* |
3137 * Pointer k is the first index of ?-part. |
3585 * Insert small element into sorted part. |
3138 */ |
3586 */ |
3139 for (int k = less; k <= great; ++k) { |
3587 a[i] = a[--i]; |
3140 if (a[k] == pivot) { |
3588 |
|
3589 while (ai < a[--i]) { |
|
3590 a[i + 1] = a[i]; |
|
3591 } |
|
3592 a[i + 1] = ai; |
|
3593 |
|
3594 } else if (p > i && ai > pin) { // Large element |
|
3595 |
|
3596 /* |
|
3597 * Find element smaller than pin. |
|
3598 */ |
|
3599 while (a[--p] > pin); |
|
3600 |
|
3601 /* |
|
3602 * Swap it with large element. |
|
3603 */ |
|
3604 if (p > i) { |
|
3605 ai = a[p]; |
|
3606 a[p] = a[i]; |
|
3607 } |
|
3608 |
|
3609 /* |
|
3610 * Insert small element into sorted part. |
|
3611 */ |
|
3612 while (ai < a[--i]) { |
|
3613 a[i + 1] = a[i]; |
|
3614 } |
|
3615 a[i + 1] = ai; |
|
3616 } |
|
3617 } |
|
3618 |
|
3619 /* |
|
3620 * Continue with pair insertion sort on remain part. |
|
3621 */ |
|
3622 for (int i; low < high; ++low) { |
|
3623 double a1 = a[i = low], a2 = a[++low]; |
|
3624 |
|
3625 /* |
|
3626 * Insert two elements per iteration: at first, insert the |
|
3627 * larger element and then insert the smaller element, but |
|
3628 * from the position where the larger element was inserted. |
|
3629 */ |
|
3630 if (a1 > a2) { |
|
3631 |
|
3632 while (a1 < a[--i]) { |
|
3633 a[i + 2] = a[i]; |
|
3634 } |
|
3635 a[++i + 1] = a1; |
|
3636 |
|
3637 while (a2 < a[--i]) { |
|
3638 a[i + 1] = a[i]; |
|
3639 } |
|
3640 a[i + 1] = a2; |
|
3641 |
|
3642 } else if (a1 < a[i - 1]) { |
|
3643 |
|
3644 while (a2 < a[--i]) { |
|
3645 a[i + 2] = a[i]; |
|
3646 } |
|
3647 a[++i + 1] = a2; |
|
3648 |
|
3649 while (a1 < a[--i]) { |
|
3650 a[i + 1] = a[i]; |
|
3651 } |
|
3652 a[i + 1] = a1; |
|
3653 } |
|
3654 } |
|
3655 } |
|
3656 } |
|
3657 |
|
3658 /** |
|
3659 * Sorts the specified range of the array using insertion sort. |
|
3660 * |
|
3661 * @param a the array to be sorted |
|
3662 * @param low the index of the first element, inclusive, to be sorted |
|
3663 * @param high the index of the last element, exclusive, to be sorted |
|
3664 */ |
|
3665 private static void insertionSort(double[] a, int low, int high) { |
|
3666 for (int i, k = low; ++k < high; ) { |
|
3667 double ai = a[i = k]; |
|
3668 |
|
3669 if (ai < a[i - 1]) { |
|
3670 while (--i >= low && ai < a[i]) { |
|
3671 a[i + 1] = a[i]; |
|
3672 } |
|
3673 a[i + 1] = ai; |
|
3674 } |
|
3675 } |
|
3676 } |
|
3677 |
|
3678 /** |
|
3679 * Sorts the specified range of the array using heap sort. |
|
3680 * |
|
3681 * @param a the array to be sorted |
|
3682 * @param low the index of the first element, inclusive, to be sorted |
|
3683 * @param high the index of the last element, exclusive, to be sorted |
|
3684 */ |
|
3685 private static void heapSort(double[] a, int low, int high) { |
|
3686 for (int k = (low + high) >>> 1; k > low; ) { |
|
3687 pushDown(a, --k, a[k], low, high); |
|
3688 } |
|
3689 while (--high > low) { |
|
3690 double max = a[low]; |
|
3691 pushDown(a, low, a[high], low, high); |
|
3692 a[high] = max; |
|
3693 } |
|
3694 } |
|
3695 |
|
3696 /** |
|
3697 * Pushes specified element down during heap sort. |
|
3698 * |
|
3699 * @param a the given array |
|
3700 * @param p the start index |
|
3701 * @param value the given element |
|
3702 * @param low the index of the first element, inclusive, to be sorted |
|
3703 * @param high the index of the last element, exclusive, to be sorted |
|
3704 */ |
|
3705 private static void pushDown(double[] a, int p, double value, int low, int high) { |
|
3706 for (int k ;; a[p] = a[p = k]) { |
|
3707 k = (p << 1) - low + 2; // Index of the right child |
|
3708 |
|
3709 if (k > high) { |
|
3710 break; |
|
3711 } |
|
3712 if (k == high || a[k] < a[k - 1]) { |
|
3713 --k; |
|
3714 } |
|
3715 if (a[k] <= value) { |
|
3716 break; |
|
3717 } |
|
3718 } |
|
3719 a[p] = value; |
|
3720 } |
|
3721 |
|
3722 /** |
|
3723 * Tries to sort the specified range of the array. |
|
3724 * |
|
3725 * @param sorter parallel context |
|
3726 * @param a the array to be sorted |
|
3727 * @param low the index of the first element to be sorted |
|
3728 * @param size the array size |
|
3729 * @return true if finally sorted, false otherwise |
|
3730 */ |
|
3731 private static boolean tryMergeRuns(Sorter sorter, double[] a, int low, int size) { |
|
3732 |
|
3733 /* |
|
3734 * The run array is constructed only if initial runs are |
|
3735 * long enough to continue, run[i] then holds start index |
|
3736 * of the i-th sequence of elements in non-descending order. |
|
3737 */ |
|
3738 int[] run = null; |
|
3739 int high = low + size; |
|
3740 int count = 1, last = low; |
|
3741 |
|
3742 /* |
|
3743 * Identify all possible runs. |
|
3744 */ |
|
3745 for (int k = low + 1; k < high; ) { |
|
3746 |
|
3747 /* |
|
3748 * Find the end index of the current run. |
|
3749 */ |
|
3750 if (a[k - 1] < a[k]) { |
|
3751 |
|
3752 // Identify ascending sequence |
|
3753 while (++k < high && a[k - 1] <= a[k]); |
|
3754 |
|
3755 } else if (a[k - 1] > a[k]) { |
|
3756 |
|
3757 // Identify descending sequence |
|
3758 while (++k < high && a[k - 1] >= a[k]); |
|
3759 |
|
3760 // Reverse into ascending order |
|
3761 for (int i = last - 1, j = k; ++i < --j && a[i] > a[j]; ) { |
|
3762 double ai = a[i]; a[i] = a[j]; a[j] = ai; |
|
3763 } |
|
3764 } else { // Identify constant sequence |
|
3765 for (double ak = a[k]; ++k < high && ak == a[k]; ); |
|
3766 |
|
3767 if (k < high) { |
3141 continue; |
3768 continue; |
3142 } |
3769 } |
3143 double ak = a[k]; |
3770 } |
3144 if (ak < pivot) { // Move a[k] to left part |
3771 |
3145 a[k] = a[less]; |
3772 /* |
3146 a[less] = ak; |
3773 * Check special cases. |
3147 ++less; |
3774 */ |
3148 } else { // a[k] > pivot - Move a[k] to right part |
3775 if (run == null) { |
3149 while (a[great] > pivot) { |
3776 if (k == high) { |
3150 --great; |
3777 |
3151 } |
3778 /* |
3152 if (a[great] < pivot) { // a[great] <= pivot |
3779 * The array is monotonous sequence, |
3153 a[k] = a[less]; |
3780 * and therefore already sorted. |
3154 a[less] = a[great]; |
3781 */ |
3155 ++less; |
3782 return true; |
3156 } else { // a[great] == pivot |
3783 } |
3157 /* |
3784 |
3158 * Even though a[great] equals to pivot, the |
3785 if (k - low < MIN_FIRST_RUN_SIZE) { |
3159 * assignment a[k] = pivot may be incorrect, |
3786 |
3160 * if a[great] and pivot are floating-point |
3787 /* |
3161 * zeros of different signs. Therefore in float |
3788 * The first run is too small |
3162 * and double sorting methods we have to use |
3789 * to proceed with scanning. |
3163 * more accurate assignment a[k] = a[great]. |
3790 */ |
3164 */ |
3791 return false; |
3165 a[k] = a[great]; |
3792 } |
3166 } |
3793 |
3167 a[great] = ak; |
3794 run = new int[((size >> 10) | 0x7F) & 0x3FF]; |
3168 --great; |
3795 run[0] = low; |
3169 } |
3796 |
3170 } |
3797 } else if (a[last - 1] > a[last]) { |
3171 |
3798 |
3172 /* |
3799 if (count > (k - low) >> MIN_FIRST_RUNS_FACTOR) { |
3173 * Sort left and right parts recursively. |
3800 |
3174 * All elements from center part are equal |
3801 /* |
3175 * and, therefore, already sorted. |
3802 * The first runs are not long |
3176 */ |
3803 * enough to continue scanning. |
3177 sort(a, left, less - 1, leftmost); |
3804 */ |
3178 sort(a, great + 1, right, false); |
3805 return false; |
|
3806 } |
|
3807 |
|
3808 if (++count == MAX_RUN_CAPACITY) { |
|
3809 |
|
3810 /* |
|
3811 * Array is not highly structured. |
|
3812 */ |
|
3813 return false; |
|
3814 } |
|
3815 |
|
3816 if (count == run.length) { |
|
3817 |
|
3818 /* |
|
3819 * Increase capacity of index array. |
|
3820 */ |
|
3821 run = Arrays.copyOf(run, count << 1); |
|
3822 } |
|
3823 } |
|
3824 run[count] = (last = k); |
|
3825 } |
|
3826 |
|
3827 /* |
|
3828 * Merge runs of highly structured array. |
|
3829 */ |
|
3830 if (count > 1) { |
|
3831 double[] b; int offset = low; |
|
3832 |
|
3833 if (sorter == null || (b = (double[]) sorter.b) == null) { |
|
3834 b = new double[size]; |
|
3835 } else { |
|
3836 offset = sorter.offset; |
|
3837 } |
|
3838 mergeRuns(a, b, offset, 1, sorter != null, run, 0, count); |
|
3839 } |
|
3840 return true; |
|
3841 } |
|
3842 |
|
3843 /** |
|
3844 * Merges the specified runs. |
|
3845 * |
|
3846 * @param a the source array |
|
3847 * @param b the temporary buffer used in merging |
|
3848 * @param offset the start index in the source, inclusive |
|
3849 * @param aim specifies merging: to source ( > 0), buffer ( < 0) or any ( == 0) |
|
3850 * @param parallel indicates whether merging is performed in parallel |
|
3851 * @param run the start indexes of the runs, inclusive |
|
3852 * @param lo the start index of the first run, inclusive |
|
3853 * @param hi the start index of the last run, inclusive |
|
3854 * @return the destination where runs are merged |
|
3855 */ |
|
3856 private static double[] mergeRuns(double[] a, double[] b, int offset, |
|
3857 int aim, boolean parallel, int[] run, int lo, int hi) { |
|
3858 |
|
3859 if (hi - lo == 1) { |
|
3860 if (aim >= 0) { |
|
3861 return a; |
|
3862 } |
|
3863 for (int i = run[hi], j = i - offset, low = run[lo]; i > low; |
|
3864 b[--j] = a[--i] |
|
3865 ); |
|
3866 return b; |
|
3867 } |
|
3868 |
|
3869 /* |
|
3870 * Split into approximately equal parts. |
|
3871 */ |
|
3872 int mi = lo, rmi = (run[lo] + run[hi]) >>> 1; |
|
3873 while (run[++mi + 1] <= rmi); |
|
3874 |
|
3875 /* |
|
3876 * Merge the left and right parts. |
|
3877 */ |
|
3878 double[] a1, a2; |
|
3879 |
|
3880 if (parallel && hi - lo > MIN_RUN_COUNT) { |
|
3881 RunMerger merger = new RunMerger(a, b, offset, 0, run, mi, hi).forkMe(); |
|
3882 a1 = mergeRuns(a, b, offset, -aim, true, run, lo, mi); |
|
3883 a2 = (double[]) merger.getDestination(); |
|
3884 } else { |
|
3885 a1 = mergeRuns(a, b, offset, -aim, false, run, lo, mi); |
|
3886 a2 = mergeRuns(a, b, offset, 0, false, run, mi, hi); |
|
3887 } |
|
3888 |
|
3889 double[] dst = a1 == a ? b : a; |
|
3890 |
|
3891 int k = a1 == a ? run[lo] - offset : run[lo]; |
|
3892 int lo1 = a1 == b ? run[lo] - offset : run[lo]; |
|
3893 int hi1 = a1 == b ? run[mi] - offset : run[mi]; |
|
3894 int lo2 = a2 == b ? run[mi] - offset : run[mi]; |
|
3895 int hi2 = a2 == b ? run[hi] - offset : run[hi]; |
|
3896 |
|
3897 if (parallel) { |
|
3898 new Merger(null, dst, k, a1, lo1, hi1, a2, lo2, hi2).invoke(); |
|
3899 } else { |
|
3900 mergeParts(null, dst, k, a1, lo1, hi1, a2, lo2, hi2); |
|
3901 } |
|
3902 return dst; |
|
3903 } |
|
3904 |
|
3905 /** |
|
3906 * Merges the sorted parts. |
|
3907 * |
|
3908 * @param merger parallel context |
|
3909 * @param dst the destination where parts are merged |
|
3910 * @param k the start index of the destination, inclusive |
|
3911 * @param a1 the first part |
|
3912 * @param lo1 the start index of the first part, inclusive |
|
3913 * @param hi1 the end index of the first part, exclusive |
|
3914 * @param a2 the second part |
|
3915 * @param lo2 the start index of the second part, inclusive |
|
3916 * @param hi2 the end index of the second part, exclusive |
|
3917 */ |
|
3918 private static void mergeParts(Merger merger, double[] dst, int k, |
|
3919 double[] a1, int lo1, int hi1, double[] a2, int lo2, int hi2) { |
|
3920 |
|
3921 if (merger != null && a1 == a2) { |
|
3922 |
|
3923 while (true) { |
|
3924 |
|
3925 /* |
|
3926 * The first part must be larger. |
|
3927 */ |
|
3928 if (hi1 - lo1 < hi2 - lo2) { |
|
3929 int lo = lo1; lo1 = lo2; lo2 = lo; |
|
3930 int hi = hi1; hi1 = hi2; hi2 = hi; |
|
3931 } |
|
3932 |
|
3933 /* |
|
3934 * Small parts will be merged sequentially. |
|
3935 */ |
|
3936 if (hi1 - lo1 < MIN_PARALLEL_MERGE_PARTS_SIZE) { |
|
3937 break; |
|
3938 } |
|
3939 |
|
3940 /* |
|
3941 * Find the median of the larger part. |
|
3942 */ |
|
3943 int mi1 = (lo1 + hi1) >>> 1; |
|
3944 double key = a1[mi1]; |
|
3945 int mi2 = hi2; |
|
3946 |
|
3947 /* |
|
3948 * Partition the smaller part. |
|
3949 */ |
|
3950 for (int loo = lo2; loo < mi2; ) { |
|
3951 int t = (loo + mi2) >>> 1; |
|
3952 |
|
3953 if (key > a2[t]) { |
|
3954 loo = t + 1; |
|
3955 } else { |
|
3956 mi2 = t; |
|
3957 } |
|
3958 } |
|
3959 |
|
3960 int d = mi2 - lo2 + mi1 - lo1; |
|
3961 |
|
3962 /* |
|
3963 * Merge the right sub-parts in parallel. |
|
3964 */ |
|
3965 merger.forkMerger(dst, k + d, a1, mi1, hi1, a2, mi2, hi2); |
|
3966 |
|
3967 /* |
|
3968 * Process the sub-left parts. |
|
3969 */ |
|
3970 hi1 = mi1; |
|
3971 hi2 = mi2; |
|
3972 } |
|
3973 } |
|
3974 |
|
3975 /* |
|
3976 * Merge small parts sequentially. |
|
3977 */ |
|
3978 while (lo1 < hi1 && lo2 < hi2) { |
|
3979 dst[k++] = a1[lo1] < a2[lo2] ? a1[lo1++] : a2[lo2++]; |
|
3980 } |
|
3981 if (dst != a1 || k < lo1) { |
|
3982 while (lo1 < hi1) { |
|
3983 dst[k++] = a1[lo1++]; |
|
3984 } |
|
3985 } |
|
3986 if (dst != a2 || k < lo2) { |
|
3987 while (lo2 < hi2) { |
|
3988 dst[k++] = a2[lo2++]; |
|
3989 } |
|
3990 } |
|
3991 } |
|
3992 |
|
3993 // [class] |
|
3994 |
|
3995 /** |
|
3996 * This class implements parallel sorting. |
|
3997 */ |
|
3998 private static final class Sorter extends CountedCompleter<Void> { |
|
3999 private static final long serialVersionUID = 20180818L; |
|
4000 private final Object a, b; |
|
4001 private final int low, size, offset, depth; |
|
4002 |
|
4003 private Sorter(CountedCompleter<?> parent, |
|
4004 Object a, Object b, int low, int size, int offset, int depth) { |
|
4005 super(parent); |
|
4006 this.a = a; |
|
4007 this.b = b; |
|
4008 this.low = low; |
|
4009 this.size = size; |
|
4010 this.offset = offset; |
|
4011 this.depth = depth; |
|
4012 } |
|
4013 |
|
4014 @Override |
|
4015 public final void compute() { |
|
4016 if (depth < 0) { |
|
4017 setPendingCount(2); |
|
4018 int half = size >> 1; |
|
4019 new Sorter(this, b, a, low, half, offset, depth + 1).fork(); |
|
4020 new Sorter(this, b, a, low + half, size - half, offset, depth + 1).compute(); |
|
4021 } else { |
|
4022 if (a instanceof int[]) { |
|
4023 sort(this, (int[]) a, depth, low, low + size); |
|
4024 } else if (a instanceof long[]) { |
|
4025 sort(this, (long[]) a, depth, low, low + size); |
|
4026 } else if (a instanceof float[]) { |
|
4027 sort(this, (float[]) a, depth, low, low + size); |
|
4028 } else if (a instanceof double[]) { |
|
4029 sort(this, (double[]) a, depth, low, low + size); |
|
4030 } else { |
|
4031 throw new IllegalArgumentException( |
|
4032 "Unknown type of array: " + a.getClass().getName()); |
|
4033 } |
|
4034 } |
|
4035 tryComplete(); |
|
4036 } |
|
4037 |
|
4038 @Override |
|
4039 public final void onCompletion(CountedCompleter<?> caller) { |
|
4040 if (depth < 0) { |
|
4041 int mi = low + (size >> 1); |
|
4042 boolean src = (depth & 1) == 0; |
|
4043 |
|
4044 new Merger(null, |
|
4045 a, |
|
4046 src ? low : low - offset, |
|
4047 b, |
|
4048 src ? low - offset : low, |
|
4049 src ? mi - offset : mi, |
|
4050 b, |
|
4051 src ? mi - offset : mi, |
|
4052 src ? low + size - offset : low + size |
|
4053 ).invoke(); |
|
4054 } |
|
4055 } |
|
4056 |
|
4057 private void forkSorter(int depth, int low, int high) { |
|
4058 addToPendingCount(1); |
|
4059 Object a = this.a; // Use local variable for performance |
|
4060 new Sorter(this, a, b, low, high - low, offset, depth).fork(); |
|
4061 } |
|
4062 } |
|
4063 |
|
4064 /** |
|
4065 * This class implements parallel merging. |
|
4066 */ |
|
4067 private static final class Merger extends CountedCompleter<Void> { |
|
4068 private static final long serialVersionUID = 20180818L; |
|
4069 private final Object dst, a1, a2; |
|
4070 private final int k, lo1, hi1, lo2, hi2; |
|
4071 |
|
4072 private Merger(CountedCompleter<?> parent, Object dst, int k, |
|
4073 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { |
|
4074 super(parent); |
|
4075 this.dst = dst; |
|
4076 this.k = k; |
|
4077 this.a1 = a1; |
|
4078 this.lo1 = lo1; |
|
4079 this.hi1 = hi1; |
|
4080 this.a2 = a2; |
|
4081 this.lo2 = lo2; |
|
4082 this.hi2 = hi2; |
|
4083 } |
|
4084 |
|
4085 @Override |
|
4086 public final void compute() { |
|
4087 if (dst instanceof int[]) { |
|
4088 mergeParts(this, (int[]) dst, k, |
|
4089 (int[]) a1, lo1, hi1, (int[]) a2, lo2, hi2); |
|
4090 } else if (dst instanceof long[]) { |
|
4091 mergeParts(this, (long[]) dst, k, |
|
4092 (long[]) a1, lo1, hi1, (long[]) a2, lo2, hi2); |
|
4093 } else if (dst instanceof float[]) { |
|
4094 mergeParts(this, (float[]) dst, k, |
|
4095 (float[]) a1, lo1, hi1, (float[]) a2, lo2, hi2); |
|
4096 } else if (dst instanceof double[]) { |
|
4097 mergeParts(this, (double[]) dst, k, |
|
4098 (double[]) a1, lo1, hi1, (double[]) a2, lo2, hi2); |
|
4099 } else { |
|
4100 throw new IllegalArgumentException( |
|
4101 "Unknown type of array: " + dst.getClass().getName()); |
|
4102 } |
|
4103 propagateCompletion(); |
|
4104 } |
|
4105 |
|
4106 private void forkMerger(Object dst, int k, |
|
4107 Object a1, int lo1, int hi1, Object a2, int lo2, int hi2) { |
|
4108 addToPendingCount(1); |
|
4109 new Merger(this, dst, k, a1, lo1, hi1, a2, lo2, hi2).fork(); |
|
4110 } |
|
4111 } |
|
4112 |
|
4113 /** |
|
4114 * This class implements parallel merging of runs. |
|
4115 */ |
|
4116 private static final class RunMerger extends RecursiveTask<Object> { |
|
4117 private static final long serialVersionUID = 20180818L; |
|
4118 private final Object a, b; |
|
4119 private final int[] run; |
|
4120 private final int offset, aim, lo, hi; |
|
4121 |
|
4122 private RunMerger(Object a, Object b, int offset, |
|
4123 int aim, int[] run, int lo, int hi) { |
|
4124 this.a = a; |
|
4125 this.b = b; |
|
4126 this.offset = offset; |
|
4127 this.aim = aim; |
|
4128 this.run = run; |
|
4129 this.lo = lo; |
|
4130 this.hi = hi; |
|
4131 } |
|
4132 |
|
4133 @Override |
|
4134 protected final Object compute() { |
|
4135 if (a instanceof int[]) { |
|
4136 return mergeRuns((int[]) a, (int[]) b, offset, aim, true, run, lo, hi); |
|
4137 } |
|
4138 if (a instanceof long[]) { |
|
4139 return mergeRuns((long[]) a, (long[]) b, offset, aim, true, run, lo, hi); |
|
4140 } |
|
4141 if (a instanceof float[]) { |
|
4142 return mergeRuns((float[]) a, (float[]) b, offset, aim, true, run, lo, hi); |
|
4143 } |
|
4144 if (a instanceof double[]) { |
|
4145 return mergeRuns((double[]) a, (double[]) b, offset, aim, true, run, lo, hi); |
|
4146 } |
|
4147 throw new IllegalArgumentException( |
|
4148 "Unknown type of array: " + a.getClass().getName()); |
|
4149 } |
|
4150 |
|
4151 private RunMerger forkMe() { |
|
4152 fork(); |
|
4153 return this; |
|
4154 } |
|
4155 |
|
4156 private Object getDestination() { |
|
4157 join(); |
|
4158 return getRawResult(); |
3179 } |
4159 } |
3180 } |
4160 } |
3181 } |
4161 } |