|
1 /* |
|
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
3 * |
|
4 * This code is free software; you can redistribute it and/or modify it |
|
5 * under the terms of the GNU General Public License version 2 only, as |
|
6 * published by the Free Software Foundation. Oracle designates this |
|
7 * particular file as subject to the "Classpath" exception as provided |
|
8 * by Oracle in the LICENSE file that accompanied this code. |
|
9 * |
|
10 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
13 * version 2 for more details (a copy is included in the LICENSE file that |
|
14 * accompanied this code). |
|
15 * |
|
16 * You should have received a copy of the GNU General Public License version |
|
17 * 2 along with this work; if not, write to the Free Software Foundation, |
|
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
19 * |
|
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
21 * or visit www.oracle.com if you need additional information or have any |
|
22 * questions. |
|
23 */ |
|
24 |
|
25 /* |
|
26 * This file is available under and governed by the GNU General Public |
|
27 * License version 2 only, as published by the Free Software Foundation. |
|
28 * However, the following notice accompanied the original version of this |
|
29 * file: |
|
30 * |
|
31 * Written by Doug Lea with assistance from members of JCP JSR-166 |
|
32 * Expert Group and released to the public domain, as explained at |
|
33 * http://creativecommons.org/publicdomain/zero/1.0/ |
|
34 */ |
|
35 |
|
36 package java.util.concurrent.atomic; |
|
37 import java.util.function.LongBinaryOperator; |
|
38 import java.util.function.DoubleBinaryOperator; |
|
39 import java.util.concurrent.ThreadLocalRandom; |
|
40 |
|
41 /** |
|
42 * A package-local class holding common representation and mechanics |
|
43 * for classes supporting dynamic striping on 64bit values. The class |
|
44 * extends Number so that concrete subclasses must publicly do so. |
|
45 */ |
|
46 abstract class Striped64 extends Number { |
|
47 /* |
|
48 * This class maintains a lazily-initialized table of atomically |
|
49 * updated variables, plus an extra "base" field. The table size |
|
50 * is a power of two. Indexing uses masked per-thread hash codes. |
|
51 * Nearly all declarations in this class are package-private, |
|
52 * accessed directly by subclasses. |
|
53 * |
|
54 * Table entries are of class Cell; a variant of AtomicLong padded |
|
55 * to reduce cache contention on most processors. Padding is |
|
56 * overkill for most Atomics because they are usually irregularly |
|
57 * scattered in memory and thus don't interfere much with each |
|
58 * other. But Atomic objects residing in arrays will tend to be |
|
59 * placed adjacent to each other, and so will most often share |
|
60 * cache lines (with a huge negative performance impact) without |
|
61 * this precaution. |
|
62 * |
|
63 * In part because Cells are relatively large, we avoid creating |
|
64 * them until they are needed. When there is no contention, all |
|
65 * updates are made to the base field. Upon first contention (a |
|
66 * failed CAS on base update), the table is initialized to size 2. |
|
67 * The table size is doubled upon further contention until |
|
68 * reaching the nearest power of two greater than or equal to the |
|
69 * number of CPUS. Table slots remain empty (null) until they are |
|
70 * needed. |
|
71 * |
|
72 * A single spinlock ("cellsBusy") is used for initializing and |
|
73 * resizing the table, as well as populating slots with new Cells. |
|
74 * There is no need for a blocking lock; when the lock is not |
|
75 * available, threads try other slots (or the base). During these |
|
76 * retries, there is increased contention and reduced locality, |
|
77 * which is still better than alternatives. |
|
78 * |
|
79 * The Thread probe fields maintained via ThreadLocalRandom serve |
|
80 * as per-thread hash codes. We let them remain uninitialized as |
|
81 * zero (if they come in this way) until they contend at slot |
|
82 * 0. They are then initialized to values that typically do not |
|
83 * often conflict with others. Contention and/or table collisions |
|
84 * are indicated by failed CASes when performing an update |
|
85 * operation. Upon a collision, if the table size is less than |
|
86 * the capacity, it is doubled in size unless some other thread |
|
87 * holds the lock. If a hashed slot is empty, and lock is |
|
88 * available, a new Cell is created. Otherwise, if the slot |
|
89 * exists, a CAS is tried. Retries proceed by "double hashing", |
|
90 * using a secondary hash (Marsaglia XorShift) to try to find a |
|
91 * free slot. |
|
92 * |
|
93 * The table size is capped because, when there are more threads |
|
94 * than CPUs, supposing that each thread were bound to a CPU, |
|
95 * there would exist a perfect hash function mapping threads to |
|
96 * slots that eliminates collisions. When we reach capacity, we |
|
97 * search for this mapping by randomly varying the hash codes of |
|
98 * colliding threads. Because search is random, and collisions |
|
99 * only become known via CAS failures, convergence can be slow, |
|
100 * and because threads are typically not bound to CPUS forever, |
|
101 * may not occur at all. However, despite these limitations, |
|
102 * observed contention rates are typically low in these cases. |
|
103 * |
|
104 * It is possible for a Cell to become unused when threads that |
|
105 * once hashed to it terminate, as well as in the case where |
|
106 * doubling the table causes no thread to hash to it under |
|
107 * expanded mask. We do not try to detect or remove such cells, |
|
108 * under the assumption that for long-running instances, observed |
|
109 * contention levels will recur, so the cells will eventually be |
|
110 * needed again; and for short-lived ones, it does not matter. |
|
111 */ |
|
112 |
|
113 /** |
|
114 * Padded variant of AtomicLong supporting only raw accesses plus CAS. |
|
115 * The value field is placed between pads, hoping that the JVM doesn't |
|
116 * reorder them. |
|
117 * |
|
118 * JVM intrinsics note: It would be possible to use a release-only |
|
119 * form of CAS here, if it were provided. |
|
120 */ |
|
121 static final class Cell { |
|
122 volatile long p0, p1, p2, p3, p4, p5, p6; |
|
123 volatile long value; |
|
124 volatile long q0, q1, q2, q3, q4, q5, q6; |
|
125 Cell(long x) { value = x; } |
|
126 |
|
127 final boolean cas(long cmp, long val) { |
|
128 return UNSAFE.compareAndSwapLong(this, valueOffset, cmp, val); |
|
129 } |
|
130 |
|
131 // Unsafe mechanics |
|
132 private static final sun.misc.Unsafe UNSAFE; |
|
133 private static final long valueOffset; |
|
134 static { |
|
135 try { |
|
136 UNSAFE = sun.misc.Unsafe.getUnsafe(); |
|
137 Class<?> ak = Cell.class; |
|
138 valueOffset = UNSAFE.objectFieldOffset |
|
139 (ak.getDeclaredField("value")); |
|
140 } catch (Exception e) { |
|
141 throw new Error(e); |
|
142 } |
|
143 } |
|
144 } |
|
145 |
|
146 /** Number of CPUS, to place bound on table size */ |
|
147 static final int NCPU = Runtime.getRuntime().availableProcessors(); |
|
148 |
|
149 /** |
|
150 * Table of cells. When non-null, size is a power of 2. |
|
151 */ |
|
152 transient volatile Cell[] cells; |
|
153 |
|
154 /** |
|
155 * Base value, used mainly when there is no contention, but also as |
|
156 * a fallback during table initialization races. Updated via CAS. |
|
157 */ |
|
158 transient volatile long base; |
|
159 |
|
160 /** |
|
161 * Spinlock (locked via CAS) used when resizing and/or creating Cells. |
|
162 */ |
|
163 transient volatile int cellsBusy; |
|
164 |
|
165 /** |
|
166 * Package-private default constructor |
|
167 */ |
|
168 Striped64() { |
|
169 } |
|
170 |
|
171 /** |
|
172 * CASes the base field. |
|
173 */ |
|
174 final boolean casBase(long cmp, long val) { |
|
175 return UNSAFE.compareAndSwapLong(this, BASE, cmp, val); |
|
176 } |
|
177 |
|
178 /** |
|
179 * CASes the cellsBusy field from 0 to 1 to acquire lock. |
|
180 */ |
|
181 final boolean casCellsBusy() { |
|
182 return UNSAFE.compareAndSwapInt(this, CELLSBUSY, 0, 1); |
|
183 } |
|
184 |
|
185 /** |
|
186 * Returns the probe value for the current thread. |
|
187 * Duplicated from ThreadLocalRandom because of packaging restrictions. |
|
188 */ |
|
189 static final int getProbe() { |
|
190 return UNSAFE.getInt(Thread.currentThread(), PROBE); |
|
191 } |
|
192 |
|
193 /** |
|
194 * Pseudo-randomly advances and records the given probe value for the |
|
195 * given thread. |
|
196 * Duplicated from ThreadLocalRandom because of packaging restrictions. |
|
197 */ |
|
198 static final int advanceProbe(int probe) { |
|
199 probe ^= probe << 13; // xorshift |
|
200 probe ^= probe >>> 17; |
|
201 probe ^= probe << 5; |
|
202 UNSAFE.putInt(Thread.currentThread(), PROBE, probe); |
|
203 return probe; |
|
204 } |
|
205 |
|
206 /** |
|
207 * Handles cases of updates involving initialization, resizing, |
|
208 * creating new Cells, and/or contention. See above for |
|
209 * explanation. This method suffers the usual non-modularity |
|
210 * problems of optimistic retry code, relying on rechecked sets of |
|
211 * reads. |
|
212 * |
|
213 * @param x the value |
|
214 * @param fn the update function, or null for add (this convention |
|
215 * avoids the need for an extra field or function in LongAdder). |
|
216 * @param wasUncontended false if CAS failed before call |
|
217 */ |
|
218 final void longAccumulate(long x, LongBinaryOperator fn, |
|
219 boolean wasUncontended) { |
|
220 int h; |
|
221 if ((h = getProbe()) == 0) { |
|
222 ThreadLocalRandom.current(); // force initialization |
|
223 h = getProbe(); |
|
224 wasUncontended = true; |
|
225 } |
|
226 boolean collide = false; // True if last slot nonempty |
|
227 for (;;) { |
|
228 Cell[] as; Cell a; int n; long v; |
|
229 if ((as = cells) != null && (n = as.length) > 0) { |
|
230 if ((a = as[(n - 1) & h]) == null) { |
|
231 if (cellsBusy == 0) { // Try to attach new Cell |
|
232 Cell r = new Cell(x); // Optimistically create |
|
233 if (cellsBusy == 0 && casCellsBusy()) { |
|
234 boolean created = false; |
|
235 try { // Recheck under lock |
|
236 Cell[] rs; int m, j; |
|
237 if ((rs = cells) != null && |
|
238 (m = rs.length) > 0 && |
|
239 rs[j = (m - 1) & h] == null) { |
|
240 rs[j] = r; |
|
241 created = true; |
|
242 } |
|
243 } finally { |
|
244 cellsBusy = 0; |
|
245 } |
|
246 if (created) |
|
247 break; |
|
248 continue; // Slot is now non-empty |
|
249 } |
|
250 } |
|
251 collide = false; |
|
252 } |
|
253 else if (!wasUncontended) // CAS already known to fail |
|
254 wasUncontended = true; // Continue after rehash |
|
255 else if (a.cas(v = a.value, ((fn == null) ? v + x : |
|
256 fn.operateAsLong(v, x)))) |
|
257 break; |
|
258 else if (n >= NCPU || cells != as) |
|
259 collide = false; // At max size or stale |
|
260 else if (!collide) |
|
261 collide = true; |
|
262 else if (cellsBusy == 0 && casCellsBusy()) { |
|
263 try { |
|
264 if (cells == as) { // Expand table unless stale |
|
265 Cell[] rs = new Cell[n << 1]; |
|
266 for (int i = 0; i < n; ++i) |
|
267 rs[i] = as[i]; |
|
268 cells = rs; |
|
269 } |
|
270 } finally { |
|
271 cellsBusy = 0; |
|
272 } |
|
273 collide = false; |
|
274 continue; // Retry with expanded table |
|
275 } |
|
276 h = advanceProbe(h); |
|
277 } |
|
278 else if (cellsBusy == 0 && cells == as && casCellsBusy()) { |
|
279 boolean init = false; |
|
280 try { // Initialize table |
|
281 if (cells == as) { |
|
282 Cell[] rs = new Cell[2]; |
|
283 rs[h & 1] = new Cell(x); |
|
284 cells = rs; |
|
285 init = true; |
|
286 } |
|
287 } finally { |
|
288 cellsBusy = 0; |
|
289 } |
|
290 if (init) |
|
291 break; |
|
292 } |
|
293 else if (casBase(v = base, ((fn == null) ? v + x : |
|
294 fn.operateAsLong(v, x)))) |
|
295 break; // Fall back on using base |
|
296 } |
|
297 } |
|
298 |
|
299 /** |
|
300 * Same as longAccumulate, but injecting long/double conversions |
|
301 * in too many places to sensibly merge with long version, given |
|
302 * the low-overhead requirements of this class. So must instead be |
|
303 * maintained by copy/paste/adapt. |
|
304 */ |
|
305 final void doubleAccumulate(double x, DoubleBinaryOperator fn, |
|
306 boolean wasUncontended) { |
|
307 int h; |
|
308 if ((h = getProbe()) == 0) { |
|
309 ThreadLocalRandom.current(); // force initialization |
|
310 h = getProbe(); |
|
311 wasUncontended = true; |
|
312 } |
|
313 boolean collide = false; // True if last slot nonempty |
|
314 for (;;) { |
|
315 Cell[] as; Cell a; int n; long v; |
|
316 if ((as = cells) != null && (n = as.length) > 0) { |
|
317 if ((a = as[(n - 1) & h]) == null) { |
|
318 if (cellsBusy == 0) { // Try to attach new Cell |
|
319 Cell r = new Cell(Double.doubleToRawLongBits(x)); |
|
320 if (cellsBusy == 0 && casCellsBusy()) { |
|
321 boolean created = false; |
|
322 try { // Recheck under lock |
|
323 Cell[] rs; int m, j; |
|
324 if ((rs = cells) != null && |
|
325 (m = rs.length) > 0 && |
|
326 rs[j = (m - 1) & h] == null) { |
|
327 rs[j] = r; |
|
328 created = true; |
|
329 } |
|
330 } finally { |
|
331 cellsBusy = 0; |
|
332 } |
|
333 if (created) |
|
334 break; |
|
335 continue; // Slot is now non-empty |
|
336 } |
|
337 } |
|
338 collide = false; |
|
339 } |
|
340 else if (!wasUncontended) // CAS already known to fail |
|
341 wasUncontended = true; // Continue after rehash |
|
342 else if (a.cas(v = a.value, |
|
343 ((fn == null) ? |
|
344 Double.doubleToRawLongBits |
|
345 (Double.longBitsToDouble(v) + x) : |
|
346 Double.doubleToRawLongBits |
|
347 (fn.operateAsDouble |
|
348 (Double.longBitsToDouble(v), x))))) |
|
349 break; |
|
350 else if (n >= NCPU || cells != as) |
|
351 collide = false; // At max size or stale |
|
352 else if (!collide) |
|
353 collide = true; |
|
354 else if (cellsBusy == 0 && casCellsBusy()) { |
|
355 try { |
|
356 if (cells == as) { // Expand table unless stale |
|
357 Cell[] rs = new Cell[n << 1]; |
|
358 for (int i = 0; i < n; ++i) |
|
359 rs[i] = as[i]; |
|
360 cells = rs; |
|
361 } |
|
362 } finally { |
|
363 cellsBusy = 0; |
|
364 } |
|
365 collide = false; |
|
366 continue; // Retry with expanded table |
|
367 } |
|
368 h = advanceProbe(h); |
|
369 } |
|
370 else if (cellsBusy == 0 && cells == as && casCellsBusy()) { |
|
371 boolean init = false; |
|
372 try { // Initialize table |
|
373 if (cells == as) { |
|
374 Cell[] rs = new Cell[2]; |
|
375 rs[h & 1] = new Cell(Double.doubleToRawLongBits(x)); |
|
376 cells = rs; |
|
377 init = true; |
|
378 } |
|
379 } finally { |
|
380 cellsBusy = 0; |
|
381 } |
|
382 if (init) |
|
383 break; |
|
384 } |
|
385 else if (casBase(v = base, |
|
386 ((fn == null) ? |
|
387 Double.doubleToRawLongBits |
|
388 (Double.longBitsToDouble(v) + x) : |
|
389 Double.doubleToRawLongBits |
|
390 (fn.operateAsDouble |
|
391 (Double.longBitsToDouble(v), x))))) |
|
392 break; // Fall back on using base |
|
393 } |
|
394 } |
|
395 |
|
396 // Unsafe mechanics |
|
397 private static final sun.misc.Unsafe UNSAFE; |
|
398 private static final long BASE; |
|
399 private static final long CELLSBUSY; |
|
400 private static final long PROBE; |
|
401 static { |
|
402 try { |
|
403 UNSAFE = sun.misc.Unsafe.getUnsafe(); |
|
404 Class<?> sk = Striped64.class; |
|
405 BASE = UNSAFE.objectFieldOffset |
|
406 (sk.getDeclaredField("base")); |
|
407 CELLSBUSY = UNSAFE.objectFieldOffset |
|
408 (sk.getDeclaredField("cellsBusy")); |
|
409 Class<?> tk = Thread.class; |
|
410 PROBE = UNSAFE.objectFieldOffset |
|
411 (tk.getDeclaredField("threadLocalRandomProbe")); |
|
412 } catch (Exception e) { |
|
413 throw new Error(e); |
|
414 } |
|
415 } |
|
416 |
|
417 } |