/*
 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP

#include "runtime/atomic.hpp"
#include "runtime/os.hpp"

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class Atomic

inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(volatile jlong* src) { return *src; }
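
// Plain assignment suffices above because naturally aligned 8-byte accesses
// are single-copy atomic on PPC64; jlong load/store therefore need no
// special handling here, unlike on 32-bit platforms.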

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

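// As an illustrative sketch only (nothing in this file uses it), a
// release-store and an acquire-load could be composed from these fragments;
// 'v' and 'p' are hypothetical names:
//
//   __asm__ __volatile__ (strasm_release          // lwsync before the store
//                         "  stw %1, 0(%2)  \n"
//                         : "=m" (*p) : "r" (v), "b" (p));
//
//   __asm__ __volatile__ ("  lwz %0, 0(%1)  \n"
//                         strasm_acquire          // lwsync after the load
//                         : "=r" (v) : "b" (p), "m" (*p));
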
inline jint Atomic::add    (jint     add_value, volatile jint*     dest) {

  unsigned int result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (jint) result;
}
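
// The sequence above is the classic load-reserve/store-conditional loop:
// lwarx loads the word and establishes a reservation, stwcx. stores only if
// the reservation still holds, and bne- retries when it was lost. Ignoring
// the surrounding barriers, a rough C-level sketch (illustrative only,
// using the GCC __sync builtin) is:
//
//   jint old;
//   do {
//     old = *dest;                                             // lwarx
//   } while (!__sync_bool_compare_and_swap(dest, old, old + add_value));
//   return old + add_value;                                    // stwcx. took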


inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) {

  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (intptr_t) result;
}

inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) {
  return (void*)add_ptr(add_value, (volatile intptr_t*)dest);
}


inline void Atomic::inc    (volatile jint*     dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void*     dest) {
  inc_ptr((volatile intptr_t*)dest);
}


inline void Atomic::dec    (volatile jint*     dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void*     dest) {
  dec_ptr((volatile intptr_t*)dest);
}
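
// Note that, unlike add(), the inc()/dec() variants above are bracketed by
// strasm_nobarrier: they update the location atomically but impose no
// memory ordering of their own.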

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (jint) old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
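
// Hypothetical usage sketch (names are illustrative, not part of this
// file): a simple test-and-set flag built on xchg, where reading back 0
// means this caller set the flag.
//
//   volatile jint flag = 0;
//   ...
//   bool i_set_it = (Atomic::xchg(1, &flag) == 0);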

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}
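
// For any order other than memory_order_relaxed, the membars above bracket
// the compare-and-swap with a full sync on each side; this is the
// 'fence_cmpxchg_fence' shape mentioned in the notes below.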

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
inline jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest, jbyte compare_value, cmpxchg_memory_order order) {

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (jbyte)(unsigned char)old_value;
}
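
// Worked example for the byte-in-word technique above (illustrative): on a
// big-endian machine a byte at ((uintptr_t)dest & 3) == 1 occupies bits
// 16..23 of its containing word, and indeed shift_amount = ((~1) & 3) * 8
// == 16. 'srd' shifts that byte down to bits 0..7 and 'clrldi ..., 56'
// clears the upper 56 bits, leaving just the byte for the compare. The xor
// works because stwcx. is only attempted once the target byte is known to
// equal masked_compare_val, so
//   value32 ^ ((masked_compare_val ^ masked_exchange_val) << shift_amount)
// rewrites exactly that byte to the exchange value and leaves the other
// three bytes of the word untouched.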

inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order) {

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (jint) old_value;
}
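
// Hypothetical usage sketch (names are illustrative; the order argument is
// left to the default declared in atomic.hpp): claim a lock word that is
// free when 0. cmpxchg returns the previous value, so reading back 0 means
// this caller installed the 1.
//
//   volatile jint lock_word = 0;
//   ...
//   bool acquired = (Atomic::cmpxchg(1, &lock_word, 0) == 0);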

inline jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order) {

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (jlong) old_value;
}

inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
}

inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order) {
  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_INLINE_HPP