#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}
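
// Illustrative sketch (not from the original code): how the two helpers
// bracket an atomic read-modify-write for each ordering. The default
// conservative ordering pays for a full two-way fence, while acq_rel gets by
// with the cheaper lwsync/isync pair:
//
//   pre_membar(memory_order_conservative);   // emits "sync"
//   /* ll/sc update loop */
//   post_membar(memory_order_conservative);  // emits "sync"
//
//   pre_membar(memory_order_acq_rel);        // emits "lwsync"
//   /* ll/sc update loop */
//   post_membar(memory_order_acq_rel);       // emits "isync"
//
// memory_order_relaxed hits the 'break' arms in both switches, so no barrier
// is emitted on either side.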

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
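
// Minimal usage sketch (illustrative, assuming the Atomic::add front end of
// this HotSpot version, which forwards to add_and_fetch above):
//
//   volatile int32_t counter = 0;
//   int32_t v1 = Atomic::add((int32_t)1, &counter);
//       // default conservative order: sync ... ll/sc loop ... sync
//   int32_t v2 = Atomic::add((int32_t)1, &counter, memory_order_release);
//       // release: lwsync before the loop, post_membar is a no-op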

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
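
// Usage sketch (illustrative, assuming the Atomic::xchg front end that
// forwards here). As the note above says, xchg is conservative unless the
// caller asks for a weaker order:
//
//   volatile int32_t flag = 0;
//   int32_t prev = Atomic::xchg((int32_t)1, &flag);
//       // sync ... swap loop ... sync
//   int32_t seen = Atomic::xchg((int32_t)0, &flag, memory_order_relaxed);
//       // no barriers on either side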

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

// [Elided in this excerpt: the body of the 1-byte Atomic::PlatformCmpxchg<1>
//  specialization; only its tail survives below.]
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
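
// Usage sketch (illustrative, assuming the Atomic::cmpxchg front end). The
// "simple guard" above lets a failing compare return early without ever
// touching the reservation:
//
//   volatile int32_t state = 0;
//   int32_t witness = Atomic::cmpxchg((int32_t)1, &state, (int32_t)0);
//   bool won = (witness == 0);   // true iff we installed 1 over 0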

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"