]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Distributed under the Boost Software License, Version 1.0. | |
3 | * (See accompanying file LICENSE_1_0.txt or copy at | |
4 | * http://www.boost.org/LICENSE_1_0.txt) | |
5 | * | |
6 | * Copyright (c) 2009 Helge Bahmann | |
7 | * Copyright (c) 2013 Tim Blechmann | |
8 | * Copyright (c) 2014 Andrey Semashev | |
9 | */ | |
10 | /*! | |
11 | * \file atomic/detail/ops_gcc_ppc.hpp | |
12 | * | |
13 | * This header contains implementation of the \c operations template. | |
14 | */ | |
15 | ||
16 | #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_PPC_HPP_INCLUDED_ | |
17 | #define BOOST_ATOMIC_DETAIL_OPS_GCC_PPC_HPP_INCLUDED_ | |
18 | ||
19 | #include <boost/memory_order.hpp> | |
20 | #include <boost/atomic/detail/config.hpp> | |
21 | #include <boost/atomic/detail/storage_type.hpp> | |
22 | #include <boost/atomic/detail/operations_fwd.hpp> | |
23 | #include <boost/atomic/capabilities.hpp> | |
24 | ||
25 | #ifdef BOOST_HAS_PRAGMA_ONCE | |
26 | #pragma once | |
27 | #endif | |
28 | ||
29 | namespace boost { | |
30 | namespace atomics { | |
31 | namespace detail { | |
32 | ||
33 | // The implementation below uses information from this document: | |
34 | // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2010.02.19a.html | |
35 | ||
36 | /* | |
37 | Refer to: Motorola: "Programming Environments Manual for 32-Bit | |
38 | Implementations of the PowerPC Architecture", Appendix E: | |
39 | "Synchronization Programming Examples" for an explanation of what is | |
40 | going on here (can be found on the web at various places by the | |
41 | name "MPCFPE32B.pdf", Google is your friend...) | |
42 | ||
43 | Most of the atomic operations map to instructions in a relatively | |
44 | straight-forward fashion, but "load"s may at first glance appear | |
45 | a bit strange as they map to: | |
46 | ||
47 | lwz %rX, addr | |
48 | cmpw %rX, %rX | |
49 | bne- 1f | |
50 | 1: | |
51 | ||
52 | That is, the CPU is forced to perform a branch that "formally" depends | |
53 | on the value retrieved from memory. This scheme has an overhead of | |
54 | about 1-2 clock cycles per load, but it allows to map "acquire" to | |
55 | the "isync" instruction instead of "sync" uniformly and for all type | |
56 | of atomic operations. Since "isync" has a cost of about 15 clock | |
57 | cycles, while "sync" has a cost of about 50 clock cycles, the small | |
58 | penalty to atomic loads more than compensates for this. | |
59 | ||
60 | Byte- and halfword-sized atomic values are realized by encoding the | |
61 | value to be represented into a word, performing sign/zero extension | |
62 | as appropriate. This means that after add/sub operations the value | |
63 | needs fixing up to accurately preserve the wrap-around semantic of | |
64 | the smaller type. (Nothing special needs to be done for the bit-wise | |
65 | and the "exchange type" operators as the compiler already sees to | |
66 | it that values carried in registers are extended appropriately and | |
67 | everything falls into place naturally). | |
68 | ||
69 | The register constraint "b" instructs gcc to use any register | |
70 | except r0; this is sometimes required because the encoding for | |
71 | r0 is used to signify "constant zero" in a number of instructions, | |
72 | making r0 unusable in this place. For simplicity this constraint | |
73 | is used everywhere since I am too lazy to look this up on a | |
74 | per-instruction basis, and ppc has enough registers for this not | |
75 | to pose a problem. | |
76 | */ | |
77 | ||
78 | // A note about memory_order_consume. Technically, this architecture allows to avoid | |
79 | // unnecessary memory barrier after consume load since it supports data dependency ordering. | |
80 | // However, some compiler optimizations may break a seemingly valid code relying on data | |
81 | // dependency tracking by injecting bogus branches to aid out of order execution. | |
82 | // This may happen not only in Boost.Atomic code but also in user's code, which we have no | |
83 | // control of. See this thread: http://lists.boost.org/Archives/boost/2014/06/213890.php. | |
84 | // For this reason we promote memory_order_consume to memory_order_acquire. | |
85 | ||
//! Common base for all PPC operation specializations: provides the pre- and
//! post-operation memory fences that implement the requested memory_order.
struct gcc_ppc_operations_base
{
    static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;

    //! Emits the hardware fence required *before* an atomic operation.
    //! seq_cst needs a full "sync"; plain release ordering can use the cheaper
    //! "lwsync" on ppc64. 32-bit PPC has no lwsync, so any release-class order
    //! falls back to a full "sync".
    static BOOST_FORCEINLINE void fence_before(memory_order order) BOOST_NOEXCEPT
    {
#if defined(__powerpc64__) || defined(__PPC64__)
        if (order == memory_order_seq_cst)
            __asm__ __volatile__ ("sync" ::: "memory");
        else if ((order & memory_order_release) != 0)
            __asm__ __volatile__ ("lwsync" ::: "memory");
#else
        if ((order & memory_order_release) != 0)
            __asm__ __volatile__ ("sync" ::: "memory");
#endif
    }

    //! Emits the hardware fence required *after* an atomic operation.
    //! "isync" is sufficient for acquire semantics here; consume is promoted
    //! to acquire (see the note above about compiler-injected branches).
    static BOOST_FORCEINLINE void fence_after(memory_order order) BOOST_NOEXCEPT
    {
        if ((order & (memory_order_consume | memory_order_acquire)) != 0)
            __asm__ __volatile__ ("isync" ::: "memory");
    }
};
109 | ||
110 | ||
//! Native 32-bit atomic operations, built on the lwarx/stwcx. (load-reserve /
//! store-conditional) instruction pair. All smaller sizes below derive from
//! this specialization.
template< bool Signed >
struct operations< 4u, Signed > :
    public gcc_ppc_operations_base
{
    typedef typename make_storage_type< 4u, Signed >::type storage_type;
    typedef typename make_storage_type< 4u, Signed >::aligned aligned_storage_type;

    //! Plain word store. Release/seq_cst ordering is established entirely by
    //! the fence emitted in fence_before; the store itself needs no suffix.
    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        fence_before(order);
        __asm__ __volatile__
        (
            "stw %1, %0\n\t"
            : "+m" (storage)
            : "r" (v)
        );
    }

    //! Word load. For seq_cst a full "sync" precedes the load. For any
    //! acquire-class order the load is followed by a compare of the loaded
    //! register against itself plus a never-taken branch — this creates the
    //! artificial data dependency described in the header comment, which
    //! allows the cheap "isync" to provide acquire semantics.
    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        storage_type v;
        if (order == memory_order_seq_cst)
            __asm__ __volatile__ ("sync" ::: "memory");
        if ((order & (memory_order_consume | memory_order_acquire)) != 0)
        {
            __asm__ __volatile__
            (
                "lwz %0, %1\n\t"
                "cmpw %0, %0\n\t"      // compare reg with itself: formally depends on the loaded value
                "bne- 1f\n\t"          // never taken, but establishes the dependency
                "1:\n\t"
                "isync\n\t"
                : "=&r" (v)
                : "m" (storage)
                : "cr0", "memory"      // cmpw writes cr0
            );
        }
        else
        {
            __asm__ __volatile__
            (
                "lwz %0, %1\n\t"
                : "=&r" (v)
                : "m" (storage)
            );
        }
        return v;
    }

    //! Atomic swap: retries the lwarx/stwcx. pair until the store-conditional
    //! succeeds. Returns the previous value.
    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y1\n\t"
            "stwcx. %2,%y1\n\t"
            "bne- 1b\n\t"          // reservation lost — retry
            : "=&b" (original), "+Z" (storage)
            : "b" (v)
            : "cr0"                // stwcx. (record form) writes cr0
        );
        fence_after(order);
        return original;
    }

    //! Weak CAS: a single lwarx/stwcx. attempt. If the reservation is lost the
    //! operation fails without retrying, so spurious failures are possible.
    //! The loaded value is always written back to 'expected' (operand %0).
    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        int success;
        fence_before(success_order);
        __asm__ __volatile__
        (
            "li %1, 0\n\t"         // preset success flag to false
            "lwarx %0,%y2\n\t"
            "cmpw %0, %3\n\t"
            "bne- 1f\n\t"          // value mismatch — fail
            "stwcx. %4,%y2\n\t"
            "bne- 1f\n\t"          // reservation lost — fail (no retry: weak)
            "li %1, 1\n\t"
            "1:\n\t"
            : "=&b" (expected), "=&b" (success), "+Z" (storage)
            : "b" (expected), "b" (desired)
            : "cr0"
        );
        if (success)
            fence_after(success_order);
        else
            fence_after(failure_order);
        return !!success;
    }

    //! Strong CAS: like the weak form, but a lost reservation (stwcx. failure)
    //! loops back to re-load ("bne- 0b"), so it only fails on a genuine value
    //! mismatch.
    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        int success;
        fence_before(success_order);
        __asm__ __volatile__
        (
            "li %1, 0\n\t"
            "0: lwarx %0,%y2\n\t"
            "cmpw %0, %3\n\t"
            "bne- 1f\n\t"          // value mismatch — fail
            "stwcx. %4,%y2\n\t"
            "bne- 0b\n\t"          // reservation lost — retry from the load
            "li %1, 1\n\t"
            "1:\n\t"
            : "=&b" (expected), "=&b" (success), "+Z" (storage)
            : "b" (expected), "b" (desired)
            : "cr0"
        );
        if (success)
            fence_after(success_order);
        else
            fence_after(failure_order);
        return !!success;
    }

    //! fetch-and-add via an LL/SC loop; returns the value before the addition.
    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "add %1,%0,%3\n\t"
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-subtract via an LL/SC loop; returns the value before the subtraction.
    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "sub %1,%0,%3\n\t"
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-AND via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "and %1,%0,%3\n\t"
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-OR via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "or %1,%0,%3\n\t"
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-XOR via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "xor %1,%0,%3\n\t"
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! Sets the flag to 1 and returns whether it was previously set.
    static BOOST_FORCEINLINE bool test_and_set(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        return !!exchange(storage, (storage_type)1, order);
    }

    //! Clears the flag (stores 0 with the requested ordering).
    static BOOST_FORCEINLINE void clear(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        store(storage, 0, order);
    }

    static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT
    {
        return true;
    }
};
340 | ||
341 | ||
//! Unsigned 8-bit operations. Values are kept in 32-bit storage (see the
//! header comment); only add/sub need a fixup — "rlwinm ..., 0xff" masks the
//! result back to 8 bits so wrap-around matches the smaller type. Bit-wise
//! and exchange-style operations are inherited unchanged from the 4-byte base.
template< >
struct operations< 1u, false > :
    public operations< 4u, false >
{
    typedef operations< 4u, false > base_type;
    typedef base_type::storage_type storage_type;

    //! fetch-and-add with zero-extension fixup after the addition.
    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "add %1,%0,%3\n\t"
            "rlwinm %1, %1, 0, 0xff\n\t"   // truncate to 8 bits (unsigned wrap-around)
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-subtract with zero-extension fixup after the subtraction.
    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "sub %1,%0,%3\n\t"
            "rlwinm %1, %1, 0, 0xff\n\t"   // truncate to 8 bits (unsigned wrap-around)
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }
};
389 | ||
//! Signed 8-bit operations. Same scheme as the unsigned variant, but the
//! post-add/sub fixup is "extsb" (sign-extend byte) so the 32-bit storage
//! always holds the correctly sign-extended 8-bit value.
template< >
struct operations< 1u, true > :
    public operations< 4u, true >
{
    typedef operations< 4u, true > base_type;
    typedef base_type::storage_type storage_type;

    //! fetch-and-add with sign-extension fixup after the addition.
    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "add %1,%0,%3\n\t"
            "extsb %1, %1\n\t"     // sign-extend the 8-bit result
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-subtract with sign-extension fixup after the subtraction.
    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "sub %1,%0,%3\n\t"
            "extsb %1, %1\n\t"     // sign-extend the 8-bit result
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }
};
437 | ||
438 | ||
//! Unsigned 16-bit operations: identical to the 8-bit unsigned variant except
//! the post-add/sub mask is 0xffff (truncate to a halfword).
template< >
struct operations< 2u, false > :
    public operations< 4u, false >
{
    typedef operations< 4u, false > base_type;
    typedef base_type::storage_type storage_type;

    //! fetch-and-add with zero-extension fixup after the addition.
    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "add %1,%0,%3\n\t"
            "rlwinm %1, %1, 0, 0xffff\n\t"   // truncate to 16 bits (unsigned wrap-around)
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-subtract with zero-extension fixup after the subtraction.
    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "sub %1,%0,%3\n\t"
            "rlwinm %1, %1, 0, 0xffff\n\t"   // truncate to 16 bits (unsigned wrap-around)
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }
};
486 | ||
//! Signed 16-bit operations: identical to the 8-bit signed variant except the
//! post-add/sub fixup is "extsh" (sign-extend halfword).
template< >
struct operations< 2u, true > :
    public operations< 4u, true >
{
    typedef operations< 4u, true > base_type;
    typedef base_type::storage_type storage_type;

    //! fetch-and-add with sign-extension fixup after the addition.
    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "add %1,%0,%3\n\t"
            "extsh %1, %1\n\t"     // sign-extend the 16-bit result
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-subtract with sign-extension fixup after the subtraction.
    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "lwarx %0,%y2\n\t"
            "sub %1,%0,%3\n\t"
            "extsh %1, %1\n\t"     // sign-extend the 16-bit result
            "stwcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }
};
534 | ||
535 | ||
536 | #if defined(__powerpc64__) || defined(__PPC64__) | |
537 | ||
//! Native 64-bit atomic operations (ppc64 only — guarded by the surrounding
//! #if). Mirrors the 32-bit specialization one-for-one, using the doubleword
//! forms: ld/std, ldarx/stdcx., cmpd.
template< bool Signed >
struct operations< 8u, Signed > :
    public gcc_ppc_operations_base
{
    typedef typename make_storage_type< 8u, Signed >::type storage_type;
    typedef typename make_storage_type< 8u, Signed >::aligned aligned_storage_type;

    //! Plain doubleword store; ordering is provided by fence_before.
    static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        fence_before(order);
        __asm__ __volatile__
        (
            "std %1, %0\n\t"
            : "+m" (storage)
            : "r" (v)
        );
    }

    //! Doubleword load; the acquire path uses the same self-compare +
    //! never-taken branch + isync trick as the 32-bit load (see header).
    static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        storage_type v;
        if (order == memory_order_seq_cst)
            __asm__ __volatile__ ("sync" ::: "memory");
        if ((order & (memory_order_consume | memory_order_acquire)) != 0)
        {
            __asm__ __volatile__
            (
                "ld %0, %1\n\t"
                "cmpd %0, %0\n\t"      // artificial dependency on the loaded value
                "bne- 1f\n\t"          // never taken
                "1:\n\t"
                "isync\n\t"
                : "=&b" (v)
                : "m" (storage)
                : "cr0", "memory"
            );
        }
        else
        {
            __asm__ __volatile__
            (
                "ld %0, %1\n\t"
                : "=&b" (v)
                : "m" (storage)
            );
        }
        return v;
    }

    //! Atomic swap via an ldarx/stdcx. retry loop; returns the previous value.
    static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "ldarx %0,%y1\n\t"
            "stdcx. %2,%y1\n\t"
            "bne- 1b\n\t"          // reservation lost — retry
            : "=&b" (original), "+Z" (storage)
            : "b" (v)
            : "cr0"
        );
        fence_after(order);
        return original;
    }

    //! Weak CAS: single attempt; a lost reservation reports failure (spurious
    //! failures possible). The loaded value is written back to 'expected'.
    static BOOST_FORCEINLINE bool compare_exchange_weak(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        int success;
        fence_before(success_order);
        __asm__ __volatile__
        (
            "li %1, 0\n\t"         // preset success flag to false
            "ldarx %0,%y2\n\t"
            "cmpd %0, %3\n\t"
            "bne- 1f\n\t"          // value mismatch — fail
            "stdcx. %4,%y2\n\t"
            "bne- 1f\n\t"          // reservation lost — fail (no retry: weak)
            "li %1, 1\n\t"
            "1:"
            : "=&b" (expected), "=&b" (success), "+Z" (storage)
            : "b" (expected), "b" (desired)
            : "cr0"
        );
        if (success)
            fence_after(success_order);
        else
            fence_after(failure_order);
        return !!success;
    }

    //! Strong CAS: retries on a lost reservation ("bne- 0b"), failing only on
    //! a genuine value mismatch.
    static BOOST_FORCEINLINE bool compare_exchange_strong(
        storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
    {
        int success;
        fence_before(success_order);
        __asm__ __volatile__
        (
            "li %1, 0\n\t"
            "0: ldarx %0,%y2\n\t"
            "cmpd %0, %3\n\t"
            "bne- 1f\n\t"          // value mismatch — fail
            "stdcx. %4,%y2\n\t"
            "bne- 0b\n\t"          // reservation lost — retry from the load
            "li %1, 1\n\t"
            "1:\n\t"
            : "=&b" (expected), "=&b" (success), "+Z" (storage)
            : "b" (expected), "b" (desired)
            : "cr0"
        );
        if (success)
            fence_after(success_order);
        else
            fence_after(failure_order);
        return !!success;
    }

    //! fetch-and-add via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "ldarx %0,%y2\n\t"
            "add %1,%0,%3\n\t"
            "stdcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-subtract via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "ldarx %0,%y2\n\t"
            "sub %1,%0,%3\n\t"
            "stdcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-AND via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "ldarx %0,%y2\n\t"
            "and %1,%0,%3\n\t"
            "stdcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-OR via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "ldarx %0,%y2\n\t"
            "or %1,%0,%3\n\t"
            "stdcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! fetch-and-XOR via an LL/SC loop; returns the pre-operation value.
    static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
    {
        storage_type original, tmp;
        fence_before(order);
        __asm__ __volatile__
        (
            "1:\n\t"
            "ldarx %0,%y2\n\t"
            "xor %1,%0,%3\n\t"
            "stdcx. %1,%y2\n\t"
            "bne- 1b\n\t"
            : "=&b" (original), "=&b" (tmp), "+Z" (storage)
            : "b" (v)
            : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
        );
        fence_after(order);
        return original;
    }

    //! Sets the flag to 1 and returns whether it was previously set.
    static BOOST_FORCEINLINE bool test_and_set(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        return !!exchange(storage, (storage_type)1, order);
    }

    //! Clears the flag (stores 0 with the requested ordering).
    static BOOST_FORCEINLINE void clear(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
    {
        store(storage, 0, order);
    }

    static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT
    {
        return true;
    }
};
767 | ||
768 | #endif // defined(__powerpc64__) || defined(__PPC64__) | |
769 | ||
770 | ||
//! Emits a hardware memory fence matching the requested ordering.
BOOST_FORCEINLINE void thread_fence(memory_order order) BOOST_NOEXCEPT
{
    switch (order)
    {
    case memory_order_consume:
    case memory_order_acquire:
    case memory_order_release:
    case memory_order_acq_rel:
#if defined(__powerpc64__) || defined(__PPC64__)
        // ppc64: the lighter "lwsync" suffices for everything below seq_cst.
        __asm__ __volatile__ ("lwsync" ::: "memory");
        break;
#endif
        // NOTE: on 32-bit PPC the block above (including its "break") is
        // compiled out, so these cases intentionally fall through to "sync".
    case memory_order_seq_cst:
        __asm__ __volatile__ ("sync" ::: "memory");
        break;
    default:;   // memory_order_relaxed: no fence
    }
}
789 | ||
//! Compiler-only fence: prevents the compiler from reordering memory accesses
//! across this point but emits no hardware instruction.
BOOST_FORCEINLINE void signal_fence(memory_order order) BOOST_NOEXCEPT
{
    if (order != memory_order_relaxed)
#if defined(__ibmxl__) || defined(__IBMCPP__)
        // IBM XL compilers provide a builtin for a compiler barrier.
        __fence();
#else
        // Empty asm with a "memory" clobber acts as a compiler barrier.
        __asm__ __volatile__ ("" ::: "memory");
#endif
}
799 | ||
800 | } // namespace detail | |
801 | } // namespace atomics | |
802 | } // namespace boost | |
803 | ||
804 | #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_PPC_HPP_INCLUDED_ |