2 * Distributed under the Boost Software License, Version 1.0.
3 * (See accompanying file LICENSE_1_0.txt or copy at
4 * http://www.boost.org/LICENSE_1_0.txt)
6 * Copyright (c) 2009 Helge Bahmann
7 * Copyright (c) 2013 Tim Blechmann
8 * Copyright (c) 2014 Andrey Semashev
11 * \file atomic/detail/ops_gcc_ppc.hpp
13 * This header contains implementation of the \c operations template.
16 #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_PPC_HPP_INCLUDED_
17 #define BOOST_ATOMIC_DETAIL_OPS_GCC_PPC_HPP_INCLUDED_
19 #include <boost/memory_order.hpp>
20 #include <boost/atomic/detail/config.hpp>
21 #include <boost/atomic/detail/storage_type.hpp>
22 #include <boost/atomic/detail/operations_fwd.hpp>
23 #include <boost/atomic/capabilities.hpp>
25 #ifdef BOOST_HAS_PRAGMA_ONCE
33 // The implementation below uses information from this document:
34 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2010.02.19a.html
37 Refer to: Motorola: "Programming Environments Manual for 32-Bit
38 Implementations of the PowerPC Architecture", Appendix E:
39 "Synchronization Programming Examples" for an explanation of what is
40 going on here (can be found on the web at various places by the
41 name "MPCFPE32B.pdf", Google is your friend...)
43 Most of the atomic operations map to instructions in a relatively
44 straight-forward fashion, but "load"s may at first glance appear
45 a bit strange as they map to:
52 That is, the CPU is forced to perform a branch that "formally" depends
53 on the value retrieved from memory. This scheme has an overhead of
54 about 1-2 clock cycles per load, but it allows mapping "acquire" to
55 the "isync" instruction instead of "sync" uniformly and for all type
56 of atomic operations. Since "isync" has a cost of about 15 clock
57 cycles, while "sync" has a cost of about 50 clock cycles, the small
58 penalty to atomic loads more than compensates for this.
60 Byte- and halfword-sized atomic values are realized by encoding the
61 value to be represented into a word, performing sign/zero extension
62 as appropriate. This means that after add/sub operations the value
63 needs fixing up to accurately preserve the wrap-around semantic of
64 the smaller type. (Nothing special needs to be done for the bit-wise
65 and the "exchange type" operators as the compiler already sees to
66 it that values carried in registers are extended appropriately and
67 everything falls into place naturally).
69 The register constraint "b" instructs gcc to use any register
70 except r0; this is sometimes required because the encoding for
71 r0 is used to signify "constant zero" in a number of instructions,
72 making r0 unusable in this place. For simplicity this constraint
73 is used everywhere since I am too lazy to look this up on a
74 per-instruction basis, and ppc has enough registers for this not
78 // A note about memory_order_consume. Technically, this architecture makes it possible to avoid
79 // the memory barrier after a consume load, since it supports data dependency ordering.
80 // However, some compiler optimizations may break seemingly valid code relying on data
81 // dependency tracking by injecting bogus branches to aid out of order execution.
82 // This may happen not only in Boost.Atomic code but also in user's code, which we have no
83 // control of. See this thread: http://lists.boost.org/Archives/boost/2014/06/213890.php.
84 // For this reason we promote memory_order_consume to memory_order_acquire.
86 struct gcc_ppc_operations_base
88 static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
90 static BOOST_FORCEINLINE void fence_before(memory_order order) BOOST_NOEXCEPT
92 #if defined(__powerpc64__) || defined(__PPC64__)
93 if (order == memory_order_seq_cst)
94 __asm__ __volatile__ ("sync" ::: "memory");
95 else if ((order & memory_order_release) != 0)
96 __asm__ __volatile__ ("lwsync" ::: "memory");
98 if ((order & memory_order_release) != 0)
99 __asm__ __volatile__ ("sync" ::: "memory");
103 static BOOST_FORCEINLINE void fence_after(memory_order order) BOOST_NOEXCEPT
105 if ((order & (memory_order_consume | memory_order_acquire)) != 0)
106 __asm__ __volatile__ ("isync" ::: "memory");
111 template< bool Signed >
112 struct operations< 4u, Signed > :
113 public gcc_ppc_operations_base
115 typedef typename make_storage_type< 4u, Signed >::type storage_type;
116 typedef typename make_storage_type< 4u, Signed >::aligned aligned_storage_type;
118 static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
129 static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order order) BOOST_NOEXCEPT
132 if (order == memory_order_seq_cst)
133 __asm__ __volatile__ ("sync" ::: "memory");
134 if ((order & (memory_order_consume | memory_order_acquire)) != 0)
160 static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
162 storage_type original;
170 : "=&b" (original), "+Z" (storage)
178 static BOOST_FORCEINLINE bool compare_exchange_weak(
179 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
182 fence_before(success_order);
193 : "=&b" (expected), "=&b" (success), "+Z" (storage)
194 : "b" (expected), "b" (desired)
198 fence_after(success_order);
200 fence_after(failure_order);
204 static BOOST_FORCEINLINE bool compare_exchange_strong(
205 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
208 fence_before(success_order);
212 "0: lwarx %0,%y2\n\t"
219 : "=&b" (expected), "=&b" (success), "+Z" (storage)
220 : "b" (expected), "b" (desired)
224 fence_after(success_order);
226 fence_after(failure_order);
230 static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
232 storage_type original, tmp;
241 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
243 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
249 static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
251 storage_type original, tmp;
260 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
262 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
268 static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
270 storage_type original, tmp;
279 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
281 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
287 static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
289 storage_type original, tmp;
298 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
300 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
306 static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
308 storage_type original, tmp;
317 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
319 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
325 static BOOST_FORCEINLINE bool test_and_set(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
327 return !!exchange(storage, (storage_type)1, order);
330 static BOOST_FORCEINLINE void clear(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
332 store(storage, 0, order);
335 static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT
343 struct operations< 1u, false > :
344 public operations< 4u, false >
346 typedef operations< 4u, false > base_type;
347 typedef base_type::storage_type storage_type;
349 static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
351 storage_type original, tmp;
358 "rlwinm %1, %1, 0, 0xff\n\t"
361 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
363 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
369 static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
371 storage_type original, tmp;
378 "rlwinm %1, %1, 0, 0xff\n\t"
381 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
383 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
391 struct operations< 1u, true > :
392 public operations< 4u, true >
394 typedef operations< 4u, true > base_type;
395 typedef base_type::storage_type storage_type;
397 static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
399 storage_type original, tmp;
409 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
411 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
417 static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
419 storage_type original, tmp;
429 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
431 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
440 struct operations< 2u, false > :
441 public operations< 4u, false >
443 typedef operations< 4u, false > base_type;
444 typedef base_type::storage_type storage_type;
446 static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
448 storage_type original, tmp;
455 "rlwinm %1, %1, 0, 0xffff\n\t"
458 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
460 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
466 static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
468 storage_type original, tmp;
475 "rlwinm %1, %1, 0, 0xffff\n\t"
478 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
480 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
488 struct operations< 2u, true > :
489 public operations< 4u, true >
491 typedef operations< 4u, true > base_type;
492 typedef base_type::storage_type storage_type;
494 static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
496 storage_type original, tmp;
506 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
508 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
514 static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
516 storage_type original, tmp;
526 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
528 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
536 #if defined(__powerpc64__) || defined(__PPC64__)
538 template< bool Signed >
539 struct operations< 8u, Signed > :
540 public gcc_ppc_operations_base
542 typedef typename make_storage_type< 8u, Signed >::type storage_type;
543 typedef typename make_storage_type< 8u, Signed >::aligned aligned_storage_type;
545 static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
556 static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order order) BOOST_NOEXCEPT
559 if (order == memory_order_seq_cst)
560 __asm__ __volatile__ ("sync" ::: "memory");
561 if ((order & (memory_order_consume | memory_order_acquire)) != 0)
587 static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
589 storage_type original;
597 : "=&b" (original), "+Z" (storage)
605 static BOOST_FORCEINLINE bool compare_exchange_weak(
606 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
609 fence_before(success_order);
620 : "=&b" (expected), "=&b" (success), "+Z" (storage)
621 : "b" (expected), "b" (desired)
625 fence_after(success_order);
627 fence_after(failure_order);
631 static BOOST_FORCEINLINE bool compare_exchange_strong(
632 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
635 fence_before(success_order);
639 "0: ldarx %0,%y2\n\t"
646 : "=&b" (expected), "=&b" (success), "+Z" (storage)
647 : "b" (expected), "b" (desired)
651 fence_after(success_order);
653 fence_after(failure_order);
657 static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
659 storage_type original, tmp;
668 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
670 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
676 static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
678 storage_type original, tmp;
687 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
689 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
695 static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
697 storage_type original, tmp;
706 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
708 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
714 static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
716 storage_type original, tmp;
725 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
727 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
733 static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
735 storage_type original, tmp;
744 : "=&b" (original), "=&b" (tmp), "+Z" (storage)
746 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC
752 static BOOST_FORCEINLINE bool test_and_set(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
754 return !!exchange(storage, (storage_type)1, order);
757 static BOOST_FORCEINLINE void clear(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT
759 store(storage, 0, order);
762 static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT
768 #endif // defined(__powerpc64__) || defined(__PPC64__)
771 BOOST_FORCEINLINE void thread_fence(memory_order order) BOOST_NOEXCEPT
775 case memory_order_consume:
776 case memory_order_acquire:
777 case memory_order_release:
778 case memory_order_acq_rel:
779 #if defined(__powerpc64__) || defined(__PPC64__)
780 __asm__ __volatile__ ("lwsync" ::: "memory");
783 case memory_order_seq_cst:
784 __asm__ __volatile__ ("sync" ::: "memory");
790 BOOST_FORCEINLINE void signal_fence(memory_order order) BOOST_NOEXCEPT
792 if (order != memory_order_relaxed)
793 #if defined(__ibmxl__) || defined(__IBMCPP__)
796 __asm__ __volatile__ ("" ::: "memory");
800 } // namespace detail
801 } // namespace atomics
804 #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_PPC_HPP_INCLUDED_