/*
 * Simple interface for atomic operations.
 *
 * Copyright (C) 2013 Red Hat, Inc.
 *
 * Author: Paolo Bonzini <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 * See docs/devel/atomics.rst for discussion about the guarantees each
 * atomic primitive is meant to provide.
 */

#ifndef QEMU_ATOMIC_H
#define QEMU_ATOMIC_H

#include "compiler.h"

/* Compiler barrier */
#define barrier()   ({ asm volatile("" ::: "memory"); (void)0; })

/* The variable that receives the old value of an atomically-accessed
 * variable must be non-qualified, because atomic builtins return values
 * through a pointer-type argument as in __atomic_load(&var, &old, MODEL).
 *
 * This macro has to handle types smaller than int manually, because of
 * implicit promotion.  int and larger types, as well as pointers, can be
 * converted to a non-qualified type just by applying a binary operator.
 */
#define typeof_strip_qual(expr)                                                    \
  typeof(                                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), bool) ||                          \
        __builtin_types_compatible_p(typeof(expr), const bool) ||                  \
        __builtin_types_compatible_p(typeof(expr), volatile bool) ||               \
        __builtin_types_compatible_p(typeof(expr), const volatile bool),           \
        (bool)1,                                                                   \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), signed char) ||                   \
        __builtin_types_compatible_p(typeof(expr), const signed char) ||           \
        __builtin_types_compatible_p(typeof(expr), volatile signed char) ||        \
        __builtin_types_compatible_p(typeof(expr), const volatile signed char),    \
        (signed char)1,                                                            \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), unsigned char) ||                 \
        __builtin_types_compatible_p(typeof(expr), const unsigned char) ||         \
        __builtin_types_compatible_p(typeof(expr), volatile unsigned char) ||      \
        __builtin_types_compatible_p(typeof(expr), const volatile unsigned char),  \
        (unsigned char)1,                                                          \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), signed short) ||                  \
        __builtin_types_compatible_p(typeof(expr), const signed short) ||          \
        __builtin_types_compatible_p(typeof(expr), volatile signed short) ||       \
        __builtin_types_compatible_p(typeof(expr), const volatile signed short),   \
        (signed short)1,                                                           \
    __builtin_choose_expr(                                                         \
      __builtin_types_compatible_p(typeof(expr), unsigned short) ||                \
        __builtin_types_compatible_p(typeof(expr), const unsigned short) ||        \
        __builtin_types_compatible_p(typeof(expr), volatile unsigned short) ||     \
        __builtin_types_compatible_p(typeof(expr), const volatile unsigned short), \
        (unsigned short)1,                                                         \
      (expr)+0))))))

#ifndef __ATOMIC_RELAXED
#error "Expecting C11 atomic ops"
#endif

/* Manual memory barriers
 *
 * __atomic_thread_fence does not include a compiler barrier; instead,
 * the barrier is part of __atomic_load/__atomic_store's "volatile-like"
 * semantics.  If smp_wmb() is a no-op, absence of the barrier means that
 * the compiler is free to reorder stores on each side of the barrier.
 * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends().
 */

#define smp_mb()          ({ barrier(); __atomic_thread_fence(__ATOMIC_SEQ_CST); })
#define smp_mb_release()  ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); })
#define smp_mb_acquire()  ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); })

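/*
 * Minimal sketch (hypothetical flags, not part of this header): only a
 * full barrier orders a store against a *later* load, e.g. in a
 * Dekker-style handshake; qatomic_set()/qatomic_read() are defined below,
 * and the other thread must issue the mirror-image sequence:
 *
 *     qatomic_set(&my_flag, 1);
 *     smp_mb();
 *     if (!qatomic_read(&their_flag)) {
 *         ... the other thread is guaranteed to observe my_flag ...
 *     }
 */
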
/* Most compilers currently treat consume and acquire the same, but really
 * no processors except Alpha need a barrier here.  Leave it in if
 * using Thread Sanitizer to avoid warnings, otherwise optimize it away.
 */
#ifdef QEMU_SANITIZE_THREAD
#define smp_read_barrier_depends()   ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); })
#elif defined(__alpha__)
#define smp_read_barrier_depends()   asm volatile("mb":::"memory")
#else
#define smp_read_barrier_depends()   barrier()
#endif

/*
 * A signal barrier forces all pending local memory ops to be observed before
 * a SIGSEGV is delivered to the *same* thread.  In practice this is exactly
 * the same as barrier(), but since we have the correct builtin, use it.
 */
#define signal_barrier()    __atomic_signal_fence(__ATOMIC_SEQ_CST)

/* Sanity check that the size of an atomic operation isn't "overly large".
 * Despite the fact that e.g. i686 has 64-bit atomic operations, we do not
 * want to use them because we ought not need them, and this lets us do a
 * bit of sanity checking that other 32-bit hosts might build.
 *
 * That said, we have a problem on 64-bit ILP32 hosts in that in order to
 * sync with TCG_OVERSIZED_GUEST, this must match TCG_TARGET_REG_BITS.
 * We'd prefer not to pull in everything else TCG related, so handle
 * those few cases by hand.
 *
 * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
 * Sparc we always force the use of sparcv9 in configure.  MIPS n32 (ILP32) &
 * n64 (LP64) ABIs are both detected using __mips64.
 */
#if defined(__x86_64__) || defined(__sparc__) || defined(__mips64)
# define ATOMIC_REG_SIZE  8
#else
# define ATOMIC_REG_SIZE  sizeof(void *)
#endif

/* Weak atomic operations prevent the compiler moving other
 * loads/stores past the atomic operation load/store.  However there is
 * no explicit memory barrier for the processor.
 *
 * The C11 memory model says that variables that are accessed from
 * different threads should at least be done with __ATOMIC_RELAXED
 * primitives or the result is undefined.  Generally this has little to
 * no effect on the generated code but not using the atomic primitives
 * will get flagged by sanitizers as a violation.
 */
#define qatomic_read__nocheck(ptr) \
    __atomic_load_n(ptr, __ATOMIC_RELAXED)

#define qatomic_read(ptr)                                \
    ({                                                   \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);  \
    qatomic_read__nocheck(ptr);                          \
    })

#define qatomic_set__nocheck(ptr, i) \
    __atomic_store_n(ptr, i, __ATOMIC_RELAXED)

#define qatomic_set(ptr, i) do {                         \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);  \
    qatomic_set__nocheck(ptr, i);                        \
} while(0)

/* See above: most compilers currently treat consume and acquire the
 * same, but this slows down qatomic_rcu_read unnecessarily.
 */
#ifdef QEMU_SANITIZE_THREAD
#define qatomic_rcu_read__nocheck(ptr, valptr)           \
    __atomic_load(ptr, valptr, __ATOMIC_CONSUME);
#else
#define qatomic_rcu_read__nocheck(ptr, valptr)           \
    __atomic_load(ptr, valptr, __ATOMIC_RELAXED);        \
    smp_read_barrier_depends();
#endif

/*
 * Preprocessor sorcery ahead: use a different identifier for the
 * local variable in each expansion, so we can nest macro calls
 * without shadowing variables.
 */
#define qatomic_rcu_read_internal(ptr, _val)             \
    ({                                                   \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);  \
    typeof_strip_qual(*ptr) _val;                        \
    qatomic_rcu_read__nocheck(ptr, &_val);               \
    _val;                                                \
    })
#define qatomic_rcu_read(ptr) \
    qatomic_rcu_read_internal((ptr), MAKE_IDENTFIER(_val))

#define qatomic_rcu_set(ptr, i) do {                     \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);  \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);          \
} while(0)

#define qatomic_load_acquire(ptr)                        \
    ({                                                   \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);  \
    typeof_strip_qual(*ptr) _val;                        \
    __atomic_load(ptr, &_val, __ATOMIC_ACQUIRE);         \
    _val;                                                \
    })

#define qatomic_store_release(ptr, i)  do {              \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);  \
    __atomic_store_n(ptr, i, __ATOMIC_RELEASE);          \
} while(0)

/* All the remaining operations are fully sequentially consistent */

#define qatomic_xchg__nocheck(ptr, i)    ({                  \
    __atomic_exchange_n(ptr, (i), __ATOMIC_SEQ_CST);         \
})

#define qatomic_xchg(ptr, i)    ({                           \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);      \
    qatomic_xchg__nocheck(ptr, i);                           \
})

/* Returns the eventual value, failed or not */
#define qatomic_cmpxchg__nocheck(ptr, old, new)    ({        \
    typeof_strip_qual(*ptr) _old = (old);                    \
    (void)__atomic_compare_exchange_n(ptr, &_old, new, false, \
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
    _old;                                                    \
})

#define qatomic_cmpxchg(ptr, old, new)    ({                 \
    qemu_build_assert(sizeof(*ptr) <= ATOMIC_REG_SIZE);      \
    qatomic_cmpxchg__nocheck(ptr, old, new);                 \
})

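/*
 * Example (illustrative): a lock-free "record the maximum" update built
 * from a compare-and-swap loop over a hypothetical max_seen variable:
 *
 *     uint32_t old = qatomic_read(&max_seen);
 *     while (val > old) {
 *         uint32_t cur = qatomic_cmpxchg(&max_seen, old, val);
 *         if (cur == old) {
 *             break;                // we installed the new maximum
 *         }
 *         old = cur;                // lost the race, retry with new value
 *     }
 */
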
/* Provide shorter names for GCC atomic builtins, return old value */
#define qatomic_fetch_inc(ptr)  __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST)
#define qatomic_fetch_dec(ptr)  __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST)

#define qatomic_fetch_add(ptr, n) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_fetch_sub(ptr, n) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_fetch_and(ptr, n) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_fetch_or(ptr, n)  __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_fetch_xor(ptr, n) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST)

#define qatomic_inc_fetch(ptr)    __atomic_add_fetch(ptr, 1, __ATOMIC_SEQ_CST)
#define qatomic_dec_fetch(ptr)    __atomic_sub_fetch(ptr, 1, __ATOMIC_SEQ_CST)
#define qatomic_add_fetch(ptr, n) __atomic_add_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_sub_fetch(ptr, n) __atomic_sub_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_and_fetch(ptr, n) __atomic_and_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_or_fetch(ptr, n)  __atomic_or_fetch(ptr, n, __ATOMIC_SEQ_CST)
#define qatomic_xor_fetch(ptr, n) __atomic_xor_fetch(ptr, n, __ATOMIC_SEQ_CST)

/* And even shorter names that return void.  */
#define qatomic_inc(ptr) \
    ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_SEQ_CST))
#define qatomic_dec(ptr) \
    ((void) __atomic_fetch_sub(ptr, 1, __ATOMIC_SEQ_CST))
#define qatomic_add(ptr, n) \
    ((void) __atomic_fetch_add(ptr, n, __ATOMIC_SEQ_CST))
#define qatomic_sub(ptr, n) \
    ((void) __atomic_fetch_sub(ptr, n, __ATOMIC_SEQ_CST))
#define qatomic_and(ptr, n) \
    ((void) __atomic_fetch_and(ptr, n, __ATOMIC_SEQ_CST))
#define qatomic_or(ptr, n) \
    ((void) __atomic_fetch_or(ptr, n, __ATOMIC_SEQ_CST))
#define qatomic_xor(ptr, n) \
    ((void) __atomic_fetch_xor(ptr, n, __ATOMIC_SEQ_CST))

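/*
 * Note the difference between the two families (hypothetical counter):
 *
 *     unsigned before = qatomic_fetch_inc(&counter);   // value *before* +1
 *     unsigned after  = qatomic_inc_fetch(&counter);   // value *after* +1
 *     qatomic_inc(&counter);                           // result discarded
 */
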
#define smp_wmb()   smp_mb_release()
#define smp_rmb()   smp_mb_acquire()

/*
 * SEQ_CST is weaker than the older __sync_* builtins and Linux
 * kernel read-modify-write atomics.  Provide a macro to obtain
 * the same semantics.
 */
#if !defined(QEMU_SANITIZE_THREAD) && \
    (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
# define smp_mb__before_rmw() signal_barrier()
# define smp_mb__after_rmw() signal_barrier()
#else
# define smp_mb__before_rmw() smp_mb()
# define smp_mb__after_rmw() smp_mb()
#endif

/*
 * On some architectures, qatomic_set_mb is more efficient than a store
 * plus a fence.
 */

#if !defined(QEMU_SANITIZE_THREAD) && \
    (defined(__i386__) || defined(__x86_64__) || defined(__s390x__))
# define qatomic_set_mb(ptr, i) \
    ({ (void)qatomic_xchg(ptr, i); smp_mb__after_rmw(); })
#else
# define qatomic_set_mb(ptr, i) \
    ({ qatomic_store_release(ptr, i); smp_mb(); })
#endif

#define qatomic_fetch_inc_nonzero(ptr) ({                               \
    typeof_strip_qual(*ptr) _oldn = qatomic_read(ptr);                  \
    while (_oldn && qatomic_cmpxchg(ptr, _oldn, _oldn + 1) != _oldn) {  \
        _oldn = qatomic_read(ptr);                                      \
    }                                                                   \
    _oldn;                                                              \
})

/*
 * Abstractions to access atomically (i.e. "once") i64/u64 variables.
 *
 * The i386 abi is odd in that by default members are only aligned to
 * 4 bytes, which means that 8-byte types can wind up mis-aligned.
 * Clang will then warn about this, and emit a call into libatomic.
 *
 * Use of these types in structures when they will be used with atomic
 * operations can avoid this.
 */
typedef int64_t aligned_int64_t __attribute__((aligned(8)));
typedef uint64_t aligned_uint64_t __attribute__((aligned(8)));

#ifdef CONFIG_ATOMIC64
/* Use __nocheck because sizeof(void *) might be < sizeof(u64) */
#define qatomic_read_i64(P) \
    _Generic(*(P), int64_t: qatomic_read__nocheck(P))
#define qatomic_read_u64(P) \
    _Generic(*(P), uint64_t: qatomic_read__nocheck(P))
#define qatomic_set_i64(P, V) \
    _Generic(*(P), int64_t: qatomic_set__nocheck(P, V))
#define qatomic_set_u64(P, V) \
    _Generic(*(P), uint64_t: qatomic_set__nocheck(P, V))

static inline void qatomic64_init(void)
{
}
#else /* !CONFIG_ATOMIC64 */
int64_t qatomic_read_i64(const int64_t *ptr);
uint64_t qatomic_read_u64(const uint64_t *ptr);
void qatomic_set_i64(int64_t *ptr, int64_t val);
void qatomic_set_u64(uint64_t *ptr, uint64_t val);
void qatomic64_init(void);
#endif /* !CONFIG_ATOMIC64 */

#endif /* QEMU_ATOMIC_H */