/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives on x86_64 with GCC. */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

#define OVS_ATOMIC_X86_64_IMPL 1

/*
 * x86_64 Memory model (http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-3a-part-1-manual.html):
 *
 * - 1, 2, 4, and 8 byte loads and stores are atomic on aligned memory.
 * - Loads are not reordered with other loads.
 * - Stores are not reordered with OLDER loads.
 * - Loads may be reordered with OLDER stores to a different memory location,
 *   but not with OLDER stores to the same memory location.
 * - Stores are not reordered with other stores, except for special
 *   instructions (CLFLUSH, streaming stores, fast string operations).
 *   Most of these are not emitted by compilers, and as long as the
 *   atomic stores are not combined with any other stores, even the allowed
 *   reordering of the stores by a single fast string operation (e.g., "stos")
 *   is not a problem.
 * - Neither loads nor stores are reordered with locked instructions.
 * - Loads cannot pass earlier LFENCE or MFENCE instructions.
 * - Stores cannot pass earlier LFENCE, SFENCE, or MFENCE instructions.
 * - LFENCE instruction cannot pass earlier loads.
 * - SFENCE instruction cannot pass earlier stores.
 * - MFENCE instruction cannot pass earlier loads or stores.
 * - Stores by a single processor are observed in the same order by all
 *   processors.
 * - (Unlocked) Stores from different processors are NOT ordered.
 * - Memory ordering obeys causality (memory ordering respects transitive
 *   visibility).
 * - Any two stores are seen in a consistent order by processors other than
 *   those performing the stores.
 * - Locked instructions have total order.
 *
 * These rules imply that:
 *
 * - Locked instructions are not needed for aligned loads or stores to make
 *   them atomic.
 * - All stores have release semantics; none of the preceding stores or loads
 *   can be reordered with following stores. Following loads could still be
 *   reordered to happen before the store, but that is not a violation of the
 *   release semantics.
 * - All loads from a given memory location have acquire semantics with
 *   respect to the stores on the same memory location; none of the following
 *   loads or stores can be reordered with the load. Preceding stores to a
 *   different memory location MAY be reordered with the load, but that is not
 *   a violation of the acquire semantics (i.e., the loads and stores of two
 *   critical sections guarded by a different memory location can overlap).
 * - Locked instructions serve as CPU memory barriers by themselves.
 * - Locked stores implement the sequential consistency memory order. Using
 *   locked instructions when seq_cst memory order is requested allows normal
 *   loads to observe the stores in the same (total) order without using CPU
 *   memory barrier after the loads.
 *
 * NOTE: Some older AMD Opteron processors have a bug that violates the
 * acquire semantics described above. The bug manifests as an unlocked
 * read-modify-write operation following a "semaphore operation" operating
 * on data that existed before entering the critical section; i.e., the
 * preceding "semaphore operation" fails to function as an acquire barrier.
 * The affected CPUs are AMD family 15, models 32 to 63.
 *
 * Ref. http://support.amd.com/TechDocs/25759.pdf errata #147.
 */
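
/* As an illustration of the rules above (a sketch added here, with
 * hypothetical variables 'data' and 'flag'), the classic message-passing
 * pattern is correct on x86_64 with plain MOV loads and stores:
 *
 *     CPU 0                           CPU 1
 *     data = 42;    (plain store)     while (!flag) { ; }  (plain load)
 *     flag = 1;     (plain store)     use(data);           (sees 42)
 *
 * The store to 'flag' cannot be reordered with the older store to 'data',
 * and the load of 'data' cannot be reordered with the older load of 'flag',
 * so CPU 1 observes 'data == 42' once it sees 'flag == 1'. */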

/* Barriers. */

#define compiler_barrier() asm volatile(" " : : : "memory")
#define cpu_barrier() asm volatile("mfence;" : : : "memory")

/*
 * The 'volatile' keyword prevents the compiler from keeping the atomic
 * value in a register, and generates a new memory access for each atomic
 * operation. This allows the implementations of memory_order_relaxed and
 * memory_order_consume to avoid issuing a compiler memory barrier, allowing
 * full optimization of all surrounding non-atomic variables.
 *
 * The placement of the 'volatile' keyword after the 'TYPE' below is highly
 * significant when the TYPE is a pointer type. In that case we want the
 * pointer to be declared volatile, not the data type that is being pointed
 * at!
 */
#define ATOMIC(TYPE) TYPE volatile
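
/* For instance (an illustrative note added here, not from the original
 * source), with a pointer type the volatile qualifier binds to the pointer
 * itself:
 *
 *     ATOMIC(int *) p;    expands to    int * volatile p;
 *
 * that is, a volatile pointer to a plain int, rather than a pointer to a
 * volatile int. */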

/* Memory ordering. Must be passed in as a constant. */
typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;
\f
#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))
\f
#define ATOMIC_VAR_INIT(VALUE) VALUE
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
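
/* Usage sketch (added for illustration; 'counter' is a hypothetical
 * variable). Neither form implies any memory ordering, so they are only
 * safe to use before the variable is shared between threads:
 *
 *     static ATOMIC(uint64_t) counter = ATOMIC_VAR_INIT(0);
 *     ...
 *     atomic_init(&counter, 42);
 */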

/*
 * The memory_order_relaxed does not need a compiler barrier if the atomic
 * operation can otherwise be guaranteed not to be moved with respect to
 * other atomic operations on the same memory location. Using the 'volatile'
 * keyword in the definition of the atomic types accomplishes this, as
 * memory accesses to volatile data may not be optimized away, or be
 * reordered with other volatile accesses.
 *
 * On x86, memory_order_consume is also automatic, and data dependency on a
 * volatile atomic variable means that the compiler optimizations should not
 * cause problems. That is, the compiler should not speculate the value of
 * the atomic_read, as it is going to read it from the memory anyway.
 * This allows omitting the compiler memory barrier on atomic_reads with
 * memory_order_consume. This matches the definition of
 * smp_read_barrier_depends() in the Linux kernel as a no-op for x86, and its
 * usage in rcu_dereference().
 *
 * We use this same logic below to choose inline assembly statements with or
 * without a compiler memory barrier.
 */
static inline void
atomic_compiler_barrier(memory_order order)
{
    if (order > memory_order_consume) {
        compiler_barrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    if (order == memory_order_seq_cst) {
        cpu_barrier();
    } else {
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

#define atomic_is_lock_free(OBJ)                \
    ((void) *(OBJ),                             \
     IS_LOCKLESS_ATOMIC(*(OBJ)) ? 2 : 0)

#define atomic_exchange__(DST, SRC, ORDER)        \
    ({                                            \
        typeof(DST) dst___ = (DST);               \
        typeof(*(DST)) src___ = (SRC);            \
                                                  \
        if ((ORDER) > memory_order_consume) {     \
            asm volatile("xchg %1,%0 ; "          \
                         "# atomic_exchange__"    \
                         : "+r" (src___), /* 0 */ \
                           "+m" (*dst___) /* 1 */ \
                         :: "memory");            \
        } else {                                  \
            asm volatile("xchg %1,%0 ; "          \
                         "# atomic_exchange__"    \
                         : "+r" (src___), /* 0 */ \
                           "+m" (*dst___)); /* 1 */ \
        }                                         \
        src___;                                   \
    })

/* Atomic store: Valid memory models are:
 *
 * memory_order_relaxed, memory_order_release, and
 * memory_order_seq_cst. */
#define atomic_store_explicit(DST, SRC, ORDER)      \
    ({                                              \
        typeof(DST) dst__ = (DST);                  \
        typeof(*(DST)) src__ = (SRC);               \
                                                    \
        if ((ORDER) != memory_order_seq_cst) {      \
            atomic_compiler_barrier(ORDER);         \
            *dst__ = src__;                         \
        } else {                                    \
            atomic_exchange__(dst__, src__, ORDER); \
        }                                           \
        (void) 0;                                   \
    })
#define atomic_store(DST, SRC) \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)
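
/* Illustrative sketch (added here, not part of the original header): a
 * release store of a hypothetical 'ready' flag. Since the order is not
 * seq_cst, this compiles to a plain MOV preceded by a compiler barrier;
 * only seq_cst stores take the XCHG path above. */
static inline void
atomic_store_release_example(ATOMIC(int) *ready, int value)
{
    atomic_store_explicit(ready, value, memory_order_release);
}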

/* Atomic read: Valid memory models are:
 *
 * memory_order_relaxed, memory_order_consume, memory_order_acquire,
 * and memory_order_seq_cst. */
#define atomic_read_explicit(SRC, DST, ORDER)       \
    ({                                              \
        typeof(DST) dst__ = (DST);                  \
        typeof(SRC) src__ = (SRC);                  \
                                                    \
        *dst__ = *src__;                            \
        atomic_compiler_barrier(ORDER);             \
        (void) 0;                                   \
    })
#define atomic_read(SRC, DST) \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)
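
/* Illustrative sketch (added here; pairs with the release-store example
 * above). An acquire load on x86_64 is a plain MOV followed by a compiler
 * barrier; no fence instruction is needed even for seq_cst reads, because
 * seq_cst stores are locked. */
static inline int
atomic_read_acquire_example(ATOMIC(int) *ready)
{
    int value;

    atomic_read_explicit(ready, &value, memory_order_acquire);
    return value;
}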

#define atomic_compare_exchange__(DST, EXP, SRC, RES, CLOB) \
    asm volatile("lock; cmpxchg %3,%1 ; "                   \
                 " sete %0 "                                \
                 "# atomic_compare_exchange__"              \
                 : "=q" (RES),  /* 0 */                     \
                   "+m" (*DST), /* 1 */                     \
                   "+a" (EXP)   /* 2 */                     \
                 : "r" (SRC)    /* 3 */                     \
                 : CLOB, "cc")

/* All memory models are valid for read-modify-write operations.
 *
 * Valid memory models for the read operation of the current value in
 * the failure case are the same as for atomic read, but cannot be
 * stronger than the success memory model.
 * ORD_FAIL is ignored, as atomic_compare_exchange__ already implements
 * at least as strong a barrier as allowed for ORD_FAIL in all cases. */
#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORDER, ORD_FAIL) \
    ({                                                            \
        typeof(DST) dst__ = (DST);                                \
        typeof(DST) expp__ = (EXP);                               \
        typeof(*(DST)) src__ = (SRC);                             \
        typeof(*(DST)) exp__ = *expp__;                           \
        uint8_t res__;                                            \
        (void) ORD_FAIL;                                          \
                                                                  \
        if ((ORDER) > memory_order_consume) {                     \
            atomic_compare_exchange__(dst__, exp__, src__, res__, \
                                      "memory");                  \
        } else {                                                  \
            atomic_compare_exchange__(dst__, exp__, src__, res__, \
                                      "cc");                      \
        }                                                         \
        if (!res__) {                                             \
            *expp__ = exp__;                                      \
        }                                                         \
        (bool) res__;                                             \
    })
#define atomic_compare_exchange_strong(DST, EXP, SRC) \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC, \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)
#define atomic_compare_exchange_weak \
    atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
    atomic_compare_exchange_strong_explicit
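
/* Illustrative sketch (added here; 'counter' is a hypothetical variable): a
 * typical compare-and-swap loop that atomically doubles a value. On
 * failure the current value is written back into 'old' by the macro, so the
 * loop does not need to re-read it. */
static inline uint32_t
atomic_double_example(ATOMIC(uint32_t) *counter)
{
    uint32_t old;

    atomic_read_explicit(counter, &old, memory_order_relaxed);
    while (!atomic_compare_exchange_weak_explicit(counter, &old, old * 2,
                                                  memory_order_acq_rel,
                                                  memory_order_relaxed)) {
        continue;
    }
    return old;
}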

#define atomic_add__(RMW, ARG, CLOB)        \
    asm volatile("lock; xadd %0,%1 ; "      \
                 "# atomic_add__ "          \
                 : "+r" (ARG), /* 0 */      \
                   "+m" (*RMW) /* 1 */      \
                 :: CLOB, "cc")

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER) \
    ({                                             \
        typeof(RMW) rmw__ = (RMW);                 \
        typeof(*(RMW)) arg__ = (ARG);              \
                                                   \
        if ((ORDER) > memory_order_consume) {      \
            atomic_add__(rmw__, arg__, "memory");  \
        } else {                                   \
            atomic_add__(rmw__, arg__, "cc");      \
        }                                          \
        *(ORIG) = arg__;                           \
    })
#define atomic_add(RMW, ARG, ORIG) \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_add_explicit(RMW, -(ARG), ORIG, ORDER)
#define atomic_sub(RMW, ARG, ORIG) \
    atomic_sub_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
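
/* Illustrative sketch (added here; 'refcount' is a hypothetical variable):
 * a relaxed reference-count increment. XADD leaves the pre-increment value
 * in the register operand, which atomic_add_explicit stores through ORIG. */
static inline uint32_t
atomic_refcount_inc_example(ATOMIC(uint32_t) *refcount)
{
    uint32_t orig;

    atomic_add_explicit(refcount, 1, &orig, memory_order_relaxed);
    return orig;
}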

/* We could use simple locked instructions if the original value was not
 * needed. */
#define atomic_op__(RMW, OP, ARG, ORIG, ORDER)                         \
    ({                                                                 \
        typeof(RMW) rmw__ = (RMW);                                     \
        typeof(ARG) arg__ = (ARG);                                     \
                                                                       \
        typeof(*(RMW)) val__;                                          \
                                                                       \
        atomic_read_explicit(rmw__, &val__, memory_order_relaxed);     \
        do {                                                           \
        } while (!atomic_compare_exchange_weak_explicit(rmw__, &val__, \
                                                        val__ OP arg__, \
                                                        ORDER,         \
                                                        memory_order_relaxed)); \
        *(ORIG) = val__;                                               \
    })

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, |, ARG, ORIG, ORDER)
#define atomic_or(RMW, ARG, ORIG) \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, ^, ARG, ORIG, ORDER)
#define atomic_xor(RMW, ARG, ORIG) \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, &, ARG, ORIG, ORDER)
#define atomic_and(RMW, ARG, ORIG) \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
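
/* Illustrative sketch (added here; 'flags' and 'bit' are hypothetical):
 * atomically setting one bit through the CAS loop behind
 * atomic_or_explicit, returning the previous mask. */
static inline uint32_t
atomic_set_bit_example(ATOMIC(uint32_t) *flags, unsigned int bit)
{
    uint32_t orig;

    atomic_or_explicit(flags, UINT32_C(1) << bit, &orig,
                       memory_order_acq_rel);
    return orig;
}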

\f
/* atomic_flag */

typedef ATOMIC(int) atomic_flag;
#define ATOMIC_FLAG_INIT { false }

#define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
    ((bool) atomic_exchange__(FLAG, 1, ORDER))
#define atomic_flag_test_and_set(FLAG) \
    atomic_flag_test_and_set_explicit(FLAG, memory_order_seq_cst)

#define atomic_flag_clear_explicit(FLAG, ORDER) \
    atomic_store_explicit(FLAG, 0, ORDER)
#define atomic_flag_clear(FLAG) \
    atomic_flag_clear_explicit(FLAG, memory_order_seq_cst)
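
/* Illustrative sketch (added here, not part of the original header): a
 * minimal test-and-set spinlock built on atomic_flag. The acquire order on
 * the XCHG and the release order on the clearing store bracket the critical
 * section. */
static inline void
atomic_flag_spin_lock_example(atomic_flag *lock)
{
    while (atomic_flag_test_and_set_explicit(lock, memory_order_acquire)) {
        continue;               /* Spin until the holder clears the flag. */
    }
}

static inline void
atomic_flag_spin_unlock_example(atomic_flag *lock)
{
    atomic_flag_clear_explicit(lock, memory_order_release);
}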