/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives on x86_64 with GCC. */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

#define OVS_ATOMIC_X86_64_IMPL 1

/*
 * x86_64 Memory model (http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-vol-3a-part-1-manual.html):
 *
 * - 1, 2, 4, and 8 byte loads and stores are atomic on aligned memory.
 * - Loads are not reordered with other loads.
 * - Stores are not reordered with OLDER loads.
 * - Loads may be reordered with OLDER stores to a different memory location,
 *   but not with OLDER stores to the same memory location.
 * - Stores are not reordered with other stores, except for special
 *   instructions (CLFLUSH, streaming stores, fast string operations).
 *   Most of these are not emitted by compilers, and as long as the
 *   atomic stores are not combined with any other stores, even the allowed
 *   reordering of the stores by a single fast string operation (e.g., "stos")
 *   is not a problem.
 * - Neither loads nor stores are reordered with locked instructions.
 * - Loads cannot pass earlier LFENCE or MFENCE instructions.
 * - Stores cannot pass earlier LFENCE, SFENCE, or MFENCE instructions.
 * - LFENCE instruction cannot pass earlier loads.
 * - SFENCE instruction cannot pass earlier stores.
 * - MFENCE instruction cannot pass earlier loads or stores.
 * - Stores by a single processor are observed in the same order by all
 *   processors.
 * - (Unlocked) Stores from different processors are NOT ordered.
 * - Memory ordering obeys causality (memory ordering respects transitive
 *   visibility).
 * - Any two stores are seen in a consistent order by processors other than
 *   those performing the stores.
 * - Locked instructions have total order.
 *
 * These rules imply that:
 *
 * - Locked instructions are not needed for aligned loads or stores to make
 *   them atomic.
 * - All stores have release semantics; none of the preceding stores or loads
 *   can be reordered with following stores. Following loads could still be
 *   reordered to happen before the store, but that is not a violation of the
 *   release semantics.
 * - All loads from a given memory location have acquire semantics with
 *   respect to the stores on the same memory location; none of the following
 *   loads or stores can be reordered with the load. Preceding stores to a
 *   different memory location MAY be reordered with the load, but that is not
 *   a violation of the acquire semantics (i.e., the loads and stores of two
 *   critical sections guarded by a different memory location can overlap).
 * - Locked instructions serve as CPU memory barriers by themselves.
 * - Locked stores implement the sequential consistency memory order. Using
 *   locked instructions when seq_cst memory order is requested allows normal
 *   loads to observe the stores in the same (total) order without using CPU
 *   memory barrier after the loads.
 *
 * NOTE: Some older AMD Opteron processors have a bug that violates the
 * acquire semantics described above. The bug manifests as an unlocked
 * read-modify-write operation following a "semaphore operation" operating
 * on data that existed before entering the critical section; i.e., the
 * preceding "semaphore operation" fails to function as an acquire barrier.
 * The affected CPUs are AMD family 15, models 32 to 63.
 *
 * Ref. http://support.amd.com/TechDocs/25759.pdf errata #147.
 */
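
/*
 * Illustration only (a sketch, not part of this header's definitions; the
 * 'data', 'ready', and 'use' names are hypothetical): given the release
 * semantics of stores and the acquire semantics of loads described above,
 * a writer can publish data with an atomic store and a reader can consume
 * it with an atomic read, using the primitives defined below, without any
 * explicit CPU fence:
 *
 *     static int data;
 *     static ATOMIC(bool) ready;
 *
 *     void
 *     writer(void)
 *     {
 *         data = 42;
 *         atomic_store_explicit(&ready, true, memory_order_release);
 *     }
 *
 *     void
 *     reader(void)
 *     {
 *         bool ok;
 *
 *         atomic_read_explicit(&ready, &ok, memory_order_acquire);
 *         if (ok) {
 *             use(data);
 *         }
 *     }
 */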

/* Barriers. */

#define compiler_barrier() asm volatile(" " : : : "memory")
#define cpu_barrier() asm volatile("mfence;" : : : "memory")

/*
 * The 'volatile' keyword prevents the compiler from keeping the atomic
 * value in a register, and generates a new memory access for each atomic
 * operation. This allows the implementations of memory_order_relaxed and
 * memory_order_consume to avoid issuing a compiler memory barrier, allowing
 * full optimization of all surrounding non-atomic variables.
 *
 * The placement of the 'volatile' keyword after the 'TYPE' below is highly
 * significant when the TYPE is a pointer type. In that case we want the
 * pointer to be declared volatile, not the data type that is being pointed
 * at!
 */
#define ATOMIC(TYPE) TYPE volatile
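
/*
 * Illustration only (a sketch; 'struct flow' here is just an example type):
 * with a pointer type, the trailing 'volatile' qualifies the pointer itself,
 * which is what the atomic operations below require:
 *
 *     ATOMIC(struct flow *) p;     expands to     struct flow * volatile p;
 *
 * whereas a leading 'volatile' would instead qualify the pointed-to data:
 *
 *     volatile struct flow *q;     is a pointer to volatile data
 */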

/* Memory ordering. Must be passed in as a constant. */
typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;
\f
#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))
\f
#define ATOMIC_VAR_INIT(VALUE) VALUE
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)

/*
 * memory_order_relaxed does not need a compiler barrier, if the
 * atomic operation can otherwise be guaranteed to not be moved with
 * respect to other atomic operations on the same memory location. Using
 * the 'volatile' keyword in the definition of the atomic types
 * accomplishes this, as memory accesses to volatile data may not be
 * optimized away, or be reordered with other volatile accesses.
 *
 * On x86, memory_order_consume is also automatic, and a data dependency on a
 * volatile atomic variable means that compiler optimizations should not
 * cause problems. That is, the compiler should not speculate the value of
 * the atomic_read, as it is going to read it from memory anyway.
 * This allows omitting the compiler memory barrier on atomic_reads with
 * memory_order_consume. This matches the definition of
 * smp_read_barrier_depends() in the Linux kernel as a no-op for x86, and its
 * usage in rcu_dereference().
 *
 * We use this same logic below to choose inline assembly statements with or
 * without a compiler memory barrier.
 */
static inline void
atomic_compiler_barrier(memory_order order)
{
    if (order > memory_order_consume) {
        compiler_barrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    if (order == memory_order_seq_cst) {
        cpu_barrier();
    } else {
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

#define atomic_is_lock_free(OBJ)                \
    ((void) *(OBJ),                             \
     IS_LOCKLESS_ATOMIC(*(OBJ)) ? 2 : 0)

#define atomic_exchange__(DST, SRC, ORDER)        \
    ({                                            \
        typeof(DST) dst___ = (DST);               \
        typeof(*(DST)) src___ = (SRC);            \
                                                  \
        if ((ORDER) > memory_order_consume) {     \
            asm volatile("xchg %1,%0 ; "          \
                         "# atomic_exchange__"    \
                         : "+r" (src___), /* 0 */ \
                           "+m" (*dst___) /* 1 */ \
                         :: "memory");            \
        } else {                                  \
            asm volatile("xchg %1,%0 ; "          \
                         "# atomic_exchange__"    \
                         : "+r" (src___), /* 0 */ \
                           "+m" (*dst___)); /* 1 */ \
        }                                         \
        src___;                                   \
    })

/* Atomic store: Valid memory models are:
 *
 * memory_order_relaxed, memory_order_release, and
 * memory_order_seq_cst. */
#define atomic_store_explicit(DST, SRC, ORDER)      \
    ({                                              \
        typeof(DST) dst__ = (DST);                  \
        typeof(*(DST)) src__ = (SRC);               \
                                                    \
        if ((ORDER) != memory_order_seq_cst) {      \
            atomic_compiler_barrier(ORDER);         \
            *dst__ = src__;                         \
        } else {                                    \
            atomic_exchange__(dst__, src__, ORDER); \
        }                                           \
        (void) 0;                                   \
    })
#define atomic_store(DST, SRC) \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)
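
/*
 * Illustration only (a sketch; 'counter' is hypothetical): with the macro
 * above, a release (or relaxed) store is a plain move, preceded at most by a
 * compiler barrier, while a seq_cst store goes through atomic_exchange__ and
 * therefore uses the implicitly locked "xchg" instruction:
 *
 *     static ATOMIC(uint64_t) counter;
 *
 *     atomic_store_explicit(&counter, 1, memory_order_release);
 *     atomic_store(&counter, 2);
 */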

/* Atomic read: Valid memory models are:
 *
 * memory_order_relaxed, memory_order_consume, memory_order_acquire,
 * and memory_order_seq_cst. */
#define atomic_read_explicit(SRC, DST, ORDER)     \
    ({                                            \
        typeof(DST) dst__ = (DST);                \
        typeof(SRC) src__ = (SRC);                \
                                                  \
        *dst__ = *src__;                          \
        atomic_compiler_barrier(ORDER);           \
        (void) 0;                                 \
    })
#define atomic_read(SRC, DST) \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)
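
/*
 * Illustration only (a sketch; 'struct node' and 'head' are hypothetical):
 * a memory_order_consume read needs no compiler barrier here, because the
 * subsequent dereference carries a data dependency on the loaded pointer,
 * much like rcu_dereference() in the Linux kernel:
 *
 *     struct node { int value; };
 *     static ATOMIC(struct node *) head;
 *
 *     struct node *n;
 *
 *     atomic_read_explicit(&head, &n, memory_order_consume);
 *     if (n) {
 *         return n->value;
 *     }
 */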

#define atomic_compare_exchange__(DST, EXP, SRC, RES, CLOB) \
    asm volatile("lock; cmpxchg %3,%1 ; "         \
                 " sete %0 "                      \
                 "# atomic_compare_exchange__"    \
                 : "=q" (RES),  /* 0 */           \
                   "+m" (*DST), /* 1 */           \
                   "+a" (EXP)   /* 2 */           \
                 : "r" (SRC)    /* 3 */           \
                 : CLOB, "cc")

/* All memory models are valid for read-modify-write operations.
 *
 * Valid memory models for the read operation of the current value in
 * the failure case are the same as for atomic read, but cannot be
 * stronger than the success memory model.
 * ORD_FAIL is ignored, as atomic_compare_exchange__ already implements
 * at least as strong a barrier as allowed for ORD_FAIL in all cases. */
#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORDER, ORD_FAIL) \
    ({                                                        \
        typeof(DST) dst__ = (DST);                            \
        typeof(DST) expp__ = (EXP);                           \
        typeof(*(DST)) src__ = (SRC);                         \
        typeof(*(DST)) exp__ = *expp__;                       \
        uint8_t res__;                                        \
        (void)ORD_FAIL;                                       \
                                                              \
        if ((ORDER) > memory_order_consume) {                 \
            atomic_compare_exchange__(dst__, exp__, src__, res__, \
                                      "memory");              \
        } else {                                              \
            atomic_compare_exchange__(dst__, exp__, src__, res__, \
                                      "cc");                  \
        }                                                     \
        if (!res__) {                                         \
            *expp__ = exp__;                                  \
        }                                                     \
        (bool)res__;                                          \
    })
#define atomic_compare_exchange_strong(DST, EXP, SRC) \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC, \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)
#define atomic_compare_exchange_weak \
    atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
    atomic_compare_exchange_strong_explicit
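
/*
 * Illustration only (a sketch; 'refcount', 'limit', and 'bounded_inc' are
 * hypothetical): the typical compare-exchange retry loop. On failure,
 * atomic_compare_exchange_weak() has already updated 'old' with the current
 * value, so the loop simply retries with it:
 *
 *     static ATOMIC(uint32_t) refcount;
 *
 *     static inline bool
 *     bounded_inc(uint32_t limit)
 *     {
 *         uint32_t old;
 *
 *         atomic_read_explicit(&refcount, &old, memory_order_relaxed);
 *         do {
 *             if (old >= limit) {
 *                 return false;
 *             }
 *         } while (!atomic_compare_exchange_weak(&refcount, &old, old + 1));
 *         return true;
 *     }
 */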

#define atomic_add__(RMW, ARG, CLOB)              \
    asm volatile("lock; xadd %0,%1 ; "            \
                 "# atomic_add__ "                \
                 : "+r" (ARG), /* 0 */            \
                   "+m" (*RMW) /* 1 */            \
                 :: CLOB, "cc")

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER) \
    ({                                            \
        typeof(RMW) rmw__ = (RMW);                \
        typeof(*(RMW)) arg__ = (ARG);             \
                                                  \
        if ((ORDER) > memory_order_consume) {     \
            atomic_add__(rmw__, arg__, "memory"); \
        } else {                                  \
            atomic_add__(rmw__, arg__, "cc");     \
        }                                         \
        *(ORIG) = arg__;                          \
    })
#define atomic_add(RMW, ARG, ORIG) \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_add_explicit(RMW, -(ARG), ORIG, ORDER)
#define atomic_sub(RMW, ARG, ORIG) \
    atomic_sub_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
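
/*
 * Illustration only (a sketch; 'next_id' and 'allocate_id' are hypothetical):
 * atomic_add() is a fetch-and-add, so 'ORIG' receives the value the variable
 * held before the addition, which makes unique ID allocation a one-liner:
 *
 *     static ATOMIC(uint64_t) next_id;
 *
 *     static inline uint64_t
 *     allocate_id(void)
 *     {
 *         uint64_t id;
 *
 *         atomic_add(&next_id, 1, &id);
 *         return id;
 *     }
 */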

/* We could use simple locked instructions if the original value was not
 * needed. */
#define atomic_op__(RMW, OP, ARG, ORIG, ORDER)    \
    ({                                            \
        typeof(RMW) rmw__ = (RMW);                \
        typeof(ARG) arg__ = (ARG);                \
                                                  \
        typeof(*(RMW)) val__;                     \
                                                  \
        atomic_read_explicit(rmw__, &val__, memory_order_relaxed); \
        do {                                      \
        } while (!atomic_compare_exchange_weak_explicit(rmw__, &val__, \
                                                        val__ OP arg__, \
                                                        ORDER,          \
                                                        memory_order_relaxed)); \
        *(ORIG) = val__;                          \
    })

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, |, ARG, ORIG, ORDER)
#define atomic_or(RMW, ARG, ORIG) \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, ^, ARG, ORIG, ORDER)
#define atomic_xor(RMW, ARG, ORIG) \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, &, ARG, ORIG, ORDER)
#define atomic_and(RMW, ARG, ORIG) \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
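
/*
 * Illustration only (a sketch; 'flags' and the bit value are hypothetical):
 * the bitwise operations above are built on a compare-exchange loop because
 * the previous value must be returned in 'ORIG'; a plain "lock or" could not
 * provide it. Setting a flag bit and checking whether it was already set:
 *
 *     static ATOMIC(uint32_t) flags;
 *
 *     uint32_t old_flags;
 *
 *     atomic_or(&flags, 1u << 3, &old_flags);
 *     if (old_flags & (1u << 3)) {
 *         ...
 *     }
 */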

\f
/* atomic_flag */

typedef ATOMIC(int) atomic_flag;
#define ATOMIC_FLAG_INIT { false }

#define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
    ((bool)atomic_exchange__(FLAG, 1, ORDER))
#define atomic_flag_test_and_set(FLAG) \
    atomic_flag_test_and_set_explicit(FLAG, memory_order_seq_cst)

#define atomic_flag_clear_explicit(FLAG, ORDER) \
    atomic_store_explicit(FLAG, 0, ORDER)
#define atomic_flag_clear(FLAG) \
    atomic_flag_clear_explicit(FLAG, memory_order_seq_cst)
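
/*
 * Illustration only (a sketch; 'lock', 'spin_lock', and 'spin_unlock' are
 * hypothetical and not part of this file): atomic_flag_test_and_set()
 * returns the previous value of the flag, so it can serve as a minimal
 * test-and-set spinlock, acquiring on lock and releasing on unlock:
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     static inline void
 *     spin_lock(void)
 *     {
 *         while (atomic_flag_test_and_set_explicit(&lock,
 *                                                  memory_order_acquire)) {
 *             continue;
 *         }
 *     }
 *
 *     static inline void
 *     spin_unlock(void)
 *     {
 *         atomic_flag_clear_explicit(&lock, memory_order_release);
 *     }
 */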