/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives on 32-bit 586+ with GCC.
 */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

#define OVS_ATOMIC_I586_IMPL 1

/*
 * These assumptions have been adopted from the x86_64 memory model:
 *
 * - 1, 2, and 4 byte loads and stores are atomic on aligned memory.
 * - Loads are not reordered with other loads.
 * - Stores are not reordered with OLDER loads.
 * - Loads may be reordered with OLDER stores to a different memory location,
 *   but not with OLDER stores to the same memory location.
 * - Stores are not reordered with other stores, except maybe for special
 *   instructions not emitted by compilers, or by the stores performed by
 *   a single fast string operation (e.g., "stos"). As long as the atomic
 *   stores are not combined with any other stores, even the allowed
 *   reordering of the stores by a single fast string operation is not a
 *   problem.
 * - Neither loads nor stores are reordered with locked instructions.
 * - Stores by a single processor are observed in the same order by all
 *   processors.
 * - (Unlocked) Stores from different processors are NOT ordered.
 * - Memory ordering obeys causality (memory ordering respects transitive
 *   visibility).
 * - Any two stores are seen in a consistent order by processors other than
 *   those performing the stores.
 * - Locked instructions have a total order.
 *
 * These rules imply that:
 *
 * - Locked instructions are not needed for aligned loads or stores to make
 *   them atomic for sizes up to 4 bytes. 8-byte objects need locked
 *   instructions.
 * - All stores have release semantics; none of the preceding stores or loads
 *   can be reordered with following stores. Following loads could still be
 *   reordered to happen before the store, but that is not a violation of the
 *   release semantics.
 * - All loads from a given memory location have acquire semantics with
 *   respect to the stores on the same memory location; none of the following
 *   loads or stores can be reordered with the load. Preceding stores to a
 *   different memory location MAY be reordered with the load, but that is not
 *   a violation of the acquire semantics (i.e., the loads and stores of two
 *   critical sections guarded by a different memory location can overlap).
 * - Locked instructions serve as CPU memory barriers by themselves.
 * - Locked stores implement the sequential consistency memory order. Using
 *   locked instructions when seq_cst memory order is requested allows normal
 *   loads to observe the stores in the same (total) order without using a
 *   CPU memory barrier after the loads.
 *
 * NOTE: Some older AMD Opteron processors have a bug that violates the
 * acquire semantics described above. The bug manifests as an unlocked
 * read-modify-write operation following a "semaphore operation" operating
 * on data that existed before entering the critical section; i.e., the
 * preceding "semaphore operation" fails to function as an acquire barrier.
 * The affected CPUs are AMD family 15, models 32 to 63.
 *
 * Ref. http://support.amd.com/TechDocs/25759.pdf errata #147.
 */
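
/*
 * Illustration (an editorial sketch, not part of the original header; the
 * variables and functions named here are hypothetical): under the rules
 * above, the classic message-passing pattern needs no locked instructions.
 * The writer's two aligned stores are observed in order, and the reader's
 * loads are not reordered with each other, so once the reader sees 'flag'
 * as nonzero it is guaranteed to see 'data' as 42. Only compiler reordering
 * still has to be prevented, which is what the barriers defined below are
 * for (the atomic_* macros used here are defined later in this file):
 *
 *     static ATOMIC(uint32_t) data;
 *     static ATOMIC(uint32_t) flag;
 *
 *     void
 *     writer(void)
 *     {
 *         atomic_store_explicit(&data, 42, memory_order_relaxed);
 *         atomic_store_explicit(&flag, 1, memory_order_release);
 *     }
 *
 *     void
 *     reader(void)
 *     {
 *         uint32_t f, d;
 *
 *         do {
 *             atomic_read_explicit(&flag, &f, memory_order_acquire);
 *         } while (!f);
 *         atomic_read_explicit(&data, &d, memory_order_relaxed);
 *         // Here d == 42.
 *     }
 */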

/* Barriers. */

#define compiler_barrier() asm volatile(" " : : : "memory")
#define cpu_barrier() asm volatile("lock; addl $0,(%%esp)" ::: "memory", "cc")

/*
 * The 'volatile' keyword prevents the compiler from keeping the atomic
 * value in a register, and generates a new memory access for each atomic
 * operation. This allows the implementations of memory_order_relaxed and
 * memory_order_consume to avoid issuing a compiler memory barrier, allowing
 * full optimization of all surrounding non-atomic variables.
 *
 * The placement of the 'volatile' keyword after the 'TYPE' below is highly
 * significant when the TYPE is a pointer type. In that case we want the
 * pointer itself to be declared volatile, not the data type that is being
 * pointed at!
 *
 * The 'aligned' attribute tells the compiler to align 64-bit data on an
 * 8-byte boundary. This allows more efficient atomic access, as the CPU
 * guarantees such memory accesses to be atomic. */
#define ATOMIC(TYPE) TYPE volatile __attribute__((aligned(sizeof(TYPE))))
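
/* For example (illustrative only; 'struct flow' stands in for any pointed-to
 * type), "ATOMIC(struct flow *) fp;" expands to
 *
 *     struct flow * volatile __attribute__((aligned(sizeof(struct flow *)))) fp;
 *
 * i.e., it is the pointer itself that becomes volatile and 4-byte aligned,
 * not the object it points to. */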

/* Memory ordering. Must be passed in as a constant. */
typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;
\f
#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))
\f
#define ATOMIC_VAR_INIT(VALUE) VALUE
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
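
/* Typical initialization (a sketch; 'counter' is a hypothetical variable):
 *
 *     static ATOMIC(uint64_t) counter = ATOMIC_VAR_INIT(0);
 *
 * or, for objects that cannot be initialized statically:
 *
 *     ATOMIC(uint64_t) counter;
 *
 *     atomic_init(&counter, 0);
 *
 * Neither form implies any memory ordering; concurrent access must not begin
 * until the initialization has completed. */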

/*
 * memory_order_relaxed does not need a compiler barrier if the atomic
 * operation can otherwise be guaranteed not to be moved with respect to
 * other atomic operations on the same memory location. Using the 'volatile'
 * keyword in the definition of the atomic types accomplishes this, as memory
 * accesses to volatile data may not be optimized away, or be reordered with
 * other volatile accesses.
 *
 * On x86, memory_order_consume is also automatic: a data dependency on a
 * volatile atomic variable means that compiler optimizations should not
 * cause problems. That is, the compiler should not speculate the value of
 * the atomic_read, as it is going to read it from memory anyway.
 * This allows omitting the compiler memory barrier on atomic_reads with
 * memory_order_consume. This matches the definition of
 * smp_read_barrier_depends() in the Linux kernel as a no-op for x86, and its
 * usage in rcu_dereference().
 *
 * We use this same logic below to choose inline assembly statements with or
 * without a compiler memory barrier.
 */
static inline void
atomic_compiler_barrier(memory_order order)
{
    if (order > memory_order_consume) {
        compiler_barrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    if (order == memory_order_seq_cst) {
        cpu_barrier();
    } else {
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}
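
/* As a concrete illustration (editorial sketch) of the definitions above:
 *
 *     atomic_thread_fence(memory_order_seq_cst);   // "lock; addl $0,(%esp)"
 *     atomic_thread_fence(memory_order_release);   // compiler barrier only
 *     atomic_thread_fence(memory_order_relaxed);   // no code emitted
 *     atomic_signal_fence(memory_order_acquire);   // compiler barrier only
 *
 * Only a seq_cst thread fence costs a locked instruction; acquire, release,
 * and acq_rel fences merely pin the compiler, and relaxed or consume fences
 * compile to nothing. */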

#define atomic_is_lock_free(OBJ) \
    ((void) *(OBJ),              \
     IS_LOCKLESS_ATOMIC(*(OBJ)) ? 2 : 0)

/* The 8-byte atomic exchange uses cmpxchg8b: SRC arrives in edx:eax and is
 * copied to ecx:ebx, so it serves both as the expected value and as the new
 * value. In the likely case that the expected value does not match, the
 * current value is loaded into edx:eax, and we keep trying until the swap
 * succeeds. */
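
/* In C terms, the PIC and non-PIC variants below both implement roughly the
 * following loop (an editorial sketch only; cmpxchg8b reloads the current
 * value into edx:eax atomically, which the separate reload here only
 * approximates):
 *
 *     uint64_t expected = src;
 *
 *     while (!__sync_bool_compare_and_swap(dst, expected, src)) {
 *         expected = *dst;
 *     }
 *     // 'expected' now holds the old contents of *dst, and *dst == src.
 */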

#if defined(__PIC__)
/* ebx may not be clobbered when compiled with -fPIC, must save and
 * restore it. Furthermore, 'DST' may be addressed via ebx, so the
 * address must be passed via a register so that it remains valid also
 * after changing ebx. */
#define atomic_exchange_8__(DST, SRC, CLOB)           \
    uint32_t temp____;                                \
                                                      \
    asm volatile("      movl %%ebx,%2 ;     "         \
                 "      movl %%eax,%%ebx ;  "         \
                 "      movl %%edx,%%ecx ;  "         \
                 "1:                        "         \
                 "lock; cmpxchg8b (%0);     "         \
                 "      jne 1b ;            "         \
                 "      movl %2,%%ebx ;     "         \
                 "      # atomic_exchange_8__ "       \
                 : "+r" (DST),       /* 0 */          \
                   "+A" (SRC),       /* 1 */          \
                   "=mr" (temp____)  /* 2 */          \
                 :: "ecx", CLOB, "cc")

#else
#define atomic_exchange_8__(DST, SRC, CLOB)           \
    asm volatile("      movl %%eax,%%ebx ;  "         \
                 "      movl %%edx,%%ecx ;  "         \
                 "1:                        "         \
                 "lock; cmpxchg8b %0 ;      "         \
                 "      jne 1b ;            "         \
                 "      # atomic_exchange_8__ "       \
                 : "+m" (*DST),      /* 0 */          \
                   "+A" (SRC)        /* 1 */          \
                 :: "ebx", "ecx", CLOB, "cc")
#endif

#define atomic_exchange__(DST, SRC, ORDER)            \
    ({                                                \
        typeof(DST) dst___ = (DST);                   \
        typeof(*(DST)) src___ = (SRC);                \
                                                      \
        if ((ORDER) > memory_order_consume) {         \
            if (sizeof(*(DST)) == 8) {                \
                atomic_exchange_8__(dst___, src___, "memory"); \
            } else {                                  \
                asm volatile("xchg %1,%0 ;        "   \
                             "# atomic_exchange__ "   \
                             : "+r" (src___),  /* 0 */ \
                               "+m" (*dst___)  /* 1 */ \
                             :: "memory");            \
            }                                         \
        } else {                                      \
            if (sizeof(*(DST)) == 8) {                \
                atomic_exchange_8__(dst___, src___, "cc"); \
            } else {                                  \
                asm volatile("xchg %1,%0 ;        "   \
                             "# atomic_exchange__ "   \
                             : "+r" (src___),   /* 0 */ \
                               "+m" (*dst___)); /* 1 */ \
            }                                         \
        }                                             \
        src___;                                       \
    })

#if defined(__SSE__)
/* SSE registers are 128-bit wide, and moving the lowest 64 bits of an SSE
 * register to properly aligned memory is atomic. See ATOMIC(TYPE) above. */
#define atomic_store_8__(DST, SRC)                    \
    asm volatile("movq %1,%0 ; # atomic_store_8__"    \
                 : "=m" (*DST)   /* 0 */              \
                 : "x" (SRC))    /* 1, SSE */
#else
/* Locked 64-bit exchange is available on all i586 CPUs. */
#define atomic_store_8__(DST, SRC) \
    atomic_exchange_8__(DST, SRC, "cc")
#endif

#define atomic_store_explicit(DST, SRC, ORDER)        \
    ({                                                \
        typeof(DST) dst__ = (DST);                    \
        typeof(*(DST)) src__ = (SRC);                 \
                                                      \
        if ((ORDER) != memory_order_seq_cst) {        \
            atomic_compiler_barrier(ORDER);           \
            if (sizeof(*(DST)) == 8) {                \
                atomic_store_8__(dst__, src__);       \
            } else {                                  \
                *dst__ = src__;                       \
            }                                         \
        } else {                                      \
            atomic_exchange__(dst__, src__, ORDER);   \
        }                                             \
        (void) 0;                                     \
    })
#define atomic_store(DST, SRC) \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)
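
/* Usage sketch (editorial; 'stats' is a hypothetical variable): a 64-bit
 * store with release semantics compiles to a compiler barrier plus a single
 * "movq" (with SSE) or a cmpxchg8b exchange loop (without SSE), while a
 * seq_cst store always goes through the locked exchange above:
 *
 *     static ATOMIC(uint64_t) stats;
 *
 *     atomic_store_explicit(&stats, 0x123456789ULL, memory_order_release);
 *     atomic_store(&stats, 0);    // seq_cst: locked cmpxchg8b exchange
 */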

#if defined(__SSE__)
/* SSE registers are 128-bit wide, and moving 64 bits from properly aligned
 * memory to an SSE register is atomic. See ATOMIC(TYPE) above. */
#define atomic_read_8__(SRC, DST)                     \
    ({                                                \
        typeof(*(DST)) res__;                         \
                                                      \
        asm ("movq %1,%0 ; # atomic_read_8__"         \
             : "=x" (res__)   /* 0, SSE. */           \
             : "m" (*SRC));   /* 1 */                 \
        *(DST) = res__;                               \
    })
#else
/* Must use locked cmpxchg8b (available on all i586 CPUs) if compiled w/o SSE
 * support. Compares '*SRC' to an arbitrary value in bx:cx and returns the
 * actual value in ax:dx. The registers bx and cx are only read, so they are
 * not clobbered. */
#define atomic_read_8__(SRC, DST)                     \
    ({                                                \
        typeof(*(DST)) res__;                         \
                                                      \
        asm ("      movl %%ebx,%%eax ; "              \
             "      movl %%ecx,%%edx ; "              \
             "lock; cmpxchg8b %1 ;     "              \
             "      # atomic_read_8__  "              \
             : "=&A" (res__),  /* 0 */                \
               "+m"  (*SRC)    /* 1 */                \
             : : "cc");                               \
        *(DST) = res__;                               \
    })
#endif

#define atomic_read_explicit(SRC, DST, ORDER)         \
    ({                                                \
        typeof(DST) dst__ = (DST);                    \
        typeof(SRC) src__ = (SRC);                    \
                                                      \
        if (sizeof(*(DST)) <= 4) {                    \
            *dst__ = *src__;                          \
        } else {                                      \
            atomic_read_8__(SRC, DST);                \
        }                                             \
        atomic_compiler_barrier(ORDER);               \
        (void) 0;                                     \
    })
#define atomic_read(SRC, DST) \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)
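
/* Usage sketch (editorial; 'stats' and 'snapshot' are hypothetical): a
 * 64-bit read is atomic via a single "movq" with SSE, or via a locked
 * cmpxchg8b (which at most rewrites the value already in memory) without
 * SSE; in both cases the result lands in a plain local variable:
 *
 *     static ATOMIC(uint64_t) stats;
 *     uint64_t snapshot;
 *
 *     atomic_read_explicit(&stats, &snapshot, memory_order_acquire);
 */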

#if defined(__PIC__)
/* ebx may not be used as an input when compiled with -fPIC, must save
 * and restore it. Furthermore, 'DST' may be addressed via ebx, so
 * the address must be passed via a register so that it remains valid
 * also after changing ebx. */
#define atomic_compare_exchange_8__(DST, EXP, SRC, RES, CLOB)     \
    asm volatile("      xchgl %%ebx,%3 ;        "                 \
                 "lock; cmpxchg8b (%1) ;        "                 \
                 "      xchgl %3,%%ebx ;        "                 \
                 "      sete %0                 "                 \
                 "      # atomic_compare_exchange_8__"            \
                 : "=q" (RES),           /* 0 */                  \
                   "+r" (DST),           /* 1 */                  \
                   "+A" (EXP)            /* 2 */                  \
                 : "r" ((uint32_t)SRC),                   /* 3 */ \
                   "c" ((uint32_t)((uint64_t)SRC >> 32))  /* 4 */ \
                 : CLOB, "cc")
#else
#define atomic_compare_exchange_8__(DST, EXP, SRC, RES, CLOB)     \
    asm volatile("lock; cmpxchg8b %1 ;          "                 \
                 "      sete %0                 "                 \
                 "      # atomic_compare_exchange_8__"            \
                 : "=q" (RES),           /* 0 */                  \
                   "+m" (*DST),          /* 1 */                  \
                   "+A" (EXP)            /* 2 */                  \
                 : "b" ((uint32_t)SRC),                   /* 3 */ \
                   "c" ((uint32_t)((uint64_t)SRC >> 32))  /* 4 */ \
                 : CLOB, "cc")
#endif

#define atomic_compare_exchange__(DST, EXP, SRC, RES, CLOB)       \
    asm volatile("lock; cmpxchg %3,%1 ;         "                 \
                 "      sete %0                 "                 \
                 "      # atomic_compare_exchange__"              \
                 : "=q" (RES),   /* 0 */                          \
                   "+m" (*DST),  /* 1 */                          \
                   "+a" (EXP)    /* 2 */                          \
                 : "r" (SRC)     /* 3 */                          \
                 : CLOB, "cc")

/* ORD_FAIL is ignored, as atomic_compare_exchange__ already implements
 * at least as strong a barrier as allowed for ORD_FAIL in all cases. */
#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORDER, ORD_FAIL) \
    ({                                                            \
        typeof(DST) dst__ = (DST);                                \
        typeof(DST) expp__ = (EXP);                               \
        typeof(*(DST)) src__ = (SRC);                             \
        typeof(*(DST)) exp__ = *expp__;                           \
        uint8_t res__;                                            \
        (void) ORD_FAIL;                                          \
                                                                  \
        if ((ORDER) > memory_order_consume) {                     \
            if (sizeof(*(DST)) <= 4) {                            \
                atomic_compare_exchange__(dst__, exp__, src__, res__, \
                                          "memory");              \
            } else {                                              \
                atomic_compare_exchange_8__(dst__, exp__, src__, res__, \
                                            "memory");            \
            }                                                     \
        } else {                                                  \
            if (sizeof(*(DST)) <= 4) {                            \
                atomic_compare_exchange__(dst__, exp__, src__, res__, \
                                          "cc");                  \
            } else {                                              \
                atomic_compare_exchange_8__(dst__, exp__, src__, res__, \
                                            "cc");                \
            }                                                     \
        }                                                         \
        if (!res__) {                                             \
            *expp__ = exp__;                                      \
        }                                                         \
        (bool) res__;                                             \
    })
#define atomic_compare_exchange_strong(DST, EXP, SRC)             \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,        \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)
#define atomic_compare_exchange_weak \
    atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
    atomic_compare_exchange_strong_explicit
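
/* Typical usage (an editorial sketch; 'x' is a hypothetical variable): the
 * expected value is updated by the macro itself on every failed attempt, so
 * the retry loop needs no separate re-read:
 *
 *     static ATOMIC(uint32_t) x;
 *     uint32_t expected;
 *
 *     atomic_read_explicit(&x, &expected, memory_order_relaxed);
 *     while (!atomic_compare_exchange_weak_explicit(&x, &expected,
 *                                                   expected * 2,
 *                                                   memory_order_seq_cst,
 *                                                   memory_order_relaxed)) {
 *         continue;   // 'expected' now holds the current value of 'x'.
 *     }
 */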

#define atomic_add__(RMW, ARG, CLOB)                  \
    asm volatile("lock; xadd %0,%1 ; "                \
                 "# atomic_add__     "                \
                 : "+r" (ARG),  /* 0 */               \
                   "+m" (*RMW)  /* 1 */               \
                 :: CLOB, "cc")

#define atomic_add_32__(RMW, ARG, ORIG, ORDER)        \
    ({                                                \
        typeof(RMW) rmw__ = (RMW);                    \
        typeof(*(RMW)) arg__ = (ARG);                 \
                                                      \
        if ((ORDER) > memory_order_consume) {         \
            atomic_add__(rmw__, arg__, "memory");     \
        } else {                                      \
            atomic_add__(rmw__, arg__, "cc");         \
        }                                             \
        *(ORIG) = arg__;                              \
    })

/* We could use simple locked instructions if the original value was not
 * needed. */
#define atomic_op__(RMW, OP, ARG, ORIG, ORDER)        \
    ({                                                \
        typeof(RMW) rmw__ = (RMW);                    \
        typeof(ARG) arg__ = (ARG);                    \
                                                      \
        typeof(*(RMW)) val__;                         \
                                                      \
        atomic_read_explicit(rmw__, &val__, memory_order_relaxed); \
        do {                                          \
        } while (!atomic_compare_exchange_weak_explicit(rmw__, &val__, \
                                                        val__ OP arg__, \
                                                        ORDER,          \
                                                        memory_order_relaxed)); \
        *(ORIG) = val__;                              \
    })

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)    \
    (sizeof(*(RMW)) <= 4                              \
     ? atomic_add_32__(RMW, ARG, ORIG, ORDER)         \
     : atomic_op__(RMW, +, ARG, ORIG, ORDER))
#define atomic_add(RMW, ARG, ORIG) \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
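
/* Usage sketch (editorial; 'n_packets' is a hypothetical variable): ORIG
 * receives the value the object held just before the addition, courtesy of
 * "xadd" for 32-bit and smaller types and of the compare-and-swap loop in
 * atomic_op__ for 64-bit ones:
 *
 *     static ATOMIC(uint32_t) n_packets;
 *     uint32_t orig;
 *
 *     atomic_add(&n_packets, 5, &orig);   // n_packets += 5; orig = old value
 */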

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)    \
    (sizeof(*(RMW)) <= 4                              \
     ? atomic_add_32__(RMW, -(ARG), ORIG, ORDER)      \
     : atomic_op__(RMW, -, ARG, ORIG, ORDER))
#define atomic_sub(RMW, ARG, ORIG) \
    atomic_sub_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, |, ARG, ORIG, ORDER)
#define atomic_or(RMW, ARG, ORIG) \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, ^, ARG, ORIG, ORDER)
#define atomic_xor(RMW, ARG, ORIG) \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_op__(RMW, &, ARG, ORIG, ORDER)
#define atomic_and(RMW, ARG, ORIG) \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

\f
/* atomic_flag */

typedef ATOMIC(int) atomic_flag;
#define ATOMIC_FLAG_INIT { false }

#define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
    ((bool) atomic_exchange__(FLAG, 1, ORDER))
#define atomic_flag_test_and_set(FLAG) \
    atomic_flag_test_and_set_explicit(FLAG, memory_order_seq_cst)

#define atomic_flag_clear_explicit(FLAG, ORDER) \
    atomic_store_explicit(FLAG, 0, ORDER)
#define atomic_flag_clear(FLAG) \
    atomic_flag_clear_explicit(FLAG, memory_order_seq_cst)
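
/* Usage sketch (editorial; 'lock' and the critical section are
 * hypothetical): atomic_flag is sufficient for a simple spinlock, since
 * test-and-set returns the previous value with seq_cst ordering and clear
 * is an ordinary atomic store:
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     while (atomic_flag_test_and_set(&lock)) {
 *         continue;   // Spin until the flag was previously clear.
 *     }
 *     // ...critical section...
 *     atomic_flag_clear(&lock);
 */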