/*
 * Copyright (c) 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVS_ATOMIC_H
#define OVS_ATOMIC_H 1

/* Atomic operations.
 *
 * This library implements atomic operations with an API based on the one
 * defined in C11. It includes multiple implementations for compilers and
 * libraries with varying degrees of built-in support for C11, including a
 * fallback implementation for systems that have pthreads but no other support
 * for atomics.
 *
 * This comment describes the common features of all the implementations.
 *
 *
 * Types
 * =====
 *
 * The following atomic types are supported as typedefs for atomic versions of
 * the listed ordinary types:
 *
 *     ordinary type            atomic version
 *     -------------------      ----------------------
 *     bool                     atomic_bool
 *
 *     char                     atomic_char
 *     signed char              atomic_schar
 *     unsigned char            atomic_uchar
 *
 *     short                    atomic_short
 *     unsigned short           atomic_ushort
 *
 *     int                      atomic_int
 *     unsigned int             atomic_uint
 *
 *     long                     atomic_long
 *     unsigned long            atomic_ulong
 *
 *     long long                atomic_llong
 *     unsigned long long       atomic_ullong
 *
 *     size_t                   atomic_size_t
 *     ptrdiff_t                atomic_ptrdiff_t
 *
 *     intmax_t                 atomic_intmax_t
 *     uintmax_t                atomic_uintmax_t
 *
 *     intptr_t                 atomic_intptr_t
 *     uintptr_t                atomic_uintptr_t
 *
 *     uint8_t                  atomic_uint8_t     (*)
 *     uint16_t                 atomic_uint16_t    (*)
 *     uint32_t                 atomic_uint32_t    (*)
 *     int8_t                   atomic_int8_t      (*)
 *     int16_t                  atomic_int16_t     (*)
 *     int32_t                  atomic_int32_t     (*)
 *
 *     (*) Not specified by C11.
 *
 * Atomic types may also be obtained via ATOMIC(TYPE), e.g. ATOMIC(void *).
 * Only basic integer types and pointer types can be made atomic this way,
 * e.g. atomic structs are not supported.
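 *
 * For example, ATOMIC() can wrap a pointer type directly. A minimal sketch
 * ("struct flow" here is only an illustrative placeholder, not something this
 * header defines):
 *
 *     ATOMIC(void *) generic_ptr;
 *     ATOMIC(struct flow *) flow_ptr;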
 *
 * The atomic version of a type doesn't necessarily have the same size or
 * representation as the ordinary version; for example, atomic_int might be a
 * typedef for a struct. The range of an atomic type does match the range of
 * the corresponding ordinary type.
 *
 * C11 says that one may use the _Atomic keyword in place of the typedef name,
 * e.g. "_Atomic int" instead of "atomic_int". This library doesn't support
 * that.
 *
 *
 * Life Cycle
 * ==========
 *
 * To initialize an atomic variable at its point of definition, use
 * ATOMIC_VAR_INIT:
 *
 *     static atomic_int ai = ATOMIC_VAR_INIT(123);
 *
 * To initialize an atomic variable in code, use atomic_init():
 *
 *     static atomic_int ai;
 *     ...
 *     atomic_init(&ai, 123);
 *
 *
 * Barriers
 * ========
 *
 * enum memory_order specifies the strictness of a memory barrier. It has the
 * following values:
 *
 *    memory_order_relaxed:
 *
 *        Only atomicity is provided; this does not imply any memory ordering
 *        with respect to any other variable (atomic or not). Relaxed accesses
 *        to the same atomic variable will be performed in the program order.
 *        The compiler and CPU are free to move memory accesses to other
 *        variables past the atomic operation.
 *
 *    memory_order_consume:
 *
 *        Memory accesses with a data dependency on the result of the consume
 *        operation (atomic_read_explicit, or a load operation preceding an
 *        atomic_thread_fence) will not be moved prior to the consume
 *        barrier. Non-data-dependent loads and stores can be reordered to
 *        happen before the consume barrier.
 *
 *        RCU is the prime example of the use of the consume barrier: the
 *        consume barrier guarantees that reads from an RCU protected object
 *        are performed after the RCU protected pointer is read. A
 *        corresponding release barrier is used to store the modified RCU
 *        protected pointer after the RCU protected object has been fully
 *        constructed. The synchronization between these barriers prevents
 *        the RCU "consumer" from seeing uninitialized data.
 *
 *        May not be used with atomic_store_explicit(), as consume semantics
 *        applies only to atomic loads.
 *
 *    memory_order_acquire:
 *
 *        Memory accesses after an acquire barrier cannot be moved before the
 *        barrier. Memory accesses before an acquire barrier *can* be moved
 *        after it.
 *
 *        An atomic_thread_fence with memory_order_acquire does not have a
 *        load operation by itself; it prevents all following memory accesses
 *        from moving prior to preceding loads.
 *
 *        May not be used with atomic_store_explicit(), as acquire semantics
 *        applies only to atomic loads.
 *
 *    memory_order_release:
 *
 *        Memory accesses before a release barrier cannot be moved after the
 *        barrier. Memory accesses after a release barrier *can* be moved
 *        before it.
 *
 *        An atomic_thread_fence with memory_order_release does not have a
 *        store operation by itself; it prevents all preceding memory accesses
 *        from moving past subsequent stores.
 *
 *        May not be used with atomic_read_explicit(), as release semantics
 *        applies only to atomic stores.
 *
 *    memory_order_acq_rel:
 *
 *        Memory accesses cannot be moved across an acquire-release barrier in
 *        either direction.
 *
 *        May only be used with atomic read-modify-write operations, as both
 *        load and store operations are required for acquire-release
 *        semantics.
 *
 *        An atomic_thread_fence with memory_order_acq_rel does not have
 *        either a load or a store operation by itself; it prevents all
 *        following memory accesses from moving prior to preceding loads and
 *        all preceding memory accesses from moving past subsequent stores.
 *
 *    memory_order_seq_cst:
 *
 *        Prevents movement of memory accesses like an acquire-release
 *        barrier, but whereas acquire-release synchronizes cooperating
 *        threads (using the same atomic variable), sequential-consistency
 *        synchronizes the whole system, providing a total order for stores
 *        on all atomic variables.
 *
 * OVS atomics require the memory_order to be passed as a compile-time constant
 * value, as some compiler implementations may perform poorly if the memory
 * order parameter is passed in as a run-time value.
 *
 * The following functions insert explicit barriers. Most of the other atomic
 * functions also include barriers.
 *
 *    void atomic_thread_fence(memory_order order);
 *
 *        Inserts a barrier of the specified type.
 *
 *        For memory_order_relaxed, this is a no-op.
 *
 *    void atomic_signal_fence(memory_order order);
 *
 *        Inserts a barrier of the specified type, but only with respect to
 *        signal handlers in the same thread as the barrier. This is
 *        basically a compiler optimization barrier, except for
 *        memory_order_relaxed, which is a no-op.
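 *
 * As an illustrative sketch (using the store and read operations described in
 * the next section; all variable names below are placeholders), a release
 * store can be paired with an acquire read to publish data from one thread to
 * another:
 *
 *     static atomic_bool ready = ATOMIC_VAR_INIT(false);
 *     static int payload;
 *
 *     // Producer thread:
 *     payload = 42;       // plain, non-atomic store
 *     atomic_store_explicit(&ready, true, memory_order_release);
 *
 *     // Consumer thread:
 *     bool ok;
 *     atomic_read_explicit(&ready, &ok, memory_order_acquire);
 *     if (ok) {
 *         // The acquire read synchronizes with the release store, so
 *         // 'payload' is guaranteed to read as 42 here.
 *     }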
 *
 *
 * Atomic Operations
 * =================
 *
 * In this section, A is an atomic type and C is the corresponding non-atomic
 * type.
 *
 * The "store" and "compare_exchange" primitives match C11:
 *
 *    void atomic_store(A *object, C value);
 *    void atomic_store_explicit(A *object, C value, memory_order);
 *
 *        Atomically stores 'value' into '*object', respecting the given
 *        memory order (or memory_order_seq_cst for atomic_store()).
 *
 *    bool atomic_compare_exchange_strong(A *object, C *expected, C desired);
 *    bool atomic_compare_exchange_weak(A *object, C *expected, C desired);
 *    bool atomic_compare_exchange_strong_explicit(A *object, C *expected,
 *                                                 C desired,
 *                                                 memory_order success,
 *                                                 memory_order failure);
 *    bool atomic_compare_exchange_weak_explicit(A *object, C *expected,
 *                                               C desired,
 *                                               memory_order success,
 *                                               memory_order failure);
 *
 *        Atomically loads '*object' and compares it with '*expected' and if
 *        equal, stores 'desired' into '*object' (an atomic read-modify-write
 *        operation) and returns true, and if non-equal, stores the actual
 *        value of '*object' into '*expected' (an atomic load operation) and
 *        returns false. The memory order for the successful case (atomic
 *        read-modify-write operation) is 'success', and for the unsuccessful
 *        case (atomic load operation) 'failure'. 'failure' may not be
 *        stronger than 'success'.
 *
 *        The weak forms may fail (returning false) even when '*object' equals
 *        '*expected'. The strong form can be implemented by the weak form in
 *        a loop. Some platforms can implement the weak form more
 *        efficiently, so it should be used if the application will need to
 *        loop anyway.
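 *
 *        For example, a minimal sketch of a lock-free increment that retries
 *        with the weak form (the variable names are illustrative only):
 *
 *            static atomic_int counter = ATOMIC_VAR_INIT(0);
 *            int old = 0;
 *
 *            // On failure, 'old' is refreshed with the current value, so
 *            // the loop simply retries with the updated expectation.
 *            while (!atomic_compare_exchange_weak(&counter, &old, old + 1)) {
 *                continue;
 *            }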
 *
 * The following primitives differ from the C11 ones (and have different names)
 * because there does not appear to be a way to implement the standard
 * primitives in standard C:
 *
 *    void atomic_read(A *src, C *dst);
 *    void atomic_read_explicit(A *src, C *dst, memory_order);
 *
 *        Atomically loads a value from 'src', writing the value read into
 *        '*dst', respecting the given memory order (or memory_order_seq_cst
 *        for atomic_read()).
 *
 *    void atomic_add(A *rmw, C arg, C *orig);
 *    void atomic_sub(A *rmw, C arg, C *orig);
 *    void atomic_or(A *rmw, C arg, C *orig);
 *    void atomic_xor(A *rmw, C arg, C *orig);
 *    void atomic_and(A *rmw, C arg, C *orig);
 *    void atomic_add_explicit(A *rmw, C arg, C *orig, memory_order);
 *    void atomic_sub_explicit(A *rmw, C arg, C *orig, memory_order);
 *    void atomic_or_explicit(A *rmw, C arg, C *orig, memory_order);
 *    void atomic_xor_explicit(A *rmw, C arg, C *orig, memory_order);
 *    void atomic_and_explicit(A *rmw, C arg, C *orig, memory_order);
 *
 *        Atomically applies the given operation, with 'arg' as the second
 *        operand, to '*rmw', and stores the original value of '*rmw' into
 *        '*orig', respecting the given memory order (or memory_order_seq_cst
 *        if none is specified).
 *
 *        The results are similar to those that would be obtained with +=, -=,
 *        |=, ^=, or &= on non-atomic types.
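 *
 *        For example, a minimal sketch of a shared counter (the names are
 *        illustrative only):
 *
 *            static atomic_uint n_packets = ATOMIC_VAR_INIT(0);
 *            unsigned int orig;
 *
 *            atomic_add(&n_packets, 1, &orig);  // 'orig' receives the value
 *                                               // before the increment.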
 *
 *
 * atomic_flag
 * ===========
 *
 * atomic_flag is a typedef for a type with two states, set and clear, that
 * provides atomic test-and-set functionality.
 *
 *
 * Life Cycle
 * ----------
 *
 * ATOMIC_FLAG_INIT is an initializer for atomic_flag. The initial state is
 * "clear".
 *
 * An atomic_flag may also be initialized at runtime with atomic_flag_clear().
 *
 *
 * Operations
 * ----------
 *
 * The following functions are available.
 *
 *    bool atomic_flag_test_and_set(atomic_flag *object)
 *    bool atomic_flag_test_and_set_explicit(atomic_flag *object,
 *                                           memory_order);
 *
 *        Atomically sets '*object', respecting the given memory order (or
 *        memory_order_seq_cst for atomic_flag_test_and_set()). Returns the
 *        previous value of the flag (false for clear, true for set).
 *
 *    void atomic_flag_clear(atomic_flag *object);
 *    void atomic_flag_clear_explicit(atomic_flag *object, memory_order);
 *
 *        Atomically clears '*object', respecting the given memory order (or
 *        memory_order_seq_cst for atomic_flag_clear()).
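 *
 * As an illustrative sketch (not something this header provides), atomic_flag
 * can serve as a minimal spinlock; a dedicated lock type is normally
 * preferable in real code:
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     while (atomic_flag_test_and_set(&lock)) {
 *         // Spin until the flag was previously "clear".
 *     }
 *     // ...critical section...
 *     atomic_flag_clear(&lock);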
 */

#include <limits.h>
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "compiler.h"
#include "util.h"

#define IN_OVS_ATOMIC_H
#if __CHECKER__
/* sparse doesn't understand some GCC extensions we use. */
#include "ovs-atomic-pthreads.h"
#elif HAVE_STDATOMIC_H
#include "ovs-atomic-c11.h"
#elif __has_extension(c_atomic)
#include "ovs-atomic-clang.h"
#elif __GNUC__ >= 4 && __GNUC_MINOR__ >= 7
#include "ovs-atomic-gcc4.7+.h"
#elif __GNUC__ && defined(__x86_64__)
#include "ovs-atomic-x86_64.h"
#elif __GNUC__ && defined(__i386__)
#include "ovs-atomic-i586.h"
#elif HAVE_GCC4_ATOMICS
#include "ovs-atomic-gcc4+.h"
#else
/* ovs-atomic-pthreads implementation is provided for portability.
 * It might be too slow for real use because Open vSwitch is
 * optimized for platforms where real atomic ops are available. */
#include "ovs-atomic-pthreads.h"
#endif
#undef IN_OVS_ATOMIC_H

#ifndef OMIT_STANDARD_ATOMIC_TYPES
typedef ATOMIC(bool)               atomic_bool;

typedef ATOMIC(char)               atomic_char;
typedef ATOMIC(signed char)        atomic_schar;
typedef ATOMIC(unsigned char)      atomic_uchar;

typedef ATOMIC(short)              atomic_short;
typedef ATOMIC(unsigned short)     atomic_ushort;

typedef ATOMIC(int)                atomic_int;
typedef ATOMIC(unsigned int)       atomic_uint;

typedef ATOMIC(long)               atomic_long;
typedef ATOMIC(unsigned long)      atomic_ulong;

typedef ATOMIC(long long)          atomic_llong;
typedef ATOMIC(unsigned long long) atomic_ullong;

typedef ATOMIC(size_t)             atomic_size_t;
typedef ATOMIC(ptrdiff_t)          atomic_ptrdiff_t;

typedef ATOMIC(intmax_t)           atomic_intmax_t;
typedef ATOMIC(uintmax_t)          atomic_uintmax_t;

typedef ATOMIC(intptr_t)           atomic_intptr_t;
typedef ATOMIC(uintptr_t)          atomic_uintptr_t;
#endif  /* !OMIT_STANDARD_ATOMIC_TYPES */

/* Nonstandard atomic types. */
typedef ATOMIC(uint8_t)  atomic_uint8_t;
typedef ATOMIC(uint16_t) atomic_uint16_t;
typedef ATOMIC(uint32_t) atomic_uint32_t;

typedef ATOMIC(int8_t)   atomic_int8_t;
typedef ATOMIC(int16_t)  atomic_int16_t;
typedef ATOMIC(int32_t)  atomic_int32_t;

/* Relaxed atomic operations.
 *
 * When an operation on an atomic variable is not expected to synchronize
 * with operations on other (atomic or non-atomic) variables, no memory
 * barriers are needed and the relaxed memory ordering can be used. These
 * macros make such uses less daunting, but not invisible. */
#define atomic_store_relaxed(VAR, VALUE)                                \
    atomic_store_explicit(VAR, VALUE, memory_order_relaxed)
#define atomic_read_relaxed(VAR, DST)                                   \
    atomic_read_explicit(VAR, DST, memory_order_relaxed)
#define atomic_compare_exchange_strong_relaxed(DST, EXP, SRC)           \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,              \
                                            memory_order_relaxed,       \
                                            memory_order_relaxed)
#define atomic_compare_exchange_weak_relaxed(DST, EXP, SRC)             \
    atomic_compare_exchange_weak_explicit(DST, EXP, SRC,                \
                                          memory_order_relaxed,         \
                                          memory_order_relaxed)
#define atomic_add_relaxed(RMW, ARG, ORIG)                              \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_relaxed)
#define atomic_sub_relaxed(RMW, ARG, ORIG)                              \
    atomic_sub_explicit(RMW, ARG, ORIG, memory_order_relaxed)
#define atomic_or_relaxed(RMW, ARG, ORIG)                               \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_relaxed)
#define atomic_xor_relaxed(RMW, ARG, ORIG)                              \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_relaxed)
#define atomic_and_relaxed(RMW, ARG, ORIG)                              \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_relaxed)
#define atomic_flag_test_and_set_relaxed(FLAG)                          \
    atomic_flag_test_and_set_explicit(FLAG, memory_order_relaxed)
#define atomic_flag_clear_relaxed(FLAG)                                 \
    atomic_flag_clear_explicit(FLAG, memory_order_relaxed)
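
/* For example, a minimal sketch of a statistics counter that needs atomicity
 * but no ordering with respect to other memory (the names below are
 * illustrative placeholders, not part of this header):
 *
 *     static atomic_ullong n_hits = ATOMIC_VAR_INIT(0);
 *
 *     // Writer side: count an event.
 *     unsigned long long orig;
 *     atomic_add_relaxed(&n_hits, 1, &orig);
 *
 *     // Reader side: sample the current value; no barriers are implied.
 *     unsigned long long hits;
 *     atomic_read_relaxed(&n_hits, &hits);
 */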

/* Reference count. */
struct ovs_refcount {
    atomic_uint count;
};

/* Initializes 'refcount'. The reference count is initially 1. */
static inline void
ovs_refcount_init(struct ovs_refcount *refcount)
{
    atomic_init(&refcount->count, 1);
}

/* Increments 'refcount'.
 *
 * Does not provide a memory barrier, as the calling thread must have
 * protected access to the object already. */
static inline void
ovs_refcount_ref(struct ovs_refcount *refcount)
{
    unsigned int old_refcount;

    atomic_add_explicit(&refcount->count, 1, &old_refcount,
                        memory_order_relaxed);
    ovs_assert(old_refcount > 0);
}
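
/* Example (an illustrative sketch, not part of this header): a typical
 * reference-counted object embeds a 'struct ovs_refcount' and wraps the ref
 * operation so that callers can write 'obj = example_ref(other->obj);'.
 *
 *     struct example {
 *         struct ovs_refcount ref_cnt;
 *         // ...other members...
 *     };
 *
 *     static struct example *
 *     example_ref(struct example *ex)
 *     {
 *         ovs_refcount_ref(&ex->ref_cnt);
 *         return ex;
 *     }
 */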

/* Decrements 'refcount' and returns the previous reference count. Often used
 * in this form:
 *
 * if (ovs_refcount_unref(&object->ref_cnt) == 1) {
 *     // ...uninitialize object...
 *     free(object);
 * }
 *
 * Provides a release barrier so that the preceding loads and stores cannot be
 * reordered after the unref, and in the case of the last reference also
 * provides an acquire barrier to keep all of the following uninitialization
 * from being reordered before the atomic decrement operation. Together these
 * synchronize any concurrent unref operations between each other. */
static inline unsigned int
ovs_refcount_unref(struct ovs_refcount *refcount)
{
    unsigned int old_refcount;

    atomic_sub_explicit(&refcount->count, 1, &old_refcount,
                        memory_order_release);
    ovs_assert(old_refcount > 0);
    if (old_refcount == 1) {
        /* 'memory_order_release' above means that there are no (reordered)
         * accesses to the protected object from any thread at this point.
         * An acquire barrier is needed to keep all subsequent access to the
         * object's memory from being reordered before the atomic operation
         * above. */
        atomic_thread_fence(memory_order_acquire);
    }
    return old_refcount;
}

/* Reads and returns 'refcount_''s current reference count.
 *
 * Does not provide a memory barrier.
 *
 * Rarely useful. */
static inline unsigned int
ovs_refcount_read(const struct ovs_refcount *refcount_)
{
    struct ovs_refcount *refcount
        = CONST_CAST(struct ovs_refcount *, refcount_);
    unsigned int count;

    atomic_read_explicit(&refcount->count, &count, memory_order_relaxed);
    return count;
}

/* Increments 'refcount', but only if it is non-zero.
 *
 * This may only be called for an object which is RCU protected during
 * this call. This implies that its possible destruction is postponed
 * until all current RCU threads quiesce.
 *
 * Returns false if the refcount was zero. In this case the object may
 * be safely accessed until the current thread quiesces, but no additional
 * references to the object may be taken.
 *
 * Does not provide a memory barrier, as the calling thread must have
 * RCU protected access to the object already.
 *
 * It is critical that we never increment a zero refcount to a
 * non-zero value, as whenever a refcount reaches the zero value, the
 * protected object may be irrevocably scheduled for deletion. */
static inline bool
ovs_refcount_try_ref_rcu(struct ovs_refcount *refcount)
{
    unsigned int count;

    atomic_read_explicit(&refcount->count, &count, memory_order_relaxed);
    do {
        if (count == 0) {
            return false;
        }
    } while (!atomic_compare_exchange_weak_explicit(&refcount->count, &count,
                                                    count + 1,
                                                    memory_order_relaxed,
                                                    memory_order_relaxed));
    return true;
}
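
/* Example (an illustrative sketch, continuing the placeholder 'struct
 * example' above; 'example_lookup' is also a placeholder): taking a new
 * reference to an object found through an RCU-protected lookup.
 *
 *     struct example *ex = example_lookup(key);
 *     if (ex && !ovs_refcount_try_ref_rcu(&ex->ref_cnt)) {
 *         // No new reference was taken: 'ex' may still be read until this
 *         // thread quiesces, but it must not be kept beyond that.
 *         ex = NULL;
 *     }
 */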

/* Decrements 'refcount' and returns the previous reference count. To
 * be used only when a memory barrier is already provided for the
 * protected object independently.
 *
 * For example:
 *
 * if (ovs_refcount_unref_relaxed(&object->ref_cnt) == 1) {
 *     // Schedule uninitialization and freeing of the object:
 *     ovsrcu_postpone(destructor_function, object);
 * }
 *
 * Here RCU quiescing already provides a full memory barrier. No additional
 * barriers are needed here.
 *
 * Or:
 *
 * if (stp && ovs_refcount_unref_relaxed(&stp->ref_cnt) == 1) {
 *     ovs_mutex_lock(&mutex);
 *     list_remove(&stp->node);
 *     ovs_mutex_unlock(&mutex);
 *     free(stp->name);
 *     free(stp);
 * }
 *
 * Here a mutex is used to guard access to all of 'stp' apart from
 * 'ref_cnt'. Hence all changes to 'stp' by other threads must be
 * visible when we get the mutex, and no access after the unlock can
 * be reordered to happen prior to the lock operation. No additional
 * barriers are needed here.
 */
static inline unsigned int
ovs_refcount_unref_relaxed(struct ovs_refcount *refcount)
{
    unsigned int old_refcount;

    atomic_sub_explicit(&refcount->count, 1, &old_refcount,
                        memory_order_relaxed);
    ovs_assert(old_refcount > 0);
    return old_refcount;
}

#endif /* ovs-atomic.h */