/*
 * Copyright (c) 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVS_ATOMIC_H
#define OVS_ATOMIC_H 1

/* Atomic operations.
 *
 * This library implements atomic operations with an API based on the one
 * defined in C11.  It includes multiple implementations for compilers and
 * libraries with varying degrees of built-in support for C11, including a
 * fallback implementation for systems that have pthreads but no other support
 * for atomics.
 *
 * This comment describes the common features of all the implementations.
 *
 *
 * Types
 * =====
 *
 * The following atomic types are supported as typedefs for atomic versions of
 * the listed ordinary types:
 *
 *     ordinary type            atomic version
 *     -------------------      ----------------------
 *     bool                     atomic_bool
 *
 *     char                     atomic_char
 *     signed char              atomic_schar
 *     unsigned char            atomic_uchar
 *
 *     short                    atomic_short
 *     unsigned short           atomic_ushort
 *
 *     int                      atomic_int
 *     unsigned int             atomic_uint
 *
 *     long                     atomic_long
 *     unsigned long            atomic_ulong
 *
 *     long long                atomic_llong
 *     unsigned long long       atomic_ullong
 *
 *     size_t                   atomic_size_t
 *     ptrdiff_t                atomic_ptrdiff_t
 *
 *     intmax_t                 atomic_intmax_t
 *     uintmax_t                atomic_uintmax_t
 *
 *     intptr_t                 atomic_intptr_t
 *     uintptr_t                atomic_uintptr_t
 *
 *     uint8_t                  atomic_uint8_t     (*)
 *     uint16_t                 atomic_uint16_t    (*)
 *     uint32_t                 atomic_uint32_t    (*)
 *     int8_t                   atomic_int8_t      (*)
 *     int16_t                  atomic_int16_t     (*)
 *     int32_t                  atomic_int32_t     (*)
 *
 *     (*) Not specified by C11.
 *
 * Atomic types may also be obtained via ATOMIC(TYPE), e.g. ATOMIC(void *).
 * Only basic integer types and pointer types can be made atomic this way,
 * e.g. atomic structs are not supported.
 *
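 * For example (a sketch only; the names below are illustrative and not part
 * of this API), a pointer type can be made atomic either directly or through
 * a typedef:
 *
 *     static ATOMIC(struct flow *) flowp;
 *     typedef ATOMIC(const char *) atomic_cstr;
 *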
 * The atomic version of a type doesn't necessarily have the same size or
 * representation as the ordinary version; for example, atomic_int might be a
 * typedef for a struct.  The range of an atomic type does match the range of
 * the corresponding ordinary type.
 *
 * C11 says that one may use the _Atomic keyword in place of the typedef name,
 * e.g. "_Atomic int" instead of "atomic_int".  This library doesn't support
 * that.
 *
 *
 * Life Cycle
 * ==========
 *
 * To initialize an atomic variable at its point of definition, use
 * ATOMIC_VAR_INIT:
 *
 *     static atomic_int ai = ATOMIC_VAR_INIT(123);
 *
 * To initialize an atomic variable in code, use atomic_init():
 *
 *     static atomic_int ai;
 *     ...
 *     atomic_init(&ai, 123);
 *
 *
 * Barriers
 * ========
 *
 * enum memory_order specifies the strictness of a memory barrier.  It has the
 * following values:
 *
 *    memory_order_relaxed:
 *
 *        Only atomicity is provided, does not imply any memory ordering with
 *        respect to any other variable (atomic or not).  Relaxed accesses to
 *        the same atomic variable will be performed in the program order.
 *        The compiler and CPU are free to move memory accesses to other
 *        variables past the atomic operation.
 *
 *    memory_order_consume:
 *
 *        Memory accesses with a data dependency on the result of the consume
 *        operation (atomic_read_explicit, or a load operation preceding an
 *        atomic_thread_fence) will not be moved prior to the consume
 *        barrier.  Non-data-dependent loads and stores can be reordered to
 *        happen before the consume barrier.
 *
 *        RCU is the prime example of the use of the consume barrier: The
 *        consume barrier guarantees that reads from an RCU protected object
 *        are performed after the RCU protected pointer is read.  A
 *        corresponding release barrier is used to store the modified RCU
 *        protected pointer after the RCU protected object has been fully
 *        constructed.  The synchronization between these barriers prevents
 *        the RCU "consumer" from seeing uninitialized data.
 *
 *        May not be used with atomic_store_explicit(), as consume semantics
 *        applies only to atomic loads.
 *
 *    memory_order_acquire:
 *
 *        Memory accesses after an acquire barrier cannot be moved before the
 *        barrier.  Memory accesses before an acquire barrier *can* be moved
 *        after it.
 *
 *        An atomic_thread_fence with memory_order_acquire does not have a
 *        load operation by itself; it prevents all following memory accesses
 *        from moving prior to preceding loads.
 *
 *        May not be used with atomic_store_explicit(), as acquire semantics
 *        applies only to atomic loads.
 *
 *    memory_order_release:
 *
 *        Memory accesses before a release barrier cannot be moved after the
 *        barrier.  Memory accesses after a release barrier *can* be moved
 *        before it.
 *
 *        An atomic_thread_fence with memory_order_release does not have a
 *        store operation by itself; it prevents all preceding memory accesses
 *        from moving past subsequent stores.
 *
 *        May not be used with atomic_read_explicit(), as release semantics
 *        applies only to atomic stores.
 *
 *    memory_order_acq_rel:
 *
 *        Memory accesses cannot be moved across an acquire-release barrier in
 *        either direction.
 *
 *        May only be used with atomic read-modify-write operations, as both
 *        load and store operations are required for acquire-release semantics.
 *
 *        An atomic_thread_fence with memory_order_acq_rel does not have
 *        either a load or a store operation by itself; it prevents all
 *        following memory accesses from moving prior to preceding loads and
 *        all preceding memory accesses from moving past subsequent stores.
 *
 *    memory_order_seq_cst:
 *
 *        Prevents movement of memory accesses like an acquire-release
 *        barrier, but whereas acquire-release synchronizes cooperating
 *        threads (using the same atomic variable), sequential-consistency
 *        synchronizes the whole system, providing a total order for stores
 *        on all atomic variables.
 *
 * OVS atomics require the memory_order to be passed as a compile-time constant
 * value, as some compiler implementations may perform poorly if the memory
 * order parameter is passed in as a run-time value.
 *
 * The following functions insert explicit barriers.  Most of the other atomic
 * functions also include barriers.
 *
 *     void atomic_thread_fence(memory_order order);
 *
 *         Inserts a barrier of the specified type.
 *
 *         For memory_order_relaxed, this is a no-op.
 *
 *     void atomic_signal_fence(memory_order order);
 *
 *         Inserts a barrier of the specified type, but only with respect to
 *         signal handlers in the same thread as the barrier.  This is
 *         basically a compiler optimization barrier, except for
 *         memory_order_relaxed, which is a no-op.
 *
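 * As an illustration of how the release and acquire orderings described above
 * pair up, here is a minimal sketch of a flag-passing pattern.  The names
 * 'data' and 'flag' and the thread roles are illustrative only, and the load
 * and store primitives used are documented in the next section:
 *
 *     static atomic_bool flag = ATOMIC_VAR_INIT(false);
 *     static int data;
 *
 *     Producer thread:
 *
 *         data = 42;
 *         atomic_store_explicit(&flag, true, memory_order_release);
 *
 *     Consumer thread:
 *
 *         bool ready;
 *
 *         atomic_read_explicit(&flag, &ready, memory_order_acquire);
 *         if (ready) {
 *             // The store to 'data' made before the release store is
 *             // guaranteed to be visible here, so 'data' reads as 42.
 *         }
 *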
 *
 * Atomic Operations
 * =================
 *
 * In this section, A is an atomic type and C is the corresponding non-atomic
 * type.
 *
 * The "store" and "compare_exchange" primitives match C11:
 *
 *     void atomic_store(A *object, C value);
 *     void atomic_store_explicit(A *object, C value, memory_order);
 *
 *         Atomically stores 'value' into '*object', respecting the given
 *         memory order (or memory_order_seq_cst for atomic_store()).
 *
 *     bool atomic_compare_exchange_strong(A *object, C *expected, C desired);
 *     bool atomic_compare_exchange_weak(A *object, C *expected, C desired);
 *     bool atomic_compare_exchange_strong_explicit(A *object, C *expected,
 *                                                  C desired,
 *                                                  memory_order success,
 *                                                  memory_order failure);
 *     bool atomic_compare_exchange_weak_explicit(A *object, C *expected,
 *                                                C desired,
 *                                                memory_order success,
 *                                                memory_order failure);
 *
 *         Atomically loads '*object' and compares it with '*expected', and if
 *         equal, stores 'desired' into '*object' (an atomic read-modify-write
 *         operation) and returns true, and if non-equal, stores the actual
 *         value of '*object' into '*expected' (an atomic load operation) and
 *         returns false.  The memory order for the successful case (atomic
 *         read-modify-write operation) is 'success', and for the unsuccessful
 *         case (atomic load operation) 'failure'.  'failure' may not be
 *         stronger than 'success'.
 *
 *         The weak forms may fail (returning false) even when '*object'
 *         equals '*expected'.  The strong form can be implemented by the weak
 *         form in a loop.  Some platforms can implement the weak form more
 *         efficiently, so it should be used if the application will need to
 *         loop anyway.
 *
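 * For example, a minimal sketch of a typical compare-and-exchange loop that
 * atomically raises an atomic_uint 'value' to at least 'new_min' (the
 * variable names are illustrative only; atomic_read_explicit() is documented
 * below):
 *
 *     unsigned int old;
 *
 *     atomic_read_explicit(&value, &old, memory_order_relaxed);
 *     do {
 *         if (old >= new_min) {
 *             break;          // Already large enough, nothing to store.
 *         }
 *     } while (!atomic_compare_exchange_weak_explicit(&value, &old, new_min,
 *                                                     memory_order_relaxed,
 *                                                     memory_order_relaxed));
 *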
 * The following primitives differ from the C11 ones (and have different names)
 * because there does not appear to be a way to implement the standard
 * primitives in standard C:
 *
 *     void atomic_read(A *src, C *dst);
 *     void atomic_read_explicit(A *src, C *dst, memory_order);
 *
 *         Atomically loads a value from 'src', writing the value read into
 *         '*dst', respecting the given memory order (or memory_order_seq_cst
 *         for atomic_read()).
 *
 *     void atomic_add(A *rmw, C arg, C *orig);
 *     void atomic_sub(A *rmw, C arg, C *orig);
 *     void atomic_or(A *rmw, C arg, C *orig);
 *     void atomic_xor(A *rmw, C arg, C *orig);
 *     void atomic_and(A *rmw, C arg, C *orig);
 *     void atomic_add_explicit(A *rmw, C arg, C *orig, memory_order);
 *     void atomic_sub_explicit(A *rmw, C arg, C *orig, memory_order);
 *     void atomic_or_explicit(A *rmw, C arg, C *orig, memory_order);
 *     void atomic_xor_explicit(A *rmw, C arg, C *orig, memory_order);
 *     void atomic_and_explicit(A *rmw, C arg, C *orig, memory_order);
 *
 *         Atomically applies the given operation, with 'arg' as the second
 *         operand, to '*rmw', and stores the original value of '*rmw' into
 *         '*orig', respecting the given memory order (or memory_order_seq_cst
 *         if none is specified).
 *
 *         The results are similar to those that would be obtained with +=, -=,
 *         |=, ^=, or &= on non-atomic types.
 *
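 * For example, a minimal sketch of a relaxed statistics counter (the variable
 * names are illustrative only):
 *
 *     static atomic_ullong n_packets = ATOMIC_VAR_INIT(0);
 *     unsigned long long orig;
 *
 *     atomic_add_explicit(&n_packets, 1, &orig, memory_order_relaxed);
 *     // 'orig' now holds the counter value just before the increment.
 *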
 *
 * atomic_flag
 * ===========
 *
 * atomic_flag is a typedef for a type with two states, set and clear, that
 * provides atomic test-and-set functionality.
 *
 *
 * Life Cycle
 * ----------
 *
 * ATOMIC_FLAG_INIT is an initializer for atomic_flag.  The initial state is
 * "clear".
 *
 * An atomic_flag may also be initialized at runtime with atomic_flag_clear().
 *
 *
 * Operations
 * ----------
 *
 * The following functions are available.
 *
 *     bool atomic_flag_test_and_set(atomic_flag *object);
 *     bool atomic_flag_test_and_set_explicit(atomic_flag *object,
 *                                            memory_order);
 *
 *         Atomically sets '*object', respecting the given memory order (or
 *         memory_order_seq_cst for atomic_flag_test_and_set()).  Returns the
 *         previous value of the flag (false for clear, true for set).
 *
 *     void atomic_flag_clear(atomic_flag *object);
 *     void atomic_flag_clear_explicit(atomic_flag *object, memory_order);
 *
 *         Atomically clears '*object', respecting the given memory order (or
 *         memory_order_seq_cst for atomic_flag_clear()).
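 *
 * For example, a minimal sketch of a test-and-set spin lock built on
 * atomic_flag (the variable name is illustrative only; a real program would
 * usually prefer a higher-level lock):
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     while (atomic_flag_test_and_set(&lock)) {
 *         // Another thread holds the lock; spin until it is cleared.
 *     }
 *     // ...critical section...
 *     atomic_flag_clear(&lock);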
 */

#include <limits.h>
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "compiler.h"
#include "util.h"

#define IN_OVS_ATOMIC_H
#if __CHECKER__
/* sparse doesn't understand some GCC extensions we use. */
#include "ovs-atomic-pthreads.h"
#elif HAVE_STDATOMIC_H
#include "ovs-atomic-c11.h"
#elif __has_extension(c_atomic)
#include "ovs-atomic-clang.h"
#elif __GNUC__ >= 4 && __GNUC_MINOR__ >= 7
#include "ovs-atomic-gcc4.7+.h"
#elif __GNUC__ && defined(__x86_64__)
#include "ovs-atomic-x86_64.h"
#elif __GNUC__ && defined(__i386__)
#include "ovs-atomic-i586.h"
#elif HAVE_GCC4_ATOMICS
#include "ovs-atomic-gcc4+.h"
#else
/* ovs-atomic-pthreads implementation is provided for portability.
 * It might be too slow for real use because Open vSwitch is
 * optimized for platforms where real atomic ops are available. */
#include "ovs-atomic-pthreads.h"
#endif
#undef IN_OVS_ATOMIC_H

#ifndef OMIT_STANDARD_ATOMIC_TYPES
typedef ATOMIC(bool)               atomic_bool;

typedef ATOMIC(char)               atomic_char;
typedef ATOMIC(signed char)        atomic_schar;
typedef ATOMIC(unsigned char)      atomic_uchar;

typedef ATOMIC(short)              atomic_short;
typedef ATOMIC(unsigned short)     atomic_ushort;

typedef ATOMIC(int)                atomic_int;
typedef ATOMIC(unsigned int)       atomic_uint;

typedef ATOMIC(long)               atomic_long;
typedef ATOMIC(unsigned long)      atomic_ulong;

typedef ATOMIC(long long)          atomic_llong;
typedef ATOMIC(unsigned long long) atomic_ullong;

typedef ATOMIC(size_t)             atomic_size_t;
typedef ATOMIC(ptrdiff_t)          atomic_ptrdiff_t;

typedef ATOMIC(intmax_t)           atomic_intmax_t;
typedef ATOMIC(uintmax_t)          atomic_uintmax_t;

typedef ATOMIC(intptr_t)           atomic_intptr_t;
typedef ATOMIC(uintptr_t)          atomic_uintptr_t;
#endif /* !OMIT_STANDARD_ATOMIC_TYPES */

/* Nonstandard atomic types. */
typedef ATOMIC(uint8_t)            atomic_uint8_t;
typedef ATOMIC(uint16_t)           atomic_uint16_t;
typedef ATOMIC(uint32_t)           atomic_uint32_t;

typedef ATOMIC(int8_t)             atomic_int8_t;
typedef ATOMIC(int16_t)            atomic_int16_t;
typedef ATOMIC(int32_t)            atomic_int32_t;

/* Reference count. */
struct ovs_refcount {
    atomic_uint count;
};
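
/* A typical lifecycle, as a sketch only ('struct object' and its 'ref_cnt'
 * member are illustrative, not part of this API):
 *
 *     struct object *object = xmalloc(sizeof *object);
 *
 *     ovs_refcount_init(&object->ref_cnt);     // Count starts at 1.
 *     ...
 *     ovs_refcount_ref(&object->ref_cnt);      // Share a second reference.
 *     ...
 *     if (ovs_refcount_unref(&object->ref_cnt) == 1) {
 *         free(object);                        // Last reference was dropped.
 *     }
 */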

/* Initializes 'refcount'.  The reference count is initially 1. */
static inline void
ovs_refcount_init(struct ovs_refcount *refcount)
{
    atomic_init(&refcount->count, 1);
}

/* Increments 'refcount'.
 *
 * Does not provide a memory barrier, as the calling thread must have
 * protected access to the object already. */
static inline void
ovs_refcount_ref(struct ovs_refcount *refcount)
{
    unsigned int old_refcount;

    atomic_add_explicit(&refcount->count, 1, &old_refcount,
                        memory_order_relaxed);
    ovs_assert(old_refcount > 0);
}

/* Decrements 'refcount' and returns the previous reference count.  Often used
 * in this form:
 *
 * if (ovs_refcount_unref(&object->ref_cnt) == 1) {
 *     // ...uninitialize object...
 *     free(object);
 * }
 *
 * Provides a release barrier so that the preceding loads and stores cannot be
 * reordered after the unref, and, in the case of the last reference, also an
 * acquire barrier to keep all the following uninitialization from being
 * reordered before the atomic decrement operation.  Together these
 * synchronize any concurrent unref operations between each other. */
static inline unsigned int
ovs_refcount_unref(struct ovs_refcount *refcount)
{
    unsigned int old_refcount;

    atomic_sub_explicit(&refcount->count, 1, &old_refcount,
                        memory_order_release);
    ovs_assert(old_refcount > 0);
    if (old_refcount == 1) {
        /* 'memory_order_release' above means that there are no (reordered)
         * accesses to the protected object from any thread at this point.
         * An acquire barrier is needed to keep all subsequent access to the
         * object's memory from being reordered before the atomic operation
         * above. */
        atomic_thread_fence(memory_order_acquire);
    }
    return old_refcount;
}

/* Reads and returns 'refcount_''s current reference count.
 *
 * Does not provide a memory barrier.
 *
 * Rarely useful. */
static inline unsigned int
ovs_refcount_read(const struct ovs_refcount *refcount_)
{
    struct ovs_refcount *refcount
        = CONST_CAST(struct ovs_refcount *, refcount_);
    unsigned int count;

    atomic_read_explicit(&refcount->count, &count, memory_order_relaxed);
    return count;
}

/* Increments 'refcount', but only if it is non-zero.
 *
 * This may only be called for an object which is RCU protected during
 * this call.  This implies that its possible destruction is postponed
 * until all current RCU threads quiesce.
 *
 * Returns false if the refcount was zero.  In this case the object may
 * be safely accessed until the current thread quiesces, but no additional
 * references to the object may be taken.
 *
 * Does not provide a memory barrier, as the calling thread must have
 * RCU protected access to the object already.
 *
 * It is critical that we never increment a zero refcount to a
 * non-zero value, as whenever a refcount reaches the zero value, the
 * protected object may be irrevocably scheduled for deletion. */
static inline bool
ovs_refcount_try_ref_rcu(struct ovs_refcount *refcount)
{
    unsigned int count;

    atomic_read_explicit(&refcount->count, &count, memory_order_relaxed);
    do {
        if (count == 0) {
            return false;
        }
    } while (!atomic_compare_exchange_weak_explicit(&refcount->count, &count,
                                                    count + 1,
                                                    memory_order_relaxed,
                                                    memory_order_relaxed));
    return true;
}

/* Decrements 'refcount' and returns the previous reference count.  To
 * be used only when a memory barrier is already provided for the
 * protected object independently.
 *
 * For example:
 *
 * if (ovs_refcount_unref_relaxed(&object->ref_cnt) == 1) {
 *     // Schedule uninitialization and freeing of the object:
 *     ovsrcu_postpone(destructor_function, object);
 * }
 *
 * Here RCU quiescing already provides a full memory barrier.  No additional
 * barriers are needed here.
 *
 * Or:
 *
 * if (stp && ovs_refcount_unref_relaxed(&stp->ref_cnt) == 1) {
 *     ovs_mutex_lock(&mutex);
 *     list_remove(&stp->node);
 *     ovs_mutex_unlock(&mutex);
 *     free(stp->name);
 *     free(stp);
 * }
 *
 * Here a mutex is used to guard access to all of 'stp' apart from
 * 'ref_cnt'.  Hence all changes to 'stp' by other threads must be
 * visible when we get the mutex, and no access after the unlock can
 * be reordered to happen prior to the lock operation.  No additional
 * barriers are needed here.
 */
static inline unsigned int
ovs_refcount_unref_relaxed(struct ovs_refcount *refcount)
{
    unsigned int old_refcount;

    atomic_sub_explicit(&refcount->count, 1, &old_refcount,
                        memory_order_relaxed);
    ovs_assert(old_refcount > 0);
    return old_refcount;
}

#endif /* ovs-atomic.h */