/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives for MSVC on 32 bit
 * (i586 or greater) and x64 platforms. */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From the MSDN documentation: with Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation stating that anything newer than
 * VS 2005 has the same behavior as described above, looking through MSVC's
 * C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile

typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;

#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT)                      \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)

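/* Illustrative sketch of how the definitions above are meant to be used.
 * The variable names here are made up for illustration, not taken from this
 * header:
 *
 *     static ATOMIC(uint32_t) packet_count = ATOMIC_VAR_INIT(0);
 *
 *     ATOMIC(uint32_t) byte_count;
 *     atomic_init(&byte_count, 0);
 */
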
static inline void
atomic_compiler_barrier(memory_order order)
{
    /* In case of 'memory_order_consume', it is implicitly assumed that
     * the compiler will not move instructions that have data-dependency
     * on the variable in question before the barrier. */
    if (order > memory_order_consume) {
        _ReadWriteBarrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    /* x86 is strongly ordered and acquire/release semantics come
     * automatically. */
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

/* 1, 2 and 4 byte loads and stores are atomic on aligned memory. In addition,
 * since the compiler automatically takes acquire and release semantics on
 * volatile variables, for any order weaker than 'memory_order_seq_cst' we
 * can directly assign or read values. */

#define atomic_store32(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange((int32_t volatile *) (DST),                 \
                            (int32_t) (SRC));                           \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

/* MSVC converts 64 bit writes into two instructions, so there is a
 * possibility that an interrupt can make a 64 bit write non-atomic even
 * when it is 8 byte aligned. So use InterlockedExchange64().
 *
 * For atomic stores, 'consume' and 'acquire' semantics are not valid. But we
 * are using 'Exchange' to get atomic stores here, and only
 * InterlockedExchange64(), InterlockedExchangeNoFence64() and
 * InterlockedExchange64Acquire() are available. So we are forced to use
 * InterlockedExchange64(), which implies a full memory barrier, for
 * everything stronger than 'memory_order_relaxed'. */
#ifdef _M_IX86
#define atomic_store64(DST, SRC, ORDER)                                    \
    if (ORDER == memory_order_relaxed) {                                   \
        InterlockedExchangeNoFence64((int64_t volatile *) (DST),           \
                                     (int64_t) (SRC));                     \
    } else {                                                               \
        InterlockedExchange64((int64_t volatile *) (DST), (int64_t) (SRC));\
    }
#elif _M_X64
/* 64 bit writes are atomic on amd64 if 64 bit aligned. */
#define atomic_store64(DST, SRC, ORDER)                                    \
    atomic_storeX(64, DST, SRC, ORDER)
#endif

/* Used for the 8 and 16 bit variations (and, on x64 builds, also for the
 * 64 bit variation). */
#define atomic_storeX(X, DST, SRC, ORDER)                               \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange##X((int##X##_t volatile *) (DST),           \
                               (int##X##_t) (SRC));                     \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store(DST, SRC)                               \
        atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER)                           \
    if (sizeof *(DST) == 1) {                                            \
        atomic_storeX(8, DST, SRC, ORDER)                                \
    } else if (sizeof *(DST) == 2) {                                     \
        atomic_storeX(16, DST, SRC, ORDER)                               \
    } else if (sizeof *(DST) == 4) {                                     \
        atomic_store32(DST, SRC, ORDER)                                  \
    } else if (sizeof *(DST) == 8) {                                     \
        atomic_store64(DST, SRC, ORDER)                                  \
    } else {                                                             \
        abort();                                                         \
    }

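/* Illustrative use of the store macros above (a sketch; 'counter' is a
 * made-up variable, not part of this header). For a 4 byte object, a seq_cst
 * store goes through InterlockedExchange(), while a release store becomes a
 * plain volatile write:
 *
 *     static ATOMIC(uint32_t) counter;
 *
 *     atomic_store(&counter, 1);
 *     atomic_store_explicit(&counter, 2, memory_order_release);
 */
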
/* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 * reads don't need to be locked (based on the following in the Intel
 * Developer's Manual:
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations. */
#define atomic_readX(SRC, DST, ORDER)                                      \
    *(DST) = *(SRC);

/* MSVC converts 64 bit reads into two instructions, so there is a
 * possibility that an interrupt can make a 64 bit read non-atomic even when
 * it is 8 byte aligned. So use InterlockedOr64(), which implies a full
 * memory barrier. */
#ifdef _M_IX86
#define atomic_read64(SRC, DST, ORDER)                                     \
    __pragma (warning(push))                                               \
    __pragma (warning(disable:4047))                                       \
    *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);               \
    __pragma (warning(pop))
#elif _M_X64
/* 64 bit reads are atomic on amd64 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                     \
    *(DST) = *(SRC);
#endif

#define atomic_read(SRC, DST)                               \
        atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER)                              \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) {  \
        atomic_readX(SRC, DST, ORDER)                                      \
    } else if (sizeof *(DST) == 8) {                                       \
        atomic_read64(SRC, DST, ORDER)                                     \
    } else {                                                               \
        abort();                                                           \
    }

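/* Illustrative read, continuing the sketch above ('counter' and 'value' are
 * made-up names). The read macros copy the atomic object into a plain
 * variable supplied by the caller:
 *
 *     uint32_t value;
 *
 *     atomic_read(&counter, &value);
 *     atomic_read_explicit(&counter, &value, memory_order_acquire);
 */
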
/* For add, sub, and logical operations on 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist (though
 * documentation exists for some of them). The MSVC C++ library that
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h). So do the same here. */

/* For 8, 16 and 64 bit variations. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
        atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)

/* Arithmetic addition calls. */

#define atomic_add32(RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedExchangeAdd((int32_t volatile *) (RMW),   \
                                     (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_add_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = _InterlockedExchangeAdd##X((int##X##_t volatile *) (RMW),     \
                                         (int##X##_t) (ARG));

#define atomic_add(RMW, ARG, ORIG)                               \
        atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                  \
        atomic_op(add, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                           \
        atomic_op(add, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                           \
        atomic_add32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                           \
        atomic_op(add, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                   \
        abort();                                               \
    }

/* Arithmetic subtraction calls. */

#define atomic_sub(RMW, ARG, ORIG)                                        \
        atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)        \
        atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)

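/* Illustrative read-modify-write, continuing the sketch ('orig' is a made-up
 * variable). 'orig' receives the value that 'counter' held just before the
 * operation, matching the C11 fetch-and-add style:
 *
 *     uint32_t orig;
 *
 *     atomic_add(&counter, 5, &orig);
 *     atomic_sub_explicit(&counter, 2, &orig, memory_order_relaxed);
 */
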
/* Logical 'and' calls. */

#define atomic_and32(RMW, ARG, ORIG, ORDER)                                \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                       \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),             \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG)                               \
        atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                  \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                           \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                           \
        atomic_and32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                           \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                   \
        abort();                                               \
    }

/* Logical 'Or' calls. */

#define atomic_or32(RMW, ARG, ORIG, ORDER)                                 \
    *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),              \
                               (int##X##_t) (ARG));

#define atomic_or(RMW, ARG, ORIG)                               \
        atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                 \
        atomic_op(or, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                          \
        atomic_op(or, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                          \
        atomic_or32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                          \
        atomic_op(or, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                  \
        abort();                                              \
    }

/* Logical Xor calls. */

#define atomic_xor32(RMW, ARG, ORIG, ORDER)                                \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                       \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),             \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG)                               \
        atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                  \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                           \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                           \
        atomic_xor32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                           \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                   \
        abort();                                               \
    }

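/* The logical operations above follow the same calling pattern as
 * atomic_add(), e.g. (a sketch with made-up names; 'flags' is an
 * ATOMIC(uint32_t)):
 *
 *     uint32_t old_flags;
 *
 *     atomic_or(&flags, 0x4, &old_flags);
 *     atomic_and(&flags, ~0x4, &old_flags);
 */
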
#define atomic_compare_exchange_strong(DST, EXP, SRC)             \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,        \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit        \
        atomic_compare_exchange_strong_explicit

/* MSVC's C++ compiler implements C11 atomics, and looking through its
 * implementation (in xatomic.h), memory orders are ignored on the x86
 * platform. Do the same here. */
static inline bool
atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
{
    int8_t previous = _InterlockedCompareExchange8(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
                          int16_t src)
{
    int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
                          int32_t src)
{
    int32_t previous = InterlockedCompareExchange(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
                          int64_t src)
{
    int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_unreachable()
{
    return true;
}

#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2)    \
    (sizeof *(DST) == 1                                                       \
     ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP),  \
                                (int8_t) (SRC))                               \
     : (sizeof *(DST) == 2                                                    \
        ? atomic_compare_exchange16((int16_t volatile *) (DST),               \
                                    (int16_t *) (EXP), (int16_t) (SRC))       \
        : (sizeof *(DST) == 4                                                 \
           ? atomic_compare_exchange32((int32_t volatile *) (DST),            \
                                       (int32_t *) (EXP), (int32_t) (SRC))    \
           : (sizeof *(DST) == 8                                              \
              ? atomic_compare_exchange64((int64_t volatile *) (DST),         \
                                          (int64_t *) (EXP), (int64_t) (SRC)) \
              : ovs_fatal(0, "atomic operation with size greater than 8 bytes"), \
                atomic_compare_unreachable()))))

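/* A typical compare-and-exchange loop over the macro above, sketched with
 * made-up names. On failure the macro writes the value actually found into
 * 'expected', so the loop retries with fresh data:
 *
 *     static ATOMIC(uint32_t) value;
 *     uint32_t expected, desired;
 *
 *     atomic_read(&value, &expected);
 *     do {
 *         desired = expected * 2;
 *     } while (!atomic_compare_exchange_strong(&value, &expected, desired));
 */
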
\f
/* atomic_flag */

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0

#define atomic_flag_test_and_set(FLAG)                 \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

#define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
        atomic_flag_test_and_set(FLAG)

#define atomic_flag_clear_explicit(FLAG, ORDER) \
        atomic_flag_clear(FLAG)
#define atomic_flag_clear(FLAG)                 \
    InterlockedBitTestAndReset(FLAG, 0)
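
/* atomic_flag can back a simple spin lock, e.g. (a sketch; 'lock' and the
 * function names below are made up):
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     static void
 *     example_spin_lock(void)
 *     {
 *         while (atomic_flag_test_and_set(&lock)) {
 *             continue;
 *         }
 *     }
 *
 *     static void
 *     example_spin_unlock(void)
 *     {
 *         atomic_flag_clear(&lock);
 *     }
 */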