lib/ovs-atomic-msvc.h

   1 /*
   2  * Copyright (c) 2014 Nicira, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at:
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
/* This header implements atomic operation primitives for MSVC
 * on i586 or greater (32 bit) and amd64 (64 bit) platforms. */
  19 #ifndef IN_OVS_ATOMIC_H
  20 #error "This header should only be included indirectly via ovs-atomic.h."
  21 #endif
  22
/* ATOMIC(TYPE) declares an atomic object of type TYPE.  In this
 * implementation it is simply a volatile-qualified TYPE.  The qualifier is
 * written after TYPE so that, for a pointer TYPE, it is the pointer object
 * itself that becomes volatile.
 *
 * From msdn documentation: With Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation that states that anything greater
 * than VS 2005 has the same behavior as described above, looking through
 * MSVC's C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile
  34
  35 typedef enum {
  36     memory_order_relaxed,
  37     memory_order_consume,
  38     memory_order_acquire,
  39     memory_order_release,
  40     memory_order_acq_rel,
  41     memory_order_seq_cst
  42 } memory_order;
  43
#if _MSC_VER > 1800 && defined(_M_IX86)
/* From WDK 10 the _Interlocked* functions are renamed to
 * _InlineInterlocked*, although the documentation does not specify it.
 * Alias the two 64 bit names below to their renamed counterparts so that
 * code spelled with the _Interlocked* names keeps working on 32 bit
 * builds. */
#define _InterlockedExchangeAdd64 _InlineInterlockedExchangeAdd64
#define _InterlockedExchange64 _InlineInterlockedExchange64
#endif
  50
/* Lock-free property macros, using the C11 <stdatomic.h> names.  In C11 the
 * value 2 means that the corresponding atomic type is always lock-free. */
#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2
  58
/* Evaluates to nonzero if OBJECT is at most 8 bytes in size and IS_POW2()
 * accepts that size.  IS_POW2() is defined outside this header; presumably it
 * tests for a power of two, so the accepted sizes would be 1, 2, 4 and 8
 * bytes -- confirm against its definition. */
#define IS_LOCKLESS_ATOMIC(OBJECT)                      \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))
  61
  62 #define ATOMIC_VAR_INIT(VALUE) (VALUE)
  63 #define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
  64
  65 static inline void
  66 atomic_compiler_barrier(memory_order order)
  67 {
  68     /* In case of 'memory_order_consume', it is implicitly assumed that
  69      * the compiler will not move instructions that have data-dependency
  70      * on the variable in question before the barrier. */
  71     if (order > memory_order_consume) {
  72         _ReadWriteBarrier();
  73     }
  74 }
  75
  76 static inline void
  77 atomic_thread_fence(memory_order order)
  78 {
  79     /* x86 is strongly ordered and acquire/release semantics come
  80      * automatically. */
  81     atomic_compiler_barrier(order);
  82     if (order == memory_order_seq_cst) {
  83         MemoryBarrier();
  84         atomic_compiler_barrier(order);
  85     }
  86 }
  87
/* Signal fence: only a compiler barrier for 'order' is issued (see
 * atomic_compiler_barrier()); no MemoryBarrier() call is made. */
static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}
  93
  94 /* 1, 2 and 4 bytes loads and stores are atomic on aligned memory. In addition,
  95  * since the compiler automatically takes acquire and release semantics on
  96  * volatile variables, for any order lesser than 'memory_order_seq_cst', we
  97  * can directly assign or read values. */
  98
  99 #define atomic_store32(DST, SRC, ORDER)                                 \
 100     if (ORDER == memory_order_seq_cst) {                                \
 101         InterlockedExchange((long volatile *) (DST),                    \
 102                                (long) (SRC));                           \
 103     } else {                                                            \
 104         *(DST) = (SRC);                                                 \
 105     }
 106
 107 /* MSVC converts 64 bit writes into two instructions. So there is
 108  * a possibility that an interrupt can make a 64 bit write non-atomic even
 109  * when 8 byte aligned. So use InterlockedExchange64().
 110  *
 111  * For atomic stores, 'consume' and 'acquire' semantics are not valid. But we
 112  * are using 'Exchange' to get atomic stores here and we only have
 113  * InterlockedExchange64(), InterlockedExchangeNoFence64() and
 114  * InterlockedExchange64Acquire() available. So we are forced to use
 115  * InterlockedExchange64() which uses full memory barrier for everything
 116  * greater than 'memory_order_relaxed'. */
 117 #ifdef _M_IX86
 118 #define atomic_store64(DST, SRC, ORDER)                                    \
 119     if (ORDER == memory_order_relaxed) {                                   \
 120         InterlockedExchangeNoFence64((int64_t volatile *) (DST),           \
 121                                      (int64_t) (SRC));                     \
 122     } else {                                                               \
 123         InterlockedExchange64((int64_t volatile *) (DST), (int64_t) (SRC));\
 124     }
 125 #elif _M_X64
 126 /* 64 bit writes are atomic on amd64 if 64 bit aligned. */
 127 #define atomic_store64(DST, SRC, ORDER)                                    \
 128     if (ORDER == memory_order_seq_cst) {                                   \
 129         InterlockedExchange64((int64_t volatile *) (DST),                  \
 130                                (int64_t) (SRC));                           \
 131     } else {                                                               \
 132         *(DST) = (SRC);                                                    \
 133     }
 134 #endif
 135
 136 #define atomic_store8(DST, SRC, ORDER)                                     \
 137     if (ORDER == memory_order_seq_cst) {                                   \
 138         InterlockedExchange8((char volatile *) (DST), (char) (SRC));       \
 139     } else {                                                               \
 140         *(DST) = (SRC);                                                    \
 141     }
 142
 143 #define atomic_store16(DST, SRC, ORDER)                                    \
 144     if (ORDER == memory_order_seq_cst) {                                   \
 145         InterlockedExchange16((short volatile *) (DST), (short) (SRC));    \
 146     } else {                                                               \
 147         *(DST) = (SRC);                                                    \
 148     }
 149
 150 #define atomic_store(DST, SRC)                               \
 151         atomic_store_explicit(DST, SRC, memory_order_seq_cst)
 152
 153 #define atomic_store_explicit(DST, SRC, ORDER)                           \
 154     if (sizeof *(DST) == 1) {                                            \
 155         atomic_store8(DST, SRC, ORDER)                                   \
 156     } else if (sizeof *(DST) == 2) {                                     \
 157         atomic_store16( DST, SRC, ORDER)                                 \
 158     } else if (sizeof *(DST) == 4) {                                     \
 159         atomic_store32(DST, SRC, ORDER)                                  \
 160     } else if (sizeof *(DST) == 8) {                                     \
 161         atomic_store64(DST, SRC, ORDER)                                  \
 162     } else {                                                             \
 163         abort();                                                         \
 164     }
 165
 166 /* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 167  * reads don't need to be locked (based on the following in Intel Developers
 168  * manual:
 169  * “Locked operations are atomic with respect to all other memory operations
 170  * and all externally visible events. Only instruction fetch and page table
 171  * accesses can pass locked instructions. Locked instructions can be used to
 172  * synchronize data written by one processor and read by another processor.
 173  * For the P6 family processors, locked operations serialize all outstanding
 174  * load and store operations (that is, wait for them to complete). This rule
 175  * is also true for the Pentium 4 and Intel Xeon processors, with one
 176  * exception. Load operations that reference weakly ordered memory types
 177  * (such as the WC memory type) may not be serialized."). */
 178
/* For 8, 16 and 32 bit variations: a plain load of *(SRC) assigned to
 * *(DST).  ORDER is not used.  The expansion already ends in ';'. */
#define atomic_readX(SRC, DST, ORDER)                                      \
    *(DST) = *(SRC);
 182
/* MSVC converts 64 bit reads into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit read non-atomic even
 * when 8 byte aligned. So use the full memory barrier InterlockedOr64():
 * or-ing with 0 leaves *(SRC) unchanged and yields its value.
 *
 * ORDER is not used.  Warning 4047 is disabled around the assignment;
 * NOTE(review): presumably because *(DST) may be a pointer type when this
 * branch is compiled for a non-64-bit object -- confirm. */
#ifdef _M_IX86
#define atomic_read64(SRC, DST, ORDER)                                     \
    __pragma (warning(push))                                               \
    __pragma (warning(disable:4047))                                       \
    *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);               \
    __pragma (warning(pop))
#elif _M_X64
/* 64 bit reads are atomic on amd64 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                     \
    *(DST) = *(SRC);
#endif
 197
 198 #define atomic_read(SRC, DST)                               \
 199         atomic_read_explicit(SRC, DST, memory_order_seq_cst)
 200
 201 #define atomic_read_explicit(SRC, DST, ORDER)                             \
 202     if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
 203         atomic_readX(SRC, DST, ORDER)                                     \
 204     } else if (sizeof *(DST) == 8) {                                      \
 205         atomic_read64(SRC, DST, ORDER)                                    \
 206     } else {                                                              \
 207         abort();                                                          \
 208     }
 209
/* For add, sub, and logical operations, for 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist
 * (though documentation exists for some of them).  The MSVC C++ library which
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h). So do the same here. */
 215
/* For 8, 16 and 64 bit variations.  Dispatches to the macro named
 * atomic_<OP>_generic (for example atomic_and_generic), passing the bit
 * width X and the remaining arguments through unchanged. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)
 219
 220 /* Arithmetic addition calls. */
 221
/* Size-specific atomic additions.  Each adds ARG to *(RMW) with the
 * interlocked exchange-add function for that width and assigns the function's
 * result to *(ORIG).  ORDER is not used.  Every expansion already ends in
 * ';', so the macros are invoked without a trailing semicolon. */

/* 8 bit. */
#define atomic_add8(RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = _InterlockedExchangeAdd8((char volatile *) (RMW),   \
                                      (char) (ARG));

/* 16 bit. */
#define atomic_add16(RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = _InterlockedExchangeAdd16((short volatile *) (RMW),   \
                                      (short) (ARG));

/* 32 bit. */
#define atomic_add32(RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedExchangeAdd((long volatile *) (RMW),   \
                                      (long) (ARG));
/* 64 bit. */
#define atomic_add64(RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = _InterlockedExchangeAdd64((int64_t volatile *) (RMW),   \
                                      (int64_t) (ARG));
 236
 237 #define atomic_add(RMW, ARG, ORIG)                               \
 238         atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 239
 240 #define atomic_add_explicit(RMW, ARG, ORIG, ORDER)             \
 241     if (sizeof *(RMW) == 1) {                                  \
 242         atomic_add8(RMW, ARG, ORIG, ORDER)               \
 243     } else if (sizeof *(RMW) == 2) {                           \
 244         atomic_add16(RMW, ARG, ORIG, ORDER)              \
 245     } else if (sizeof *(RMW) == 4) {                           \
 246         atomic_add32(RMW, ARG, ORIG, ORDER)                    \
 247     } else if (sizeof *(RMW) == 8) {                           \
 248         atomic_add64(RMW, ARG, ORIG, ORDER)              \
 249     } else {                                                   \
 250         abort();                                               \
 251     }
 252
 253 /* Arithmetic subtraction calls. */
 254
 255 #define atomic_sub(RMW, ARG, ORIG)                             \
 256         atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)
 257
 258 #define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)           \
 259         atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)
 260
 261 /* Logical 'and' calls. */
 262
 263 #define atomic_and32(RMW, ARG, ORIG, ORDER)                        \
 264     *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));
 265
 266 /* For 8, 16 and 64 bit variations. */
 267 #define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                        \
 268     *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),              \
 269                                 (int##X##_t) (ARG));
 270
 271 #define atomic_and(RMW, ARG, ORIG)                               \
 272         atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 273
 274 #define atomic_and_explicit(RMW, ARG, ORIG, ORDER)             \
 275     if (sizeof *(RMW) == 1) {                                  \
 276         atomic_op(and, 8, RMW, ARG, ORIG, ORDER)               \
 277     } else if (sizeof *(RMW) == 2) {                           \
 278         atomic_op(and, 16, RMW, ARG, ORIG, ORDER)              \
 279     } else if (sizeof *(RMW) == 4) {                           \
 280         atomic_and32(RMW, ARG, ORIG, ORDER)                    \
 281     } else if (sizeof *(RMW) == 8) {                           \
 282         atomic_op(and, 64, RMW, ARG, ORIG, ORDER)              \
 283     } else {                                                   \
 284         abort();                                               \
 285     }
 286
 287 /* Logical 'Or' calls. */
 288
 289 #define atomic_or32(RMW, ARG, ORIG, ORDER)                        \
 290     *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));
 291
 292 /* For 8, 16 and 64 bit variations. */
 293 #define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                        \
 294     *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),              \
 295                                (int##X##_t) (ARG));
 296
 297 #define atomic_or(RMW, ARG, ORIG)                               \
 298         atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 299
 300 #define atomic_or_explicit(RMW, ARG, ORIG, ORDER)              \
 301     if (sizeof *(RMW) == 1) {                                  \
 302         atomic_op(or, 8, RMW, ARG, ORIG, ORDER)                \
 303     } else if (sizeof *(RMW) == 2) {                           \
 304         atomic_op(or, 16, RMW, ARG, ORIG, ORDER)               \
 305     } else if (sizeof *(RMW) == 4) {                           \
 306         atomic_or32(RMW, ARG, ORIG, ORDER)                     \
 307     } else if (sizeof *(RMW) == 8) {                           \
 308         atomic_op(or, 64, RMW, ARG, ORIG, ORDER)               \
 309     } else {                                                   \
 310         abort();                                               \
 311     }
 312
 313 /* Logical Xor calls. */
 314
 315 #define atomic_xor32(RMW, ARG, ORIG, ORDER)                        \
 316     *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));
 317
 318 /* For 8, 16 and 64 bit variations. */
 319 #define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                        \
 320     *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),              \
 321                                 (int##X##_t) (ARG));
 322
 323 #define atomic_xor(RMW, ARG, ORIG)                               \
 324         atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)
 325
 326 #define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)             \
 327     if (sizeof *(RMW) == 1) {                                  \
 328         atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)               \
 329     } else if (sizeof *(RMW) == 2) {                           \
 330         atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)              \
 331     } else if (sizeof *(RMW) == 4) {                           \
 332         atomic_xor32(RMW, ARG, ORIG, ORDER);                   \
 333     } else if (sizeof *(RMW) == 8) {                           \
 334         atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)              \
 335     } else {                                                   \
 336         abort();                                               \
 337     }
 338
/* Sequentially consistent compare-and-exchange: forwards to
 * atomic_compare_exchange_strong_explicit() with 'memory_order_seq_cst' for
 * both order arguments. */
#define atomic_compare_exchange_strong(DST, EXP, SRC)   \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC, \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)

/* The weak variants are plain aliases of the strong ones in this
 * implementation. */
#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
        atomic_compare_exchange_strong_explicit
 347
/* MSVC's C++ compiler implements C11 atomics and, looking through its
 * implementation (in xatomic.h), memory orders are ignored on the x86
 * platform.  Do the same here. */
 351 static inline bool
 352 atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
 353 {
 354     int8_t previous = _InterlockedCompareExchange8((char volatile *)dst,
 355                                                    src, *expected);
 356     if (previous == *expected) {
 357         return true;
 358     } else {
 359         *expected = previous;
 360         return false;
 361     }
 362 }
 363
 364 static inline bool
 365 atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
 366                           int16_t src)
 367 {
 368     int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
 369     if (previous == *expected) {
 370         return true;
 371     } else {
 372         *expected = previous;
 373         return false;
 374     }
 375 }
 376
 377 static inline bool
 378 atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
 379                           int32_t src)
 380 {
 381     int32_t previous = InterlockedCompareExchange((long volatile *)dst,
 382                                                   src, *expected);
 383     if (previous == *expected) {
 384         return true;
 385     } else {
 386         *expected = previous;
 387         return false;
 388     }
 389 }
 390
 391 static inline bool
 392 atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
 393                           int64_t src)
 394 {
 395     int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
 396     if (previous == *expected) {
 397         return true;
 398     } else {
 399         *expected = previous;
 400         return false;
 401     }
 402 }
 403
 404 static inline bool
 405 atomic_compare_unreachable()
 406 {
 407     return true;
 408 }
 409
 410 #define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2)    \
 411     (sizeof *(DST) == 1                                                       \
 412      ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP),  \
 413                                 (int8_t) (SRC))                               \
 414      : (sizeof *(DST) == 2                                                    \
 415      ? atomic_compare_exchange16((int16_t volatile *) (DST),                  \
 416                                  (int16_t *) (EXP), (int16_t) (SRC))          \
 417      : (sizeof *(DST) == 4                                                    \
 418      ? atomic_compare_exchange32((int32_t volatile *) (DST),                  \
 419                                  (int32_t *) (EXP), (int32_t) (SRC))          \
 420      : (sizeof *(DST) == 8                                                    \
 421      ? atomic_compare_exchange64((int64_t volatile *) (DST),                  \
 422                                  (int64_t *) (EXP), (int64_t) (SRC))          \
 423      : ovs_fatal(0, "atomic operation with size greater than 8 bytes"),       \
 424        atomic_compare_unreachable()))))
 425
 426 \f
 427 /* atomic_flag */
 428
/* An atomic flag is an ATOMIC() 32 bit integer.  The test-and-set and clear
 * macros in this header operate on its bit 0, so the initial value 0 is the
 * clear state. */
typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0
 431
/* Sets bit 0 of FLAG with InterlockedBitTestAndSet() and yields the
 * function's result converted to bool.  NOTE(review): the result is
 * presumably the previous state of the bit, i.e. true if the flag was already
 * set -- confirm against the InterlockedBitTestAndSet() documentation. */
#define atomic_flag_test_and_set(FLAG)                 \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

/* Same as atomic_flag_test_and_set(); ORDER is not used. */
#define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
        atomic_flag_test_and_set(FLAG)
 437
 438 #define atomic_flag_clear_explicit(FLAG, ORDER) \
 439         atomic_flag_clear()
 440 #define atomic_flag_clear(FLAG)                 \
 441     InterlockedBitTestAndReset(FLAG, 0)