/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives for MSVC
 * on i586 or greater platforms (32 bit). */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From MSDN documentation: With Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation stating that anything newer than
 * VS 2005 has the same behavior as described above, looking through MSVC's
 * C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile
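
/* Usage sketch (illustrative, not part of the original header; 'counter'
 * is a hypothetical variable name):
 *
 *     static ATOMIC(long) counter;    // expands to 'long volatile counter'
 *
 * Plain reads and writes of such a variable get the compiler-level
 * acquire/release treatment described above; the macros further down add
 * the Interlocked* calls needed for sequentially consistent accesses. */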

typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;

#if _MSC_VER > 1800 && defined(_M_IX86)
/* From WDK 10, the _Interlocked* functions are renamed to
 * _InlineInterlocked*, although the documentation does not specify it. */
#define _InterlockedExchangeAdd64 _InlineInterlockedExchangeAdd64
#define _InterlockedExchange64 _InlineInterlockedExchange64
#endif

#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
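
/* Initialization sketch (illustrative; 'flag_var' is hypothetical):
 *
 *     static ATOMIC(int) flag_var = ATOMIC_VAR_INIT(0);
 *
 *     void init_at_runtime(void)
 *     {
 *         atomic_init(&flag_var, 0);  // plain store; not yet thread-safe
 *     }
 *
 * As in C11, atomic_init() may only be used before the variable is shared
 * between threads. */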

static inline void
atomic_compiler_barrier(memory_order order)
{
    /* In case of 'memory_order_consume', it is implicitly assumed that
     * the compiler will not move instructions that have data-dependency
     * on the variable in question before the barrier. */
    if (order > memory_order_consume) {
        _ReadWriteBarrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    /* x86 is strongly ordered and acquire/release semantics come
     * automatically. */
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}
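
/* Fence usage sketch (illustrative; 'data' and 'ready' are hypothetical
 * volatile variables). On this implementation a release fence costs only
 * a compiler barrier, while seq_cst additionally issues MemoryBarrier():
 *
 *     data = 42;                                  // plain volatile store
 *     atomic_thread_fence(memory_order_release);  // compiler barrier only
 *     ready = 1;                                  // publish
 */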

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

/* 1, 2 and 4 byte loads and stores are atomic on aligned memory. In
 * addition, since the compiler automatically takes acquire and release
 * semantics on volatile variables, for any order weaker than
 * 'memory_order_seq_cst', we can directly assign or read values. */

#define atomic_store32(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange((long volatile *) (DST),                    \
                            (long) (SRC));                              \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

/* MSVC converts 64 bit writes into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit write non-atomic even
 * when 8 byte aligned. So use InterlockedExchange64().
 *
 * For atomic stores, 'consume' and 'acquire' semantics are not valid. But we
 * are using 'Exchange' to get atomic stores here and we only have
 * InterlockedExchange64(), InterlockedExchangeNoFence64() and
 * InterlockedExchange64Acquire() available. So we are forced to use
 * InterlockedExchange64(), which uses a full memory barrier, for everything
 * stronger than 'memory_order_relaxed'. */
#ifdef _M_IX86
#define atomic_store64(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_relaxed) {                                \
        InterlockedExchangeNoFence64((int64_t volatile *) (DST),        \
                                     (int64_t) (SRC));                  \
    } else {                                                            \
        InterlockedExchange64((int64_t volatile *) (DST),               \
                              (int64_t) (SRC));                         \
    }
#elif _M_X64
/* 64 bit writes are atomic on amd64 if 64 bit aligned. */
#define atomic_store64(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange64((int64_t volatile *) (DST),               \
                              (int64_t) (SRC));                         \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }
#endif

#define atomic_store8(DST, SRC, ORDER)                                  \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange8((char volatile *) (DST), (char) (SRC));    \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store16(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange16((short volatile *) (DST), (short) (SRC)); \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store(DST, SRC)                                          \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER)                          \
    if (sizeof *(DST) == 1) {                                           \
        atomic_store8(DST, SRC, ORDER)                                  \
    } else if (sizeof *(DST) == 2) {                                    \
        atomic_store16(DST, SRC, ORDER)                                 \
    } else if (sizeof *(DST) == 4) {                                    \
        atomic_store32(DST, SRC, ORDER)                                 \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_store64(DST, SRC, ORDER)                                 \
    } else {                                                            \
        abort();                                                        \
    }
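
/* Store usage sketch (illustrative; 'counter' is hypothetical):
 *
 *     static ATOMIC(uint32_t) counter;
 *
 *     atomic_store(&counter, 10);      // seq_cst: InterlockedExchange()
 *     atomic_store_explicit(&counter, 10, memory_order_release);
 *                                      // weaker order: plain volatile store
 */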

/* On x86, for 'memory_order_seq_cst', if stores are locked, the
 * corresponding reads don't need to be locked (based on the following in
 * the Intel Developers manual:
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations. */
#define atomic_readX(SRC, DST, ORDER)                                   \
    *(DST) = *(SRC);

/* MSVC converts 64 bit reads into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit read non-atomic even
 * when 8 byte aligned. So use the full-memory-barrier InterlockedOr64(). */
#ifdef _M_IX86
#define atomic_read64(SRC, DST, ORDER)                                  \
    __pragma (warning(push))                                            \
    __pragma (warning(disable:4047))                                    \
    *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);            \
    __pragma (warning(pop))
#elif _M_X64
/* 64 bit reads are atomic on amd64 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                  \
    *(DST) = *(SRC);
#endif

#define atomic_read(SRC, DST)                                           \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER)                           \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
        atomic_readX(SRC, DST, ORDER)                                   \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_read64(SRC, DST, ORDER)                                  \
    } else {                                                            \
        abort();                                                        \
    }
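
/* Read usage sketch (illustrative; 'counter', 'big_counter' and the local
 * variables are hypothetical):
 *
 *     uint32_t value;
 *     atomic_read(&counter, &value);      // plain volatile load on x86
 *
 *     uint64_t big;
 *     atomic_read_explicit(&big_counter, &big, memory_order_acquire);
 *                          // on 32-bit builds this is InterlockedOr64()
 */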

/* For add, sub, and logical operations, for 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist
 * (though documentation exists for some of them). The MSVC C++ library which
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h). So do the same here. */

/* For 8, 16 and 64 bit variations. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)
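
/* Expansion sketch (illustrative): atomic_op() pastes the operation name
 * and the operand width together, so
 *
 *     atomic_op(and, 16, RMW, ARG, ORIG, ORDER)
 *
 * becomes atomic_and_generic(16, ...), which in turn (see below) calls
 * InterlockedAnd16() with an int16_t cast. */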

/* Arithmetic addition calls. */

#define atomic_add8(RMW, ARG, ORIG, ORDER)                              \
    *(ORIG) = _InterlockedExchangeAdd8((char volatile *) (RMW),         \
                                       (char) (ARG));

#define atomic_add16(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = _InterlockedExchangeAdd16((short volatile *) (RMW),       \
                                        (short) (ARG));

#define atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedExchangeAdd((long volatile *) (RMW),           \
                                     (long) (ARG));

#define atomic_add64(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = _InterlockedExchangeAdd64((int64_t volatile *) (RMW),     \
                                        (int64_t) (ARG));

#define atomic_add(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_add8(RMW, ARG, ORIG, ORDER)                              \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_add16(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_add64(RMW, ARG, ORIG, ORDER)                             \
    } else {                                                            \
        abort();                                                        \
    }
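
/* Add usage sketch (illustrative; 'counter' and 'orig' are hypothetical).
 * The third argument receives the value *before* the addition, matching
 * C11 atomic_fetch_add():
 *
 *     uint32_t orig;
 *     atomic_add(&counter, 5, &orig);   // counter += 5; orig = old value
 */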

/* Arithmetic subtraction calls. */

#define atomic_sub(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)

/* Logical 'and' calls. */

#define atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG)                                      \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'or' calls. */

#define atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                     \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),           \
                               (int##X##_t) (ARG));

#define atomic_or(RMW, ARG, ORIG)                                       \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER)                       \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(or, 8, RMW, ARG, ORIG, ORDER)                         \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(or, 16, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(or, 64, RMW, ARG, ORIG, ORDER)                        \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'xor' calls. */

#define atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG)                                      \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

#define atomic_compare_exchange_strong(DST, EXP, SRC)                   \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,              \
                                            memory_order_seq_cst,       \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit                           \
    atomic_compare_exchange_strong_explicit

/* MSVC's C++ compiler implements C11 atomics, and looking through its
 * implementation (in xatomic.h), orders are ignored for the x86 platform.
 * Do the same here. */
static inline bool
atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
{
    int8_t previous = _InterlockedCompareExchange8((char volatile *) dst,
                                                   src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
                          int16_t src)
{
    int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
                          int32_t src)
{
    int32_t previous = InterlockedCompareExchange((long volatile *) dst,
                                                  src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
                          int64_t src)
{
    int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_unreachable(void)
{
    return true;
}

#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2) \
    (sizeof *(DST) == 1                                                    \
     ? atomic_compare_exchange8((int8_t volatile *) (DST),                 \
                                (int8_t *) (EXP), (int8_t) (SRC))          \
     : (sizeof *(DST) == 2                                                 \
        ? atomic_compare_exchange16((int16_t volatile *) (DST),            \
                                    (int16_t *) (EXP), (int16_t) (SRC))    \
        : (sizeof *(DST) == 4                                              \
           ? atomic_compare_exchange32((int32_t volatile *) (DST),         \
                                       (int32_t *) (EXP), (int32_t) (SRC)) \
           : (sizeof *(DST) == 8                                           \
              ? atomic_compare_exchange64((int64_t volatile *) (DST),      \
                                          (int64_t *) (EXP),               \
                                          (int64_t) (SRC))                 \
              : (ovs_fatal(0, "atomic operation with size greater than "   \
                              "8 bytes"),                                  \
                 atomic_compare_unreachable())))))
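
/* Compare-and-swap usage sketch (illustrative; 'counter' and the locals
 * are hypothetical). The classic retry loop: on failure, 'expected' is
 * reloaded with the value actually found, so the loop re-computes and
 * tries again:
 *
 *     uint32_t expected, desired;
 *     atomic_read(&counter, &expected);
 *     do {
 *         desired = expected * 2;
 *     } while (!atomic_compare_exchange_strong(&counter, &expected,
 *                                              desired));
 */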

\f
/* atomic_flag */

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0

#define atomic_flag_test_and_set(FLAG)                                  \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

#define atomic_flag_test_and_set_explicit(FLAG, ORDER)                  \
    atomic_flag_test_and_set(FLAG)

#define atomic_flag_clear_explicit(FLAG, ORDER)                         \
    atomic_flag_clear(FLAG)
#define atomic_flag_clear(FLAG)                                         \
    InterlockedBitTestAndReset(FLAG, 0)
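
/* atomic_flag usage sketch (illustrative; 'lock' and the critical section
 * are hypothetical). A minimal test-and-set spinlock:
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     while (atomic_flag_test_and_set(&lock)) {
 *         ;                           // spin until the flag was clear
 *     }
 *     // ... critical section ...
 *     atomic_flag_clear(&lock);
 */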