/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives for MSVC on 32 bit
 * (i586 or greater) and x64 platforms. */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From the MSDN documentation: with Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation stating that anything newer than
 * VS 2005 has the same behavior as described above, looking through MSVC's
 * C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile

typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;

#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT)                      \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)

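/* Illustrative sketch of how the definitions above are meant to be used.
 * The variable names here are made up for illustration, not taken from this
 * header:
 *
 *     static ATOMIC(uint32_t) packet_count = ATOMIC_VAR_INIT(0);
 *
 *     ATOMIC(uint32_t) byte_count;
 *     atomic_init(&byte_count, 0);
 */
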
static inline void
atomic_compiler_barrier(memory_order order)
{
    /* In case of 'memory_order_consume', it is implicitly assumed that
     * the compiler will not move instructions that have data-dependency
     * on the variable in question before the barrier. */
    if (order > memory_order_consume) {
        _ReadWriteBarrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    /* x86 is strongly ordered and acquire/release semantics come
     * automatically. */
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

/* 1, 2 and 4 byte loads and stores are atomic on aligned memory. In addition,
 * since the compiler automatically takes acquire and release semantics on
 * volatile variables, for any order weaker than 'memory_order_seq_cst' we
 * can directly assign or read values. */

#define atomic_store32(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange((int32_t volatile *) (DST),                 \
                            (int32_t) (SRC));                           \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

/* MSVC converts 64 bit writes into two instructions, so there is a
 * possibility that an interrupt can make a 64 bit write non-atomic even
 * when it is 8 byte aligned. So use InterlockedExchange64().
 *
 * For atomic stores, 'consume' and 'acquire' semantics are not valid. But we
 * are using 'Exchange' to get atomic stores here, and only
 * InterlockedExchange64(), InterlockedExchangeNoFence64() and
 * InterlockedExchange64Acquire() are available. So we are forced to use
 * InterlockedExchange64(), which implies a full memory barrier, for
 * everything stronger than 'memory_order_relaxed'. */
#ifdef _M_IX86
#define atomic_store64(DST, SRC, ORDER)                                    \
    if (ORDER == memory_order_relaxed) {                                   \
        InterlockedExchangeNoFence64((int64_t volatile *) (DST),           \
                                     (int64_t) (SRC));                     \
    } else {                                                               \
        InterlockedExchange64((int64_t volatile *) (DST), (int64_t) (SRC));\
    }
#elif _M_X64
/* 64 bit writes are atomic on amd64 if 64 bit aligned. */
#define atomic_store64(DST, SRC, ORDER)                                    \
    atomic_storeX(64, DST, SRC, ORDER)
#endif

/* Used for the 8 and 16 bit variations (and, on x64 builds, also for the
 * 64 bit variation). */
#define atomic_storeX(X, DST, SRC, ORDER)                               \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange##X((int##X##_t volatile *) (DST),           \
                               (int##X##_t) (SRC));                     \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store(DST, SRC)                               \
        atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER)                           \
    if (sizeof *(DST) == 1) {                                            \
        atomic_storeX(8, DST, SRC, ORDER)                                \
    } else if (sizeof *(DST) == 2) {                                     \
        atomic_storeX(16, DST, SRC, ORDER)                               \
    } else if (sizeof *(DST) == 4) {                                     \
        atomic_store32(DST, SRC, ORDER)                                  \
    } else if (sizeof *(DST) == 8) {                                     \
        atomic_store64(DST, SRC, ORDER)                                  \
    } else {                                                             \
        abort();                                                         \
    }

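/* Illustrative use of the store macros above (a sketch; 'counter' is a
 * made-up variable, not part of this header). For a 4 byte object, a seq_cst
 * store goes through InterlockedExchange(), while a release store becomes a
 * plain volatile write:
 *
 *     static ATOMIC(uint32_t) counter;
 *
 *     atomic_store(&counter, 1);
 *     atomic_store_explicit(&counter, 2, memory_order_release);
 */
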
/* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 * reads don't need to be locked (based on the following in the Intel
 * Developer's Manual:
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations. */
#define atomic_readX(SRC, DST, ORDER)                                      \
    *(DST) = *(SRC);

/* MSVC converts 64 bit reads into two instructions, so there is a
 * possibility that an interrupt can make a 64 bit read non-atomic even when
 * it is 8 byte aligned. So use InterlockedOr64(), which implies a full
 * memory barrier. */
#ifdef _M_IX86
#define atomic_read64(SRC, DST, ORDER)                                     \
    __pragma (warning(push))                                               \
    __pragma (warning(disable:4047))                                       \
    *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);               \
    __pragma (warning(pop))
#elif _M_X64
/* 64 bit reads are atomic on amd64 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                     \
    *(DST) = *(SRC);
#endif

#define atomic_read(SRC, DST)                               \
        atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER)                              \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) {  \
        atomic_readX(SRC, DST, ORDER)                                      \
    } else if (sizeof *(DST) == 8) {                                       \
        atomic_read64(SRC, DST, ORDER)                                     \
    } else {                                                               \
        abort();                                                           \
    }

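/* Illustrative read, continuing the sketch above ('counter' and 'value' are
 * made-up names). The read macros copy the atomic object into a plain
 * variable supplied by the caller:
 *
 *     uint32_t value;
 *
 *     atomic_read(&counter, &value);
 *     atomic_read_explicit(&counter, &value, memory_order_acquire);
 */
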
/* For add, sub, and logical operations on 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist (though
 * documentation exists for some of them). The MSVC C++ library that
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h). So do the same here. */

/* For 8, 16 and 64 bit variations. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
        atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)

/* Arithmetic addition calls. */

#define atomic_add32(RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedExchangeAdd((int32_t volatile *) (RMW),   \
                                     (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_add_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = _InterlockedExchangeAdd##X((int##X##_t volatile *) (RMW),     \
                                         (int##X##_t) (ARG));

#define atomic_add(RMW, ARG, ORIG)                               \
        atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                  \
        atomic_op(add, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                           \
        atomic_op(add, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                           \
        atomic_add32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                           \
        atomic_op(add, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                   \
        abort();                                               \
    }

/* Arithmetic subtraction calls. */

#define atomic_sub(RMW, ARG, ORIG)                                        \
        atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)        \
        atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)

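/* Illustrative read-modify-write, continuing the sketch ('orig' is a made-up
 * variable). 'orig' receives the value that 'counter' held just before the
 * operation, matching the C11 fetch-and-add style:
 *
 *     uint32_t orig;
 *
 *     atomic_add(&counter, 5, &orig);
 *     atomic_sub_explicit(&counter, 2, &orig, memory_order_relaxed);
 */
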
/* Logical 'and' calls. */

#define atomic_and32(RMW, ARG, ORIG, ORDER)                                \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                       \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),             \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG)                               \
        atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                  \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                           \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                           \
        atomic_and32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                           \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                   \
        abort();                                               \
    }

/* Logical 'Or' calls. */

#define atomic_or32(RMW, ARG, ORIG, ORDER)                                 \
    *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                        \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),              \
                               (int##X##_t) (ARG));

#define atomic_or(RMW, ARG, ORIG)                               \
        atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                 \
        atomic_op(or, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                          \
        atomic_op(or, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                          \
        atomic_or32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                          \
        atomic_op(or, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                  \
        abort();                                              \
    }

/* Logical Xor calls. */

#define atomic_xor32(RMW, ARG, ORIG, ORDER)                                \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                       \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),             \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG)                               \
        atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)             \
    if (sizeof *(RMW) == 1) {                                  \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)               \
    } else if (sizeof *(RMW) == 2) {                           \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)              \
    } else if (sizeof *(RMW) == 4) {                           \
        atomic_xor32(RMW, ARG, ORIG, ORDER)                    \
    } else if (sizeof *(RMW) == 8) {                           \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)              \
    } else {                                                   \
        abort();                                               \
    }

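/* The logical operations above follow the same calling pattern as
 * atomic_add(), e.g. (a sketch with made-up names; 'flags' is an
 * ATOMIC(uint32_t)):
 *
 *     uint32_t old_flags;
 *
 *     atomic_or(&flags, 0x4, &old_flags);
 *     atomic_and(&flags, ~0x4, &old_flags);
 */
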
#define atomic_compare_exchange_strong(DST, EXP, SRC)             \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,        \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit        \
        atomic_compare_exchange_strong_explicit

/* MSVC's C++ compiler implements C11 atomics, and looking through its
 * implementation (in xatomic.h), memory orders are ignored on the x86
 * platform. Do the same here. */
static inline bool
atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
{
    int8_t previous = _InterlockedCompareExchange8(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
                          int16_t src)
{
    int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
                          int32_t src)
{
    int32_t previous = InterlockedCompareExchange(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
                          int64_t src)
{
    int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_unreachable()
{
    return true;
}

#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2)    \
    (sizeof *(DST) == 1                                                       \
     ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP),  \
                                (int8_t) (SRC))                               \
     : (sizeof *(DST) == 2                                                    \
        ? atomic_compare_exchange16((int16_t volatile *) (DST),               \
                                    (int16_t *) (EXP), (int16_t) (SRC))       \
        : (sizeof *(DST) == 4                                                 \
           ? atomic_compare_exchange32((int32_t volatile *) (DST),            \
                                       (int32_t *) (EXP), (int32_t) (SRC))    \
           : (sizeof *(DST) == 8                                              \
              ? atomic_compare_exchange64((int64_t volatile *) (DST),         \
                                          (int64_t *) (EXP), (int64_t) (SRC)) \
              : ovs_fatal(0, "atomic operation with size greater than 8 bytes"), \
                atomic_compare_unreachable()))))

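/* A typical compare-and-exchange loop over the macro above, sketched with
 * made-up names. On failure the macro writes the value actually found into
 * 'expected', so the loop retries with fresh data:
 *
 *     static ATOMIC(uint32_t) value;
 *     uint32_t expected, desired;
 *
 *     atomic_read(&value, &expected);
 *     do {
 *         desired = expected * 2;
 *     } while (!atomic_compare_exchange_strong(&value, &expected, desired));
 */
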
\f
/* atomic_flag */

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0

#define atomic_flag_test_and_set(FLAG)                 \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

#define atomic_flag_test_and_set_explicit(FLAG, ORDER) \
        atomic_flag_test_and_set(FLAG)

#define atomic_flag_clear_explicit(FLAG, ORDER) \
        atomic_flag_clear(FLAG)
#define atomic_flag_clear(FLAG)                 \
    InterlockedBitTestAndReset(FLAG, 0)
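
/* atomic_flag can back a simple spin lock, e.g. (a sketch; 'lock' and the
 * function names below are made up):
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     static void
 *     example_spin_lock(void)
 *     {
 *         while (atomic_flag_test_and_set(&lock)) {
 *             continue;
 *         }
 *     }
 *
 *     static void
 *     example_spin_unlock(void)
 *     {
 *         atomic_flag_clear(&lock);
 *     }
 */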