/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives for MSVC
 * on i586 or greater platforms (32 bit). */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From MSDN documentation: With Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation stating that anything newer than
 * VS 2005 has the same behavior as described above, looking through MSVC's
 * C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile
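
/* Usage sketch (illustrative, not part of the original header; 'counter'
 * is a hypothetical variable name):
 *
 *     static ATOMIC(long) counter;    // expands to 'long volatile counter'
 *
 * Plain reads and writes of such a variable get the compiler-level
 * acquire/release treatment described above; the macros further down add
 * the Interlocked* calls needed for sequentially consistent accesses. */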

typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;

#if _MSC_VER > 1800 && defined(_M_IX86)
/* From WDK 10, the _Interlocked* functions are renamed to
 * _InlineInterlocked*, although the documentation does not specify it. */
#define _InterlockedExchangeAdd64 _InlineInterlockedExchangeAdd64
#define _InterlockedExchange64 _InlineInterlockedExchange64
#endif

#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
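
/* Initialization sketch (illustrative; 'flag_var' is hypothetical):
 *
 *     static ATOMIC(int) flag_var = ATOMIC_VAR_INIT(0);
 *
 *     void init_at_runtime(void)
 *     {
 *         atomic_init(&flag_var, 0);  // plain store; not yet thread-safe
 *     }
 *
 * As in C11, atomic_init() may only be used before the variable is shared
 * between threads. */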

static inline void
atomic_compiler_barrier(memory_order order)
{
    /* In case of 'memory_order_consume', it is implicitly assumed that
     * the compiler will not move instructions that have data-dependency
     * on the variable in question before the barrier. */
    if (order > memory_order_consume) {
        _ReadWriteBarrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    /* x86 is strongly ordered and acquire/release semantics come
     * automatically. */
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}
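
/* Fence usage sketch (illustrative; 'data' and 'ready' are hypothetical
 * volatile variables). On this implementation a release fence costs only
 * a compiler barrier, while seq_cst additionally issues MemoryBarrier():
 *
 *     data = 42;                                  // plain volatile store
 *     atomic_thread_fence(memory_order_release);  // compiler barrier only
 *     ready = 1;                                  // publish
 */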

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

/* 1, 2 and 4 byte loads and stores are atomic on aligned memory. In
 * addition, since the compiler automatically takes acquire and release
 * semantics on volatile variables, for any order weaker than
 * 'memory_order_seq_cst', we can directly assign or read values. */

#define atomic_store32(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange((long volatile *) (DST),                    \
                            (long) (SRC));                              \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

/* MSVC converts 64 bit writes into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit write non-atomic even
 * when 8 byte aligned. So use InterlockedExchange64().
 *
 * For atomic stores, 'consume' and 'acquire' semantics are not valid. But we
 * are using 'Exchange' to get atomic stores here and we only have
 * InterlockedExchange64(), InterlockedExchangeNoFence64() and
 * InterlockedExchange64Acquire() available. So we are forced to use
 * InterlockedExchange64(), which uses a full memory barrier, for everything
 * stronger than 'memory_order_relaxed'. */
#ifdef _M_IX86
#define atomic_store64(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_relaxed) {                                \
        InterlockedExchangeNoFence64((int64_t volatile *) (DST),        \
                                     (int64_t) (SRC));                  \
    } else {                                                            \
        InterlockedExchange64((int64_t volatile *) (DST),               \
                              (int64_t) (SRC));                         \
    }
#elif _M_X64
/* 64 bit writes are atomic on amd64 if 64 bit aligned. */
#define atomic_store64(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange64((int64_t volatile *) (DST),               \
                              (int64_t) (SRC));                         \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }
#endif

#define atomic_store8(DST, SRC, ORDER)                                  \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange8((char volatile *) (DST), (char) (SRC));    \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store16(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange16((short volatile *) (DST), (short) (SRC)); \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store(DST, SRC)                                          \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER)                          \
    if (sizeof *(DST) == 1) {                                           \
        atomic_store8(DST, SRC, ORDER)                                  \
    } else if (sizeof *(DST) == 2) {                                    \
        atomic_store16(DST, SRC, ORDER)                                 \
    } else if (sizeof *(DST) == 4) {                                    \
        atomic_store32(DST, SRC, ORDER)                                 \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_store64(DST, SRC, ORDER)                                 \
    } else {                                                            \
        abort();                                                        \
    }
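
/* Store usage sketch (illustrative; 'counter' is hypothetical):
 *
 *     static ATOMIC(uint32_t) counter;
 *
 *     atomic_store(&counter, 10);      // seq_cst: InterlockedExchange()
 *     atomic_store_explicit(&counter, 10, memory_order_release);
 *                                      // weaker order: plain volatile store
 */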

/* On x86, for 'memory_order_seq_cst', if stores are locked, the
 * corresponding reads don't need to be locked (based on the following in
 * the Intel Developers manual:
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations. */
#define atomic_readX(SRC, DST, ORDER)                                   \
    *(DST) = *(SRC);

/* MSVC converts 64 bit reads into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit read non-atomic even
 * when 8 byte aligned. So use the full-memory-barrier InterlockedOr64(). */
#ifdef _M_IX86
#define atomic_read64(SRC, DST, ORDER)                                  \
    __pragma (warning(push))                                            \
    __pragma (warning(disable:4047))                                    \
    *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);            \
    __pragma (warning(pop))
#elif _M_X64
/* 64 bit reads are atomic on amd64 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                  \
    *(DST) = *(SRC);
#endif

#define atomic_read(SRC, DST)                                           \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER)                           \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
        atomic_readX(SRC, DST, ORDER)                                   \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_read64(SRC, DST, ORDER)                                  \
    } else {                                                            \
        abort();                                                        \
    }
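
/* Read usage sketch (illustrative; 'counter', 'big_counter' and the local
 * variables are hypothetical):
 *
 *     uint32_t value;
 *     atomic_read(&counter, &value);      // plain volatile load on x86
 *
 *     uint64_t big;
 *     atomic_read_explicit(&big_counter, &big, memory_order_acquire);
 *                          // on 32-bit builds this is InterlockedOr64()
 */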

/* For add, sub, and logical operations, for 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist
 * (though documentation exists for some of them). The MSVC C++ library which
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h). So do the same here. */

/* For 8, 16 and 64 bit variations. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)
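
/* Expansion sketch (illustrative): atomic_op() pastes the operation name
 * and the operand width together, so
 *
 *     atomic_op(and, 16, RMW, ARG, ORIG, ORDER)
 *
 * becomes atomic_and_generic(16, ...), which in turn (see below) calls
 * InterlockedAnd16() with an int16_t cast. */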

/* Arithmetic addition calls. */

#define atomic_add8(RMW, ARG, ORIG, ORDER)                              \
    *(ORIG) = _InterlockedExchangeAdd8((char volatile *) (RMW),         \
                                       (char) (ARG));

#define atomic_add16(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = _InterlockedExchangeAdd16((short volatile *) (RMW),       \
                                        (short) (ARG));

#define atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedExchangeAdd((long volatile *) (RMW),           \
                                     (long) (ARG));

#define atomic_add64(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = _InterlockedExchangeAdd64((int64_t volatile *) (RMW),     \
                                        (int64_t) (ARG));

#define atomic_add(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_add8(RMW, ARG, ORIG, ORDER)                              \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_add16(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_add64(RMW, ARG, ORIG, ORDER)                             \
    } else {                                                            \
        abort();                                                        \
    }
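
/* Add usage sketch (illustrative; 'counter' and 'orig' are hypothetical).
 * The third argument receives the value *before* the addition, matching
 * C11 atomic_fetch_add():
 *
 *     uint32_t orig;
 *     atomic_add(&counter, 5, &orig);   // counter += 5; orig = old value
 */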

/* Arithmetic subtraction calls. */

#define atomic_sub(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)

/* Logical 'and' calls. */

#define atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG)                                      \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'or' calls. */

#define atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                     \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),           \
                               (int##X##_t) (ARG));

#define atomic_or(RMW, ARG, ORIG)                                       \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER)                       \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(or, 8, RMW, ARG, ORIG, ORDER)                         \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(or, 16, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(or, 64, RMW, ARG, ORIG, ORDER)                        \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'xor' calls. */

#define atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG)                                      \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

#define atomic_compare_exchange_strong(DST, EXP, SRC)                   \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,              \
                                            memory_order_seq_cst,       \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit                           \
    atomic_compare_exchange_strong_explicit

/* MSVC's C++ compiler implements C11 atomics, and looking through its
 * implementation (in xatomic.h), orders are ignored for the x86 platform.
 * Do the same here. */
static inline bool
atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
{
    int8_t previous = _InterlockedCompareExchange8((char volatile *) dst,
                                                   src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
                          int16_t src)
{
    int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
                          int32_t src)
{
    int32_t previous = InterlockedCompareExchange((long volatile *) dst,
                                                  src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
                          int64_t src)
{
    int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_unreachable(void)
{
    return true;
}

#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2) \
    (sizeof *(DST) == 1                                                    \
     ? atomic_compare_exchange8((int8_t volatile *) (DST),                 \
                                (int8_t *) (EXP), (int8_t) (SRC))          \
     : (sizeof *(DST) == 2                                                 \
        ? atomic_compare_exchange16((int16_t volatile *) (DST),            \
                                    (int16_t *) (EXP), (int16_t) (SRC))    \
        : (sizeof *(DST) == 4                                              \
           ? atomic_compare_exchange32((int32_t volatile *) (DST),         \
                                       (int32_t *) (EXP), (int32_t) (SRC)) \
           : (sizeof *(DST) == 8                                           \
              ? atomic_compare_exchange64((int64_t volatile *) (DST),      \
                                          (int64_t *) (EXP),               \
                                          (int64_t) (SRC))                 \
              : (ovs_fatal(0, "atomic operation with size greater than "   \
                              "8 bytes"),                                  \
                 atomic_compare_unreachable())))))
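
/* Compare-and-swap usage sketch (illustrative; 'counter' and the locals
 * are hypothetical). The classic retry loop: on failure, 'expected' is
 * reloaded with the value actually found, so the loop re-computes and
 * tries again:
 *
 *     uint32_t expected, desired;
 *     atomic_read(&counter, &expected);
 *     do {
 *         desired = expected * 2;
 *     } while (!atomic_compare_exchange_strong(&counter, &expected,
 *                                              desired));
 */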

\f
/* atomic_flag */

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0

#define atomic_flag_test_and_set(FLAG)                                  \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

#define atomic_flag_test_and_set_explicit(FLAG, ORDER)                  \
    atomic_flag_test_and_set(FLAG)

#define atomic_flag_clear_explicit(FLAG, ORDER)                         \
    atomic_flag_clear(FLAG)
#define atomic_flag_clear(FLAG)                                         \
    InterlockedBitTestAndReset(FLAG, 0)
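
/* atomic_flag usage sketch (illustrative; 'lock' and the critical section
 * are hypothetical). A minimal test-and-set spinlock:
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *
 *     while (atomic_flag_test_and_set(&lock)) {
 *         ;                           // spin until the flag was clear
 *     }
 *     // ... critical section ...
 *     atomic_flag_clear(&lock);
 */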