]>
Commit | Line | Data |
---|---|---|
ec2d2b5f GS |
1 | /* |
2 | * Copyright (c) 2014 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
/* This header implements atomic operation primitives for MSVC
 * on i586 or greater platforms (32 bit).  It must only be reached
 * through ovs-atomic.h, which selects the right backend. */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From msdn documentation: With Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * access. With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation that states that anything greater
 * than VS 2005 has the same behavior as described above, looking through MSVCs
 * C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */

/* An atomic variable of type TYPE is simply a volatile-qualified TYPE:
 * the MSVC volatile semantics above supply acquire/release ordering. */
#define ATOMIC(TYPE) TYPE volatile
34 | ||
/* C11-style memory orders, weakest to strongest.  The declaration order is
 * load-bearing: atomic_compiler_barrier() below compares orders with '>'
 * (e.g. 'order > memory_order_consume'), relying on this ascending
 * enumeration. */
typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;
43 | ||
/* Per the C11 *_LOCK_FREE convention, "2" means always lock-free.  All the
 * scalar widths handled by this header (1, 2, 4 and 8 bytes) qualify. */
#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

/* True if OBJECT can be handled by the lockless macros in this header:
 * a power-of-2 size no larger than 8 bytes.  (IS_POW2 is defined elsewhere
 * in the tree.) */
#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

/* Non-atomic initialization, matching C11 semantics: no ordering implied. */
#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
57 | ||
58 | static inline void | |
59 | atomic_compiler_barrier(memory_order order) | |
60 | { | |
61 | /* In case of 'memory_order_consume', it is implicitly assumed that | |
62 | * the compiler will not move instructions that have data-dependency | |
63 | * on the variable in question before the barrier. */ | |
64 | if (order > memory_order_consume) { | |
65 | _ReadWriteBarrier(); | |
66 | } | |
67 | } | |
68 | ||
/* Issues a thread fence of the given strength.
 *
 * x86 is strongly ordered and acquire/release semantics come
 * automatically, so a compiler-only barrier suffices below seq_cst.
 * Only 'memory_order_seq_cst' needs a real hardware barrier
 * (MemoryBarrier()), bracketed by compiler barriers so the compiler
 * cannot move accesses around it. */
static inline void
atomic_thread_fence(memory_order order)
{
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}
80 | ||
/* Fence against signal handlers running in the same thread: only compiler
 * reordering matters, so a compiler barrier is sufficient — no hardware
 * barrier needed. */
static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}
86 | ||
/* 1, 2 and 4 bytes loads and stores are atomic on aligned memory. In addition,
 * since the compiler automatically takes acquire and release semantics on
 * volatile variables, for any order lesser than 'memory_order_seq_cst', we
 * can directly assign or read values. */

/* 32-bit atomic store.  Only seq_cst needs the full barrier implied by
 * InterlockedExchange(); weaker orders rely on the plain volatile
 * assignment.  NOTE(review): expands to a bare if/else statement (not
 * do { } while (0)), so it is assumed callers never place it as the
 * unbraced body of an outer 'if' — TODO confirm. */
#define atomic_store32(DST, SRC, ORDER) \
    if (ORDER == memory_order_seq_cst) { \
        InterlockedExchange((int32_t volatile *) (DST), \
                            (int32_t) (SRC)); \
    } else { \
        *(DST) = (SRC); \
    }
99 | ||
/* MSVC converts 64 bit writes into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit write non-atomic even
 * when 8 byte aligned. So use InterlockedExchange64().
 *
 * For atomic stores, 'consume' and 'acquire' semantics are not valid. But we
 * are using 'Exchange' to get atomic stores here and we only have
 * InterlockedExchange64(), InterlockedExchangeNoFence64() and
 * InterlockedExchange64Acquire() available. So we are forced to use
 * InterlockedExchange64() which uses full memory barrier for everything
 * greater than 'memory_order_relaxed'. */
#define atomic_store64(DST, SRC, ORDER) \
    if (ORDER == memory_order_relaxed) { \
        InterlockedExchangeNoFence64((int64_t volatile *) (DST), \
                                     (int64_t) (SRC)); \
    } else { \
        InterlockedExchange64((int64_t volatile *) (DST), (int64_t) (SRC));\
    }
117 | ||
/* Used for 8 and 16 bit variations: X is the bit width, token-pasted into
 * both the Interlocked function name and the intN_t cast.  Same structure
 * as atomic_store32(): exchange for seq_cst, plain volatile assignment
 * otherwise. */
#define atomic_storeX(X, DST, SRC, ORDER) \
    if (ORDER == memory_order_seq_cst) { \
        InterlockedExchange##X((int##X##_t volatile *) (DST), \
                               (int##X##_t) (SRC)); \
    } else { \
        *(DST) = (SRC); \
    }
126 | ||
/* Atomic store of SRC into *DST, dispatching on operand size to the sized
 * macros above.  Sizes other than 1/2/4/8 bytes abort() at run time.
 * The sizeof chain is resolved at compile time, so only one branch
 * survives optimization.  NOTE(review): expands to a bare if/else chain,
 * so callers are assumed to always brace surrounding conditionals —
 * TODO confirm. */
#define atomic_store(DST, SRC) \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER) \
    if (sizeof *(DST) == 1) { \
        atomic_storeX(8, DST, SRC, ORDER) \
    } else if (sizeof *(DST) == 2) { \
        atomic_storeX(16, DST, SRC, ORDER) \
    } else if (sizeof *(DST) == 4) { \
        atomic_store32(DST, SRC, ORDER) \
    } else if (sizeof *(DST) == 8) { \
        atomic_store64(DST, SRC, ORDER) \
    } else { \
        abort(); \
    }
142 | ||
/* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 * reads don't need to be locked (based on the following in Intel Developers
 * manual:
 * “Locked operations are atomic with respect to all other memory operations
 * and all externally visible events. Only instruction fetch and page table
 * accesses can pass locked instructions. Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete). This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception. Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations: a plain volatile read is atomic on
 * aligned memory, so ORDER is deliberately ignored (see the comment
 * above for why even seq_cst reads need no lock here). */
#define atomic_readX(SRC, DST, ORDER) \
    *(DST) = *(SRC);
159 | ||
/* MSVC converts 64 bit reads into two instructions. So there is
 * a possibility that an interrupt can make a 64 bit read non-atomic even
 * when 8 byte aligned. So use fully memory barrier InterlockedOr64()
 * (OR with 0 leaves the value unchanged and returns it atomically).
 * The pragmas suppress MSVC warning C4047 about the levels-of-indirection
 * mismatch in the assignment. */
#define atomic_read64(SRC, DST, ORDER) \
    __pragma (warning(push)) \
    __pragma (warning(disable:4047)) \
    *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0); \
    __pragma (warning(pop))
ec2d2b5f GS |
168 | |
/* Atomic read of *SRC into *DST, dispatching on operand size.  1/2/4-byte
 * reads are plain volatile loads; 8-byte reads go through the interlocked
 * helper above.  Other sizes abort() at run time. */
#define atomic_read(SRC, DST) \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER) \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
        atomic_readX(SRC, DST, ORDER) \
    } else if (sizeof *(DST) == 8) { \
        atomic_read64(SRC, DST, ORDER) \
    } else { \
        abort(); \
    }
180 | ||
/* For add, sub, and logical operations, for 8, 16 and 64 bit data types,
 * functions for all the different memory orders does not exist
 * (though documentation exists for some of them). The MSVC C++ library which
 * implements the c11 atomics simply calls the full memory barrier function
 * for everything in x86(see xatomic.h). So do the same here. */

/* For 8, 16 and 64 bit variations: expands to the per-operation generic
 * macro (e.g. atomic_add_generic) with the bit width X pasted in. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER) \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)

/* Arithmetic addition calls. */

/* Atomically adds ARG to *RMW and stores the PREVIOUS value in *ORIG.
 * ORDER is ignored; the Interlocked call is a full barrier. */
#define atomic_add32(RMW, ARG, ORIG, ORDER) \
    *(ORIG) = InterlockedExchangeAdd((int32_t volatile *) (RMW), \
                                     (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_add_generic(X, RMW, ARG, ORIG, ORDER) \
    *(ORIG) = _InterlockedExchangeAdd##X((int##X##_t volatile *) (RMW), \
                                         (int##X##_t) (ARG));

#define atomic_add(RMW, ARG, ORIG) \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

/* Size dispatch; non-1/2/4/8-byte operands abort() at run time. */
#define atomic_add_explicit(RMW, ARG, ORIG, ORDER) \
    if (sizeof *(RMW) == 1) { \
        atomic_op(add, 8, RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 2) { \
        atomic_op(add, 16, RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 4) { \
        atomic_add32(RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 8) { \
        atomic_op(add, 64, RMW, ARG, ORIG, ORDER) \
    } else { \
        abort(); \
    }
217 | ||
/* Arithmetic subtraction calls.  Implemented as addition of the negated
 * argument; '(0 - (ARG))' rather than '-(ARG)' so the expression is still
 * well-formed (modular) when ARG has an unsigned type. */

#define atomic_sub(RMW, ARG, ORIG) \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER) \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)
225 | ||
/* Logical 'and' calls.  Each atomically computes *RMW &= ARG and stores the
 * PREVIOUS value in *ORIG.  ORDER is ignored; Interlocked calls are full
 * barriers (see the comment above atomic_op()). */

#define atomic_and32(RMW, ARG, ORIG, ORDER) \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER) \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW), \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG) \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

/* Size dispatch; non-1/2/4/8-byte operands abort() at run time. */
#define atomic_and_explicit(RMW, ARG, ORIG, ORDER) \
    if (sizeof *(RMW) == 1) { \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 2) { \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 4) { \
        atomic_and32(RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 8) { \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER) \
    } else { \
        abort(); \
    }
251 | ||
252 | /* Logical 'Or' calls. */ | |
253 | ||
254 | #define atomic_or32(RMW, ARG, ORIG, ORDER) \ | |
255 | *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG)); | |
256 | ||
257 | /* For 8, 16 and 64 bit variations. */ | |
258 | #define atomic_or_generic(X, RMW, ARG, ORIG, ORDER) \ | |
259 | *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW), \ | |
260 | (int##X##_t) (ARG)); | |
261 | ||
262 | #define atomic_or(RMW, ARG, ORIG) \ | |
263 | atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst) | |
264 | ||
265 | #define atomic_or_explicit(RMW, ARG, ORIG, ORDER) \ | |
266 | if (sizeof *(RMW) == 1) { \ | |
267 | atomic_op(or, 8, RMW, ARG, ORIG, ORDER) \ | |
268 | } else if (sizeof *(RMW) == 2) { \ | |
269 | atomic_op(or, 16, RMW, ARG, ORIG, ORDER) \ | |
270 | } else if (sizeof *(RMW) == 4) { \ | |
271 | atomic_or32(RMW, ARG, ORIG, ORDER) \ | |
272 | } else if (sizeof *(RMW) == 8) { \ | |
273 | atomic_op(or, 64, RMW, ARG, ORIG, ORDER) \ | |
274 | } else { \ | |
275 | abort(); \ | |
276 | } | |
277 | ||
/* Logical Xor calls.  Each atomically computes *RMW ^= ARG and stores the
 * PREVIOUS value in *ORIG.  ORDER is ignored; Interlocked calls are full
 * barriers (see the comment above atomic_op()). */

#define atomic_xor32(RMW, ARG, ORIG, ORDER) \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER) \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW), \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG) \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

/* Size dispatch; non-1/2/4/8-byte operands abort() at run time.
 * (A stray ';' after the 4-byte branch's macro invocation was removed to
 * match the add/and/or dispatch macros; the sized macros already end their
 * expansion with a ';'.) */
#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER) \
    if (sizeof *(RMW) == 1) { \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 2) { \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 4) { \
        atomic_xor32(RMW, ARG, ORIG, ORDER) \
    } else if (sizeof *(RMW) == 8) { \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER) \
    } else { \
        abort(); \
    }
303 | ||
/* Strong compare-and-exchange, with both success and failure orders
 * defaulting to seq_cst.  The weak variants simply alias the strong ones:
 * on this backend the same Interlocked-based implementation serves both. */
#define atomic_compare_exchange_strong(DST, EXP, SRC) \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC, \
                                            memory_order_seq_cst, \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
    atomic_compare_exchange_strong_explicit
312 | ||
313 | /* MSVCs c++ compiler implements c11 atomics and looking through its | |
314 | * implementation (in xatomic.h), orders are ignored for x86 platform. | |
315 | * Do the same here. */ | |
316 | static inline bool | |
317 | atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src) | |
318 | { | |
319 | int8_t previous = _InterlockedCompareExchange8(dst, src, *expected); | |
320 | if (previous == *expected) { | |
321 | return true; | |
322 | } else { | |
323 | *expected = previous; | |
324 | return false; | |
325 | } | |
326 | } | |
327 | ||
328 | static inline bool | |
329 | atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected, | |
330 | int16_t src) | |
331 | { | |
332 | int16_t previous = InterlockedCompareExchange16(dst, src, *expected); | |
333 | if (previous == *expected) { | |
334 | return true; | |
335 | } else { | |
336 | *expected = previous; | |
337 | return false; | |
338 | } | |
339 | } | |
340 | ||
341 | static inline bool | |
342 | atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected, | |
343 | int32_t src) | |
344 | { | |
345 | int32_t previous = InterlockedCompareExchange(dst, src, *expected); | |
346 | if (previous == *expected) { | |
347 | return true; | |
348 | } else { | |
349 | *expected = previous; | |
350 | return false; | |
351 | } | |
352 | } | |
353 | ||
354 | static inline bool | |
355 | atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected, | |
356 | int64_t src) | |
357 | { | |
358 | int64_t previous = InterlockedCompareExchange64(dst, src, *expected); | |
359 | if (previous == *expected) { | |
360 | return true; | |
361 | } else { | |
362 | *expected = previous; | |
363 | return false; | |
364 | } | |
365 | } | |
366 | ||
/* Always returns true.  Exists only as the right-hand side of the comma
 * operator in atomic_compare_exchange_strong_explicit() so that the
 * oversized-operand branch still has type bool; ovs_fatal() exits first,
 * so this is never actually reached.
 *
 * Fix: declared with '(void)' — an empty parameter list '()' in C is an
 * old-style declaration with unspecified arguments, not a prototype. */
static inline bool
atomic_compare_unreachable(void)
{
    return true;
}
372 | ||
/* Size-dispatching strong CAS.  Evaluates to a bool: true if *DST matched
 * *EXP and was replaced by SRC; false otherwise, with *EXP updated to the
 * observed value by the sized helper.  ORD1/ORD2 are accepted for API
 * compatibility but ignored (see the comment above the sized helpers).
 * Operands larger than 8 bytes die via ovs_fatal(); the trailing comma
 * expression with atomic_compare_unreachable() only gives that branch a
 * bool type so the conditional chain type-checks. */
#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2) \
    (sizeof *(DST) == 1 \
     ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP), \
                                (int8_t) (SRC)) \
     : (sizeof *(DST) == 2 \
        ? atomic_compare_exchange16((int16_t volatile *) (DST), \
                                    (int16_t *) (EXP), (int16_t) (SRC)) \
        : (sizeof *(DST) == 4 \
           ? atomic_compare_exchange32((int32_t volatile *) (DST), \
                                       (int32_t *) (EXP), (int32_t) (SRC)) \
           : (sizeof *(DST) == 8 \
              ? atomic_compare_exchange64((int64_t volatile *) (DST), \
                                          (int64_t *) (EXP), (int64_t) (SRC)) \
              : ovs_fatal(0, "atomic operation with size greater than 8 bytes"), \
              atomic_compare_unreachable()))))
388 | ||
389 | \f | |
390 | /* atomic_flag */ | |
391 | ||
392 | typedef ATOMIC(int32_t) atomic_flag; | |
393 | #define ATOMIC_FLAG_INIT 0 | |
394 | ||
395 | #define atomic_flag_test_and_set(FLAG) \ | |
396 | (bool) InterlockedBitTestAndSet(FLAG, 0) | |
397 | ||
398 | #define atomic_flag_test_and_set_explicit(FLAG, ORDER) \ | |
399 | atomic_flag_test_and_set(FLAG) | |
400 | ||
401 | #define atomic_flag_clear_explicit(FLAG, ORDER) \ | |
402 | atomic_flag_clear() | |
403 | #define atomic_flag_clear(FLAG) \ | |
404 | InterlockedBitTestAndReset(FLAG, 0) |