/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This header implements atomic operation primitives for MSVC
 * on i586 or greater platforms (32 bit). */
#ifndef IN_OVS_ATOMIC_H
#error "This header should only be included indirectly via ovs-atomic.h."
#endif

/* From MSDN documentation: with Visual Studio 2003, volatile to volatile
 * references are ordered; the compiler will not re-order volatile variable
 * accesses.  With Visual Studio 2005, the compiler also uses acquire semantics
 * for read operations on volatile variables and release semantics for write
 * operations on volatile variables (when supported by the CPU).
 *
 * Though there is no clear documentation that states that anything newer
 * than VS 2005 has the same behavior as described above, looking through
 * MSVC's C++ atomics library in VS2013 shows that the compiler still takes
 * acquire/release semantics on volatile variables. */
#define ATOMIC(TYPE) TYPE volatile

typedef enum {
    memory_order_relaxed,
    memory_order_consume,
    memory_order_acquire,
    memory_order_release,
    memory_order_acq_rel,
    memory_order_seq_cst
} memory_order;

#define ATOMIC_BOOL_LOCK_FREE 2
#define ATOMIC_CHAR_LOCK_FREE 2
#define ATOMIC_SHORT_LOCK_FREE 2
#define ATOMIC_INT_LOCK_FREE 2
#define ATOMIC_LONG_LOCK_FREE 2
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

#define IS_LOCKLESS_ATOMIC(OBJECT) \
    (sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))

#define ATOMIC_VAR_INIT(VALUE) (VALUE)
#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)

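/* Usage sketch (illustrative only, not part of this header): declaring and
 * initializing an atomic variable with the macros above.  The variable name
 * 'counter' is hypothetical.
 *
 *     static ATOMIC(uint32_t) counter = ATOMIC_VAR_INIT(0);
 *     ...
 *     atomic_init(&counter, 0);      -- non-atomic, for initialization only
 */
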
static inline void
atomic_compiler_barrier(memory_order order)
{
    /* In case of 'memory_order_consume', it is implicitly assumed that
     * the compiler will not move instructions that have data-dependency
     * on the variable in question before the barrier. */
    if (order > memory_order_consume) {
        _ReadWriteBarrier();
    }
}

static inline void
atomic_thread_fence(memory_order order)
{
    /* x86 is strongly ordered and acquire/release semantics come
     * automatically. */
    atomic_compiler_barrier(order);
    if (order == memory_order_seq_cst) {
        MemoryBarrier();
        atomic_compiler_barrier(order);
    }
}

static inline void
atomic_signal_fence(memory_order order)
{
    atomic_compiler_barrier(order);
}

/* 1, 2 and 4 byte loads and stores are atomic on aligned memory.  In
 * addition, since the compiler automatically takes acquire and release
 * semantics on volatile variables, for any order less than
 * 'memory_order_seq_cst' we can directly assign or read values. */

#define atomic_store32(DST, SRC, ORDER)                                 \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange((int32_t volatile *) (DST),                 \
                            (int32_t) (SRC));                           \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

/* 64 bit writes are atomic on i586 if 64 bit aligned. */
#define atomic_store64(DST, SRC, ORDER)                                 \
    if (((size_t) (DST) & (sizeof *(DST) - 1))                          \
        || ORDER == memory_order_seq_cst) {                             \
        InterlockedExchange64((int64_t volatile *) (DST),               \
                              (int64_t) (SRC));                         \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

/* Used for 8 and 16 bit variations. */
#define atomic_storeX(X, DST, SRC, ORDER)                               \
    if (ORDER == memory_order_seq_cst) {                                \
        InterlockedExchange##X((int##X##_t volatile *) (DST),           \
                               (int##X##_t) (SRC));                     \
    } else {                                                            \
        *(DST) = (SRC);                                                 \
    }

#define atomic_store(DST, SRC)                                          \
    atomic_store_explicit(DST, SRC, memory_order_seq_cst)

#define atomic_store_explicit(DST, SRC, ORDER)                          \
    if (sizeof *(DST) == 1) {                                           \
        atomic_storeX(8, DST, SRC, ORDER)                               \
    } else if (sizeof *(DST) == 2) {                                    \
        atomic_storeX(16, DST, SRC, ORDER)                              \
    } else if (sizeof *(DST) == 4) {                                    \
        atomic_store32(DST, SRC, ORDER)                                 \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_store64(DST, SRC, ORDER)                                 \
    } else {                                                            \
        abort();                                                        \
    }

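/* Usage sketch (illustrative only, not part of this header): storing into a
 * hypothetical ATOMIC(uint32_t) variable 'flag'.
 *
 *     atomic_store(&flag, 1);
 *     atomic_store_explicit(&flag, 1, memory_order_release);
 */
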
/* On x86, for 'memory_order_seq_cst', if stores are locked, the corresponding
 * reads don't need to be locked (based on the following from the Intel
 * Developer's Manual:
 * "Locked operations are atomic with respect to all other memory operations
 * and all externally visible events.  Only instruction fetch and page table
 * accesses can pass locked instructions.  Locked instructions can be used to
 * synchronize data written by one processor and read by another processor.
 * For the P6 family processors, locked operations serialize all outstanding
 * load and store operations (that is, wait for them to complete).  This rule
 * is also true for the Pentium 4 and Intel Xeon processors, with one
 * exception.  Load operations that reference weakly ordered memory types
 * (such as the WC memory type) may not be serialized."). */

/* For 8, 16 and 32 bit variations. */
#define atomic_readX(SRC, DST, ORDER)                                   \
    *(DST) = *(SRC);

/* 64 bit reads are atomic on i586 if 64 bit aligned. */
#define atomic_read64(SRC, DST, ORDER)                                  \
    if (((size_t) (SRC) & (sizeof *(SRC) - 1)) == 0) {                  \
        *(DST) = *(SRC);                                                \
    } else {                                                            \
        *(DST) = InterlockedOr64((int64_t volatile *) (SRC), 0);        \
    }

#define atomic_read(SRC, DST)                                           \
    atomic_read_explicit(SRC, DST, memory_order_seq_cst)

#define atomic_read_explicit(SRC, DST, ORDER)                           \
    if (sizeof *(DST) == 1 || sizeof *(DST) == 2 || sizeof *(DST) == 4) { \
        atomic_readX(SRC, DST, ORDER)                                   \
    } else if (sizeof *(DST) == 8) {                                    \
        atomic_read64(SRC, DST, ORDER)                                  \
    } else {                                                            \
        abort();                                                        \
    }

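/* Usage sketch (illustrative only, not part of this header): reading a
 * hypothetical ATOMIC(uint32_t) variable 'flag' into a local.
 *
 *     uint32_t value;
 *     atomic_read(&flag, &value);
 *     atomic_read_explicit(&flag, &value, memory_order_acquire);
 */
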
/* For add, sub, and logical operations, for 8, 16 and 64 bit data types,
 * functions for all the different memory orders do not exist
 * (though documentation exists for some of them).  The MSVC C++ library that
 * implements the C11 atomics simply calls the full memory barrier function
 * for everything on x86 (see xatomic.h).  So do the same here. */

/* For 8, 16 and 64 bit variations. */
#define atomic_op(OP, X, RMW, ARG, ORIG, ORDER)                         \
    atomic_##OP##_generic(X, RMW, ARG, ORIG, ORDER)

/* Arithmetic addition calls. */

#define atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedExchangeAdd((int32_t volatile *) (RMW),        \
                                     (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_add_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = _InterlockedExchangeAdd##X((int##X##_t volatile *) (RMW), \
                                         (int##X##_t) (ARG));

#define atomic_add(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_add_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(add, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(add, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_add32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(add, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

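/* Usage sketch (illustrative only, not part of this header): incrementing a
 * hypothetical ATOMIC(uint64_t) variable 'counter'.  'orig' receives the
 * value the counter held before the addition.
 *
 *     uint64_t orig;
 *     atomic_add(&counter, 1, &orig);
 */
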
/* Arithmetic subtraction calls. */

#define atomic_sub(RMW, ARG, ORIG)                                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, memory_order_seq_cst)

#define atomic_sub_explicit(RMW, ARG, ORIG, ORDER)                      \
    atomic_add_explicit(RMW, (0 - (ARG)), ORIG, ORDER)

/* Logical 'and' calls. */

#define atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedAnd((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_and_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedAnd##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_and(RMW, ARG, ORIG)                                      \
    atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_and_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(and, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(and, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_and32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(and, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'or' calls. */

#define atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    *(ORIG) = InterlockedOr((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_or_generic(X, RMW, ARG, ORIG, ORDER)                     \
    *(ORIG) = InterlockedOr##X((int##X##_t volatile *) (RMW),           \
                               (int##X##_t) (ARG));

#define atomic_or(RMW, ARG, ORIG)                                       \
    atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_or_explicit(RMW, ARG, ORIG, ORDER)                       \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(or, 8, RMW, ARG, ORIG, ORDER)                         \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(or, 16, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_or32(RMW, ARG, ORIG, ORDER)                              \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(or, 64, RMW, ARG, ORIG, ORDER)                        \
    } else {                                                            \
        abort();                                                        \
    }

/* Logical 'xor' calls. */

#define atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    *(ORIG) = InterlockedXor((int32_t volatile *) (RMW), (int32_t) (ARG));

/* For 8, 16 and 64 bit variations. */
#define atomic_xor_generic(X, RMW, ARG, ORIG, ORDER)                    \
    *(ORIG) = InterlockedXor##X((int##X##_t volatile *) (RMW),          \
                                (int##X##_t) (ARG));

#define atomic_xor(RMW, ARG, ORIG)                                      \
    atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst)

#define atomic_xor_explicit(RMW, ARG, ORIG, ORDER)                      \
    if (sizeof *(RMW) == 1) {                                           \
        atomic_op(xor, 8, RMW, ARG, ORIG, ORDER)                        \
    } else if (sizeof *(RMW) == 2) {                                    \
        atomic_op(xor, 16, RMW, ARG, ORIG, ORDER)                       \
    } else if (sizeof *(RMW) == 4) {                                    \
        atomic_xor32(RMW, ARG, ORIG, ORDER)                             \
    } else if (sizeof *(RMW) == 8) {                                    \
        atomic_op(xor, 64, RMW, ARG, ORIG, ORDER)                       \
    } else {                                                            \
        abort();                                                        \
    }

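/* Usage sketch (illustrative only, not part of this header): setting and
 * clearing one bit in a hypothetical ATOMIC(uint32_t) variable 'flags'.
 * 'orig' receives the value held before each operation.
 *
 *     uint32_t orig;
 *     atomic_or(&flags, 1u << 3, &orig);
 *     atomic_and(&flags, ~(1u << 3), &orig);
 */
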
#define atomic_compare_exchange_strong(DST, EXP, SRC)                   \
    atomic_compare_exchange_strong_explicit(DST, EXP, SRC,              \
                                            memory_order_seq_cst,       \
                                            memory_order_seq_cst)

#define atomic_compare_exchange_weak atomic_compare_exchange_strong
#define atomic_compare_exchange_weak_explicit \
    atomic_compare_exchange_strong_explicit

/* MSVC's C++ compiler implements C11 atomics, and looking through its
 * implementation (in xatomic.h), memory orders are ignored on the x86
 * platform.  Do the same here. */
static inline bool
atomic_compare_exchange8(int8_t volatile *dst, int8_t *expected, int8_t src)
{
    int8_t previous = _InterlockedCompareExchange8(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange16(int16_t volatile *dst, int16_t *expected,
                          int16_t src)
{
    int16_t previous = InterlockedCompareExchange16(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange32(int32_t volatile *dst, int32_t *expected,
                          int32_t src)
{
    int32_t previous = InterlockedCompareExchange(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_exchange64(int64_t volatile *dst, int64_t *expected,
                          int64_t src)
{
    int64_t previous = InterlockedCompareExchange64(dst, src, *expected);
    if (previous == *expected) {
        return true;
    } else {
        *expected = previous;
        return false;
    }
}

static inline bool
atomic_compare_unreachable(void)
{
    return true;
}

#define atomic_compare_exchange_strong_explicit(DST, EXP, SRC, ORD1, ORD2)   \
    (sizeof *(DST) == 1                                                      \
     ? atomic_compare_exchange8((int8_t volatile *) (DST), (int8_t *) (EXP), \
                                (int8_t) (SRC))                              \
     : (sizeof *(DST) == 2                                                   \
        ? atomic_compare_exchange16((int16_t volatile *) (DST),              \
                                    (int16_t *) (EXP), (int16_t) (SRC))      \
        : (sizeof *(DST) == 4                                                \
           ? atomic_compare_exchange32((int32_t volatile *) (DST),           \
                                       (int32_t *) (EXP), (int32_t) (SRC))   \
           : (sizeof *(DST) == 8                                             \
              ? atomic_compare_exchange64((int64_t volatile *) (DST),        \
                                          (int64_t *) (EXP),                 \
                                          (int64_t) (SRC))                   \
              : (ovs_fatal(0, "atomic operation with size greater than "     \
                              "8 bytes"),                                    \
                 atomic_compare_unreachable())))))

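/* Usage sketch (illustrative only, not part of this header): a
 * compare-and-swap increment loop on a hypothetical ATOMIC(uint32_t)
 * variable 'value'.  On failure, 'expected' is updated to the value actually
 * found, so the loop retries with fresh data.
 *
 *     uint32_t expected = 0;
 *     while (!atomic_compare_exchange_strong(&value, &expected,
 *                                            expected + 1)) {
 *         continue;
 *     }
 */
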
/* atomic_flag */

typedef ATOMIC(int32_t) atomic_flag;
#define ATOMIC_FLAG_INIT 0

#define atomic_flag_test_and_set(FLAG)                                  \
    (bool) InterlockedBitTestAndSet(FLAG, 0)

#define atomic_flag_test_and_set_explicit(FLAG, ORDER)                  \
    atomic_flag_test_and_set(FLAG)

#define atomic_flag_clear_explicit(FLAG, ORDER)                         \
    atomic_flag_clear(FLAG)
#define atomic_flag_clear(FLAG)                                         \
    InterlockedBitTestAndReset(FLAG, 0)
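
/* Usage sketch (illustrative only, not part of this header): a minimal
 * spinlock built on atomic_flag.  The variable name 'lock' is hypothetical.
 *
 *     static atomic_flag lock = ATOMIC_FLAG_INIT;
 *     ...
 *     while (atomic_flag_test_and_set(&lock)) {
 *         continue;
 *     }
 *     ... critical section ...
 *     atomic_flag_clear(&lock);
 */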