]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Distributed under the Boost Software License, Version 1.0. | |
3 | * (See accompanying file LICENSE_1_0.txt or copy at | |
4 | * http://www.boost.org/LICENSE_1_0.txt) | |
5 | * | |
6 | * Copyright (c) 2009 Helge Bahmann | |
7 | * Copyright (c) 2012 Tim Blechmann | |
8 | * Copyright (c) 2014 Andrey Semashev | |
9 | */ | |
10 | /*! | |
11 | * \file atomic/detail/ops_gcc_x86_dcas.hpp | |
12 | * | |
13 | * This header contains implementation of the double-width CAS primitive for x86. | |
14 | */ | |
15 | ||
16 | #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
17 | #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
18 | ||
19 | #include <boost/cstdint.hpp> | |
20 | #include <boost/memory_order.hpp> | |
21 | #include <boost/atomic/detail/config.hpp> | |
22 | #include <boost/atomic/detail/storage_type.hpp> | |
23 | #include <boost/atomic/capabilities.hpp> | |
24 | ||
25 | #ifdef BOOST_HAS_PRAGMA_ONCE | |
26 | #pragma once | |
27 | #endif | |
28 | ||
29 | namespace boost { | |
30 | namespace atomics { | |
31 | namespace detail { | |
32 | ||
33 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
34 | ||
// Double-width (8-byte) atomic operations for x86, built around the cmpxchg8b instruction.
// Only compiled when BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B is defined. The Signed parameter is
// merely forwarded to make_storage_type. None of the operations below uses its memory_order
// argument(s); every code path is the same regardless of the requested ordering.
35 | template< bool Signed > | |
36 | struct gcc_dcas_x86 | |
37 | { | |
38 | typedef typename make_storage_type< 8u, Signed >::type storage_type; | |
39 | typedef typename make_storage_type< 8u, Signed >::aligned aligned_storage_type; | |
40 | ||
41 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; | |
42 | ||
// Writes v into storage; the memory_order argument is ignored.
// When the address of storage is a multiple of 8, the value is copied with one 8-byte load and one
// 8-byte store (SSE2 movq/vmovq through xmm4, or x87 fildll/fistpll when __SSE2__ is not defined),
// presumably relying on such aligned 8-byte accesses being atomic on the target CPU.
// For any other address the value is installed with a "lock; cmpxchg8b" retry loop.
43 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
44 | { | |
45 | if ((((uint32_t)&storage) & 0x00000007) == 0) | |
46 | { | |
47 | #if defined(__SSE2__) | |
48 | __asm__ __volatile__ | |
49 | ( | |
50 | #if defined(__AVX__) | |
51 | "vmovq %1, %%xmm4\n\t" | |
52 | "vmovq %%xmm4, %0\n\t" | |
53 | #else | |
54 | "movq %1, %%xmm4\n\t" | |
55 | "movq %%xmm4, %0\n\t" | |
56 | #endif | |
57 | : "=m" (storage) | |
58 | : "m" (v) | |
59 | : "memory", "xmm4" | |
60 | ); | |
61 | #else | |
62 | __asm__ __volatile__ | |
63 | ( | |
64 | "fildll %1\n\t" | |
65 | "fistpll %0\n\t" | |
66 | : "=m" (storage) | |
67 | : "m" (v) | |
68 | : "memory" | |
69 | ); | |
70 | #endif | |
71 | } | |
72 | else | |
73 | { | |
// Unaligned address: load the current content of storage into edx:eax, then repeat cmpxchg8b
// (which replaces the content with ecx:ebx when it still equals edx:eax) until it succeeds.
// Under __PIC__ ebx is not passed as an operand: the low half of v arrives in eax, the original
// ebx is parked in "scratch", ebx is loaded by hand and restored after the loop.
// The BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS variants address storage through a
// pointer register with explicit 0/4 displacements instead of an offsettable memory operand.
74 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
75 | #if defined(__PIC__) | |
76 | uint32_t scratch; | |
77 | __asm__ __volatile__ | |
78 | ( | |
79 | "movl %%ebx, %[scratch]\n\t" | |
80 | "movl %[value_lo], %%ebx\n\t" | |
81 | "movl %[dest], %%eax\n\t" | |
82 | "movl 4+%[dest], %%edx\n\t" | |
83 | ".align 16\n\t" | |
84 | "1: lock; cmpxchg8b %[dest]\n\t" | |
85 | "jne 1b\n\t" | |
86 | "movl %[scratch], %%ebx\n\t" | |
87 | : [scratch] "=m" (scratch), [dest] "=o" (storage) | |
88 | : [value_lo] "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
89 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" | |
90 | ); | |
91 | #else // defined(__PIC__) | |
92 | __asm__ __volatile__ | |
93 | ( | |
94 | "movl %[dest], %%eax\n\t" | |
95 | "movl 4+%[dest], %%edx\n\t" | |
96 | ".align 16\n\t" | |
97 | "1: lock; cmpxchg8b %[dest]\n\t" | |
98 | "jne 1b\n\t" | |
99 | : [dest] "=o" (storage) | |
100 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
101 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" | |
102 | ); | |
103 | #endif // defined(__PIC__) | |
104 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
105 | #if defined(__PIC__) | |
106 | uint32_t scratch; | |
107 | __asm__ __volatile__ | |
108 | ( | |
109 | "movl %%ebx, %[scratch]\n\t" | |
110 | "movl %[value_lo], %%ebx\n\t" | |
111 | "movl 0(%[dest]), %%eax\n\t" | |
112 | "movl 4(%[dest]), %%edx\n\t" | |
113 | ".align 16\n\t" | |
114 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
115 | "jne 1b\n\t" | |
116 | "movl %[scratch], %%ebx\n\t" | |
117 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
118 | : [scratch] "=m,m" (scratch) | |
119 | : [value_lo] "a,a" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) | |
120 | #else | |
121 | : [scratch] "=m" (scratch) | |
122 | : [value_lo] "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
123 | #endif | |
124 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" | |
125 | ); | |
126 | #else // defined(__PIC__) | |
127 | __asm__ __volatile__ | |
128 | ( | |
129 | "movl 0(%[dest]), %%eax\n\t" | |
130 | "movl 4(%[dest]), %%edx\n\t" | |
131 | ".align 16\n\t" | |
132 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
133 | "jne 1b\n\t" | |
134 | : | |
135 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
136 | : [value_lo] "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) | |
137 | #else | |
138 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
139 | #endif | |
140 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" | |
141 | ); | |
142 | #endif // defined(__PIC__) | |
143 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
144 | } | |
145 | } | |
146 | ||
// Returns the 8-byte content of storage; the memory_order argument is ignored.
// Addresses that are a multiple of 8 are read with the same single 8-byte move technique that
// store() uses. For any other address a cmpxchg8b is issued only to obtain the current content
// (through __sync_val_compare_and_swap with 0/0 on clang, through inline asm elsewhere).
147 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
148 | { | |
149 | storage_type value; | |
150 | ||
151 | if ((((uint32_t)&storage) & 0x00000007) == 0) | |
152 | { | |
153 | #if defined(__SSE2__) | |
154 | __asm__ __volatile__ | |
155 | ( | |
156 | #if defined(__AVX__) | |
157 | "vmovq %1, %%xmm4\n\t" | |
158 | "vmovq %%xmm4, %0\n\t" | |
159 | #else | |
160 | "movq %1, %%xmm4\n\t" | |
161 | "movq %%xmm4, %0\n\t" | |
162 | #endif | |
163 | : "=m" (value) | |
164 | : "m" (storage) | |
165 | : "memory", "xmm4" | |
166 | ); | |
167 | #else | |
168 | __asm__ __volatile__ | |
169 | ( | |
170 | "fildll %1\n\t" | |
171 | "fistpll %0\n\t" | |
172 | : "=m" (value) | |
173 | : "m" (storage) | |
174 | : "memory" | |
175 | ); | |
176 | #endif | |
177 | } | |
178 | else | |
179 | { | |
180 | #if defined(__clang__) | |
181 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics | |
182 | value = __sync_val_compare_and_swap(&storage, (storage_type)0, (storage_type)0); | |
183 | #else | |
184 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
185 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
186 | __asm__ __volatile__ | |
187 | ( | |
188 | "movl %%ebx, %%eax\n\t" | |
189 | "movl %%ecx, %%edx\n\t" | |
190 | "lock; cmpxchg8b %[storage]\n\t" | |
191 | : "=&A" (value) | |
192 | : [storage] "m" (storage) | |
193 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
194 | ); | |
195 | #endif | |
196 | } | |
197 | ||
198 | return value; | |
199 | } | |
200 | ||
// Strong compare-and-swap; both memory_order arguments are ignored.
// If storage equals expected, desired is written to storage and true is returned. Otherwise
// expected receives the value found in storage and false is returned.
201 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
202 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
203 | { | |
204 | #if defined(__clang__) | |
205 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics | |
206 | storage_type old_expected = expected; | |
207 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
208 | return expected == old_expected; | |
209 | #elif defined(__PIC__) | |
210 | // Make sure ebx is saved and restored properly in case | |
211 | // of position independent code. To make this work | |
212 | // setup register constraints such that ebx can not be | |
213 | // used by accident e.g. as base address for the variable | |
214 | // to be modified. Accessing "scratch" should always be okay, | |
215 | // as it can only be placed on the stack (and therefore | |
216 | // accessed through ebp or esp only). | |
217 | // | |
218 | // In theory, could push/pop ebx onto/off the stack, but movs | |
219 | // to a prepared stack slot turn out to be faster. | |
220 | ||
221 | uint32_t scratch; | |
222 | bool success; | |
223 | __asm__ __volatile__ | |
224 | ( | |
225 | "movl %%ebx, %[scratch]\n\t" | |
226 | "movl %[desired_lo], %%ebx\n\t" | |
227 | "lock; cmpxchg8b %[dest]\n\t" | |
228 | "movl %[scratch], %%ebx\n\t" | |
229 | "sete %[success]\n\t" | |
230 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
231 | : "+A,A,A,A,A,A" (expected), [dest] "+m,m,m,m,m,m" (storage), [scratch] "=m,m,m,m,m,m" (scratch), [success] "=q,m,q,m,q,m" (success) | |
232 | : [desired_lo] "S,S,D,D,m,m" ((uint32_t)desired), "c,c,c,c,c,c" ((uint32_t)(desired >> 32)) | |
233 | #else | |
234 | : "+A" (expected), [dest] "+m" (storage), [scratch] "=m" (scratch), [success] "=q" (success) | |
235 | : [desired_lo] "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
236 | #endif | |
237 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
238 | ); | |
239 | return success; | |
240 | #else | |
241 | bool success; | |
242 | __asm__ __volatile__ | |
243 | ( | |
244 | "lock; cmpxchg8b %[dest]\n\t" | |
245 | "sete %[success]\n\t" | |
246 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
247 | : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) | |
248 | : "b,b" ((uint32_t)desired), "c,c" ((uint32_t)(desired >> 32)) | |
249 | #else | |
250 | : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) | |
251 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
252 | #endif | |
253 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
254 | ); | |
255 | return success; | |
256 | #endif | |
257 | } | |
258 | ||
// Weak compare-and-swap; forwards all arguments to compare_exchange_strong.
259 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
260 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
261 | { | |
262 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
263 | } | |
264 | ||
// Replaces the content of storage with v and returns the previous content.
// The memory_order argument is ignored; it is left unnamed, like in the other operations, so that
// it does not trigger unused-parameter warnings.
265 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
266 | { | |
267 | #if defined(__clang__) | |
268 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics | |
// The plain read below only seeds the loop: a wrong guess makes the CAS fail and return the
// value actually found, which becomes the next guess.
269 | storage_type old_val = storage; | |
270 | while (true) | |
271 | { | |
272 | storage_type val = __sync_val_compare_and_swap(&storage, old_val, v); | |
273 | if (val == old_val) | |
274 | return val; | |
275 | old_val = val; | |
276 | } | |
277 | #elif !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
278 | #if defined(__PIC__) | |
279 | uint32_t scratch; | |
280 | __asm__ __volatile__ | |
281 | ( | |
// v arrives in edx:eax ("+A"). It is copied to ecx:ebx as the replacement value before edx:eax are
// reloaded with the current content of storage; ebx itself is saved in scratch and restored at the end.
282 | "movl %%ebx, %[scratch]\n\t" | |
283 | "movl %%eax, %%ebx\n\t" | |
284 | "movl %%edx, %%ecx\n\t" | |
285 | "movl %[dest], %%eax\n\t" | |
286 | "movl 4+%[dest], %%edx\n\t" | |
287 | ".align 16\n\t" | |
288 | "1: lock; cmpxchg8b %[dest]\n\t" | |
289 | "jne 1b\n\t" | |
290 | "movl %[scratch], %%ebx\n\t" | |
291 | : "+A" (v), [scratch] "=m" (scratch), [dest] "+o" (storage) | |
292 | : | |
293 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory" | |
294 | ); | |
295 | return v; | |
296 | #else // defined(__PIC__) | |
297 | __asm__ __volatile__ | |
298 | ( | |
299 | "movl %[dest], %%eax\n\t" | |
300 | "movl 4+%[dest], %%edx\n\t" | |
301 | ".align 16\n\t" | |
302 | "1: lock; cmpxchg8b %[dest]\n\t" | |
303 | "jne 1b\n\t" | |
// The output is early-clobber ("=&A"): eax and edx are overwritten by the two loads above while
// %[dest] is still needed by cmpxchg8b, so the compiler must not address %[dest] through them.
304 | : "=&A" (v), [dest] "+o" (storage) | |
305 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
306 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
307 | ); | |
308 | return v; | |
309 | #endif // defined(__PIC__) | |
310 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
311 | #if defined(__PIC__) | |
312 | uint32_t scratch; | |
313 | __asm__ __volatile__ | |
314 | ( | |
315 | "movl %%ebx, %[scratch]\n\t" | |
316 | "movl %%eax, %%ebx\n\t" | |
317 | "movl %%edx, %%ecx\n\t" | |
318 | "movl 0(%[dest]), %%eax\n\t" | |
319 | "movl 4(%[dest]), %%edx\n\t" | |
320 | ".align 16\n\t" | |
321 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
322 | "jne 1b\n\t" | |
323 | "movl %[scratch], %%ebx\n\t" | |
324 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
325 | : "+A,A" (v), [scratch] "=m,m" (scratch) | |
326 | : [dest] "D,S" (&storage) | |
327 | #else | |
328 | : "+A" (v), [scratch] "=m" (scratch) | |
329 | : [dest] "D" (&storage) | |
330 | #endif | |
331 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory" | |
332 | ); | |
333 | return v; | |
334 | #else // defined(__PIC__) | |
335 | __asm__ __volatile__ | |
336 | ( | |
337 | "movl 0(%[dest]), %%eax\n\t" | |
338 | "movl 4(%[dest]), %%edx\n\t" | |
339 | ".align 16\n\t" | |
340 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
341 | "jne 1b\n\t" | |
342 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
343 | : "=A,A" (v) | |
344 | : "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) | |
345 | #else | |
346 | : "=A" (v) | |
347 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
348 | #endif | |
349 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
350 | ); | |
351 | return v; | |
352 | #endif // defined(__PIC__) | |
353 | #endif | |
354 | } | |
355 | ||
// Always returns true; the argument is not inspected.
356 | static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT | |
357 | { | |
358 | return true; | |
359 | } | |
360 | }; | |
361 | ||
362 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
363 | ||
364 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
365 | ||
// Double-width (16-byte) atomic operations for x86-64, built around the cmpxchg16b instruction.
// Only compiled when BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B is defined. The Signed parameter is
// merely forwarded to make_storage_type. None of the operations below uses its memory_order
// argument(s); every code path is the same regardless of the requested ordering.
366 | template< bool Signed > | |
367 | struct gcc_dcas_x86_64 | |
368 | { | |
369 | typedef typename make_storage_type< 16u, Signed >::type storage_type; | |
370 | typedef typename make_storage_type< 16u, Signed >::aligned aligned_storage_type; | |
371 | ||
372 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; | |
373 | ||
// Replaces the content of storage with v; the memory_order argument is ignored.
// The first 8 bytes of v are passed in rbx and the following 8 bytes in rcx. The current content of
// storage is loaded into rdx:rax and "lock; cmpxchg16b" is retried until it succeeds.
374 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
375 | { | |
// NOTE(review): v is read through a uint64_t pointer here (and in the other operations below);
// this relies on the compiler tolerating the type pun - confirm against the aliasing rules in use.
376 | uint64_t const* p_value = (uint64_t const*)&v; | |
377 | const uint64_t v_lo = p_value[0], v_hi = p_value[1]; | |
378 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
379 | __asm__ __volatile__ | |
380 | ( | |
381 | "movq %[dest], %%rax\n\t" | |
382 | "movq 8+%[dest], %%rdx\n\t" | |
383 | ".align 16\n\t" | |
384 | "1: lock; cmpxchg16b %[dest]\n\t" | |
385 | "jne 1b\n\t" | |
386 | : [dest] "=o" (storage) | |
387 | : "b" (v_lo), "c" (v_hi) | |
388 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" | |
389 | ); | |
390 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
// Same loop, but storage is addressed through a pointer register with explicit 0/8 displacements.
391 | __asm__ __volatile__ | |
392 | ( | |
393 | "movq 0(%[dest]), %%rax\n\t" | |
394 | "movq 8(%[dest]), %%rdx\n\t" | |
395 | ".align 16\n\t" | |
396 | "1: lock; cmpxchg16b 0(%[dest])\n\t" | |
397 | "jne 1b\n\t" | |
398 | : | |
399 | : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage) | |
400 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" | |
401 | ); | |
402 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
403 | } | |
404 | ||
// Returns the 16-byte content of storage; the memory_order argument is ignored.
// Every variant issues a compare-and-swap only to obtain the current content:
//  - clang: __sync_val_compare_and_swap with a value-initialized comparand and replacement;
//  - BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS: inline asm that copies rax/rdx into the two halves
//    of the result with explicit movq instructions;
//  - otherwise: inline asm that receives the result directly through an "=&A" output.
405 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
406 | { | |
407 | #if defined(__clang__) | |
408 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics | |
409 | storage_type value = storage_type(); | |
410 | return __sync_val_compare_and_swap(&storage, value, value); | |
411 | #elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
412 | // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap | |
413 | storage_type value; | |
414 | ||
415 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
416 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
417 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
418 | __asm__ __volatile__ | |
419 | ( | |
420 | "movq %%rbx, %%rax\n\t" | |
421 | "movq %%rcx, %%rdx\n\t" | |
422 | "lock; cmpxchg16b %[storage]\n\t" | |
423 | "movq %%rax, %[value]\n\t" | |
424 | "movq %%rdx, 8+%[value]\n\t" | |
425 | : [value] "=o" (value) | |
426 | : [storage] "m" (storage) | |
427 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
428 | ); | |
429 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
430 | __asm__ __volatile__ | |
431 | ( | |
432 | "movq %%rbx, %%rax\n\t" | |
433 | "movq %%rcx, %%rdx\n\t" | |
434 | "lock; cmpxchg16b %[storage]\n\t" | |
435 | "movq %%rax, 0(%[value])\n\t" | |
436 | "movq %%rdx, 8(%[value])\n\t" | |
437 | : | |
438 | : [storage] "m" (storage), [value] "r" (&value) | |
439 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
440 | ); | |
441 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
442 | ||
443 | return value; | |
444 | #else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
445 | storage_type value; | |
446 | ||
447 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
448 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
449 | __asm__ __volatile__ | |
450 | ( | |
451 | "movq %%rbx, %%rax\n\t" | |
452 | "movq %%rcx, %%rdx\n\t" | |
453 | "lock; cmpxchg16b %[storage]\n\t" | |
454 | : "=&A" (value) | |
455 | : [storage] "m" (storage) | |
456 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
457 | ); | |
458 | ||
459 | return value; | |
460 | #endif | |
461 | } | |
462 | ||
// Strong compare-and-swap; both memory_order arguments are ignored.
// If storage equals expected, desired is written to storage and true is returned. Otherwise
// expected receives the value found in storage and false is returned.
463 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
464 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
465 | { | |
466 | #if defined(__clang__) | |
467 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics | |
468 | storage_type old_expected = expected; | |
469 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
470 | return expected == old_expected; | |
471 | #elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
472 | // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap | |
473 | uint64_t const* p_desired = (uint64_t const*)&desired; | |
474 | const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1]; | |
475 | bool success; | |
// expected is moved into rdx:rax by hand before the cmpxchg16b and copied back afterwards,
// because it cannot be bound to the register pair directly in this configuration.
476 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
477 | __asm__ __volatile__ | |
478 | ( | |
479 | "movq %[expected], %%rax\n\t" | |
480 | "movq 8+%[expected], %%rdx\n\t" | |
481 | "lock; cmpxchg16b %[dest]\n\t" | |
482 | "sete %[success]\n\t" | |
483 | "movq %%rax, %[expected]\n\t" | |
484 | "movq %%rdx, 8+%[expected]\n\t" | |
485 | : [dest] "+m" (storage), [expected] "+o" (expected), [success] "=q" (success) | |
486 | : "b" (desired_lo), "c" (desired_hi) | |
487 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
488 | ); | |
489 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
490 | __asm__ __volatile__ | |
491 | ( | |
492 | "movq 0(%[expected]), %%rax\n\t" | |
493 | "movq 8(%[expected]), %%rdx\n\t" | |
494 | "lock; cmpxchg16b %[dest]\n\t" | |
495 | "sete %[success]\n\t" | |
496 | "movq %%rax, 0(%[expected])\n\t" | |
497 | "movq %%rdx, 8(%[expected])\n\t" | |
498 | : [dest] "+m" (storage), [success] "=q" (success) | |
499 | : "b" (desired_lo), "c" (desired_hi), [expected] "r" (&expected) | |
500 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
501 | ); | |
502 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
503 | ||
504 | return success; | |
505 | #else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
506 | uint64_t const* p_desired = (uint64_t const*)&desired; | |
507 | const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1]; | |
508 | bool success; | |
509 | __asm__ __volatile__ | |
510 | ( | |
511 | "lock; cmpxchg16b %[dest]\n\t" | |
512 | "sete %[success]\n\t" | |
513 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
514 | : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) | |
515 | : "b,b" (desired_lo), "c,c" (desired_hi) | |
516 | #else | |
517 | : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) | |
518 | : "b" (desired_lo), "c" (desired_hi) | |
519 | #endif | |
520 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
521 | ); | |
522 | return success; | |
523 | #endif | |
524 | } | |
525 | ||
// Weak compare-and-swap; forwards all arguments to compare_exchange_strong.
526 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
527 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
528 | { | |
529 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
530 | } | |
531 | ||
// Replaces the content of storage with v and returns the previous content; the memory_order
// argument is ignored.
532 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
533 | { | |
534 | #if defined(__clang__) | |
535 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics | |
// The plain read below only seeds the loop: a wrong guess makes the CAS fail and return the
// value actually found, which becomes the next guess.
536 | storage_type old_val = storage; | |
537 | while (true) | |
538 | { | |
539 | storage_type val = __sync_val_compare_and_swap(&storage, old_val, v); | |
540 | if (val == old_val) | |
541 | return val; | |
542 | old_val = val; | |
543 | } | |
544 | #elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
545 | // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap | |
546 | storage_type old_value; | |
547 | uint64_t const* p_value = (uint64_t const*)&v; | |
548 | const uint64_t v_lo = p_value[0], v_hi = p_value[1]; | |
// After the retry loop succeeds, rdx:rax hold the replaced content; it is copied into old_value
// with explicit movq instructions.
549 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
550 | __asm__ __volatile__ | |
551 | ( | |
552 | "movq %[dest], %%rax\n\t" | |
553 | "movq 8+%[dest], %%rdx\n\t" | |
554 | ".align 16\n\t" | |
555 | "1: lock; cmpxchg16b %[dest]\n\t" | |
556 | "jne 1b\n\t" | |
557 | "movq %%rax, %[old_value]\n\t" | |
558 | "movq %%rdx, 8+%[old_value]\n\t" | |
559 | : [dest] "+o" (storage), [old_value] "=o" (old_value) | |
560 | : "b" (v_lo), "c" (v_hi) | |
561 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
562 | ); | |
563 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
564 | __asm__ __volatile__ | |
565 | ( | |
566 | "movq 0(%[dest]), %%rax\n\t" | |
567 | "movq 8(%[dest]), %%rdx\n\t" | |
568 | ".align 16\n\t" | |
569 | "1: lock; cmpxchg16b 0(%[dest])\n\t" | |
570 | "jne 1b\n\t" | |
571 | "movq %%rax, 0(%[old_value])\n\t" | |
572 | "movq %%rdx, 8(%[old_value])\n\t" | |
573 | : | |
574 | : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage), [old_value] "r" (&old_value) | |
575 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
576 | ); | |
577 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
578 | ||
579 | return old_value; | |
580 | #else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
581 | uint64_t const* p_value = (uint64_t const*)&v; | |
582 | const uint64_t v_lo = p_value[0], v_hi = p_value[1]; | |
// The replaced content is returned in v through the early-clobber "=&A" output; the new value has
// already been copied to v_lo/v_hi, which are passed in rbx/rcx.
583 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
584 | __asm__ __volatile__ | |
585 | ( | |
586 | "movq %[dest], %%rax\n\t" | |
587 | "movq 8+%[dest], %%rdx\n\t" | |
588 | ".align 16\n\t" | |
589 | "1: lock; cmpxchg16b %[dest]\n\t" | |
590 | "jne 1b\n\t" | |
591 | : "=&A" (v), [dest] "+o" (storage) | |
592 | : "b" (v_lo), "c" (v_hi) | |
593 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
594 | ); | |
595 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
596 | __asm__ __volatile__ | |
597 | ( | |
598 | "movq 0(%[dest]), %%rax\n\t" | |
599 | "movq 8(%[dest]), %%rdx\n\t" | |
600 | ".align 16\n\t" | |
601 | "1: lock; cmpxchg16b 0(%[dest])\n\t" | |
602 | "jne 1b\n\t" | |
603 | : "=&A" (v) | |
604 | : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage) | |
605 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
606 | ); | |
607 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
608 | ||
609 | return v; | |
610 | #endif | |
611 | } | |
612 | ||
// Always returns true; the argument is not inspected.
613 | static BOOST_FORCEINLINE bool is_lock_free(storage_type const volatile&) BOOST_NOEXCEPT | |
614 | { | |
615 | return true; | |
616 | } | |
617 | }; | |
618 | ||
619 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
620 | ||
621 | } // namespace detail | |
622 | } // namespace atomics | |
623 | } // namespace boost | |
624 | ||
625 | #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ |