]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Distributed under the Boost Software License, Version 1.0. | |
3 | * (See accompanying file LICENSE_1_0.txt or copy at | |
4 | * http://www.boost.org/LICENSE_1_0.txt) | |
5 | * | |
6 | * Copyright (c) 2009 Helge Bahmann | |
7 | * Copyright (c) 2012 Tim Blechmann | |
8 | * Copyright (c) 2014 Andrey Semashev | |
9 | */ | |
10 | /*! | |
11 | * \file atomic/detail/ops_gcc_x86_dcas.hpp | |
12 | * | |
13 | * This header contains the implementation of the double-width CAS primitive for x86. | |
14 | */ | |
15 | ||
16 | #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
17 | #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
18 | ||
19 | #include <boost/cstdint.hpp> | |
20 | #include <boost/memory_order.hpp> | |
21 | #include <boost/atomic/detail/config.hpp> | |
22 | #include <boost/atomic/detail/storage_type.hpp> | |
23 | #include <boost/atomic/capabilities.hpp> | |
24 | ||
25 | #ifdef BOOST_HAS_PRAGMA_ONCE | |
26 | #pragma once | |
27 | #endif | |
28 | ||
29 | namespace boost { | |
30 | namespace atomics { | |
31 | namespace detail { | |
32 | ||
33 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
34 | ||
35 | template< bool Signed > | |
36 | struct gcc_dcas_x86 | |
37 | { | |
38 | typedef typename make_storage_type< 8u, Signed >::type storage_type; | |
39 | typedef typename make_storage_type< 8u, Signed >::aligned aligned_storage_type; | |
40 | ||
41 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; | |
42 | ||
// Stores v into storage. The memory_order argument is unnamed and is not consulted.
// If the address of storage is a multiple of 8, the value is copied with one 64-bit move
// through xmm4 (SSE2/AVX builds) or through the x87 stack (fildll/fistpll).
// Otherwise a lock cmpxchg8b loop is used: eax:edx is preloaded with the current contents
// of storage, ecx:ebx holds the new value, and the loop repeats until the exchange succeeds.
43 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
44 | { | |
45 | if ((((uint32_t)&storage) & 0x00000007) == 0) | |
46 | { | |
47 | #if defined(__SSE2__) | |
48 | __asm__ __volatile__ | |
49 | ( | |
50 | #if defined(__AVX__) | |
51 | "vmovq %1, %%xmm4\n\t" | |
52 | "vmovq %%xmm4, %0\n\t" | |
53 | #else | |
54 | "movq %1, %%xmm4\n\t" | |
55 | "movq %%xmm4, %0\n\t" | |
56 | #endif | |
57 | : "=m" (storage) | |
58 | : "m" (v) | |
59 | : "memory", "xmm4" | |
60 | ); | |
61 | #else | |
62 | __asm__ __volatile__ | |
63 | ( | |
64 | "fildll %1\n\t" | |
65 | "fistpll %0\n\t" | |
66 | : "=m" (storage) | |
67 | : "m" (v) | |
68 | : "memory" | |
69 | ); | |
70 | #endif | |
71 | } | |
72 | else | |
73 | { | |
// Address is not 8-byte aligned. The four variants below run the same cmpxchg8b loop and
// differ only in how dest is addressed (offsettable memory operand vs. a pointer held in
// edi/esi) and in whether ebx has to be saved to scratch and restored (__PIC__ builds).
74 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
75 | #if defined(__PIC__) | |
b32b8144 | 76 | uint32_t v_lo = (uint32_t)v; |
7c673cae FG |
77 | uint32_t scratch; |
// The low half arrives in eax ("+a") and is moved into ebx inside the asm, after the
// caller's ebx has been stashed in scratch; eax is then overwritten by the load from dest.
78 | __asm__ __volatile__ | |
79 | ( | |
80 | "movl %%ebx, %[scratch]\n\t" | |
81 | "movl %[value_lo], %%ebx\n\t" | |
82 | "movl %[dest], %%eax\n\t" | |
83 | "movl 4+%[dest], %%edx\n\t" | |
84 | ".align 16\n\t" | |
85 | "1: lock; cmpxchg8b %[dest]\n\t" | |
86 | "jne 1b\n\t" | |
87 | "movl %[scratch], %%ebx\n\t" | |
b32b8144 FG |
88 | : [scratch] "=m" (scratch), [dest] "=o" (storage), [value_lo] "+a" (v_lo) |
89 | : "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
90 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" |
91 | ); | |
92 | #else // defined(__PIC__) | |
93 | __asm__ __volatile__ | |
94 | ( | |
95 | "movl %[dest], %%eax\n\t" | |
96 | "movl 4+%[dest], %%edx\n\t" | |
97 | ".align 16\n\t" | |
98 | "1: lock; cmpxchg8b %[dest]\n\t" | |
99 | "jne 1b\n\t" | |
100 | : [dest] "=o" (storage) | |
101 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
102 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" | |
103 | ); | |
104 | #endif // defined(__PIC__) | |
105 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
106 | #if defined(__PIC__) | |
b32b8144 | 107 | uint32_t v_lo = (uint32_t)v; |
7c673cae FG |
108 | uint32_t scratch; |
109 | __asm__ __volatile__ | |
110 | ( | |
111 | "movl %%ebx, %[scratch]\n\t" | |
112 | "movl %[value_lo], %%ebx\n\t" | |
113 | "movl 0(%[dest]), %%eax\n\t" | |
114 | "movl 4(%[dest]), %%edx\n\t" | |
115 | ".align 16\n\t" | |
116 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
117 | "jne 1b\n\t" | |
118 | "movl %[scratch], %%ebx\n\t" | |
119 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
b32b8144 FG |
120 | : [scratch] "=m,m" (scratch), [value_lo] "+a,a" (v_lo) |
121 | : "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) | |
7c673cae | 122 | #else |
b32b8144 FG |
123 | : [scratch] "=m" (scratch), [value_lo] "+a" (v_lo) |
124 | : "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
125 | #endif |
126 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" | |
127 | ); | |
128 | #else // defined(__PIC__) | |
129 | __asm__ __volatile__ | |
130 | ( | |
131 | "movl 0(%[dest]), %%eax\n\t" | |
132 | "movl 4(%[dest]), %%edx\n\t" | |
133 | ".align 16\n\t" | |
134 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
135 | "jne 1b\n\t" | |
136 | : | |
137 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
138 | : [value_lo] "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) | |
139 | #else | |
140 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
141 | #endif | |
142 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" | |
143 | ); | |
144 | #endif // defined(__PIC__) | |
145 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
146 | } | |
147 | } | |
148 | ||
// Returns the current contents of storage. The memory_order argument is unnamed and is not consulted.
// If the address of storage is a multiple of 8, the value is read with one 64-bit move through
// xmm4 (SSE2/AVX builds) or through the x87 stack. Otherwise the value is obtained from a
// compare-and-swap whose comparand and replacement are equal, so storage keeps its contents
// whether or not the comparison succeeds.
// NOTE(review): the compare-and-swap path operates on storage although it is taken by const
// reference - confirm callers never pass objects placed in read-only memory.
149 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
150 | { | |
151 | storage_type value; | |
152 | ||
153 | if ((((uint32_t)&storage) & 0x00000007) == 0) | |
154 | { | |
155 | #if defined(__SSE2__) | |
156 | __asm__ __volatile__ | |
157 | ( | |
158 | #if defined(__AVX__) | |
159 | "vmovq %1, %%xmm4\n\t" | |
160 | "vmovq %%xmm4, %0\n\t" | |
161 | #else | |
162 | "movq %1, %%xmm4\n\t" | |
163 | "movq %%xmm4, %0\n\t" | |
164 | #endif | |
165 | : "=m" (value) | |
166 | : "m" (storage) | |
167 | : "memory", "xmm4" | |
168 | ); | |
169 | #else | |
170 | __asm__ __volatile__ | |
171 | ( | |
172 | "fildll %1\n\t" | |
173 | "fistpll %0\n\t" | |
174 | : "=m" (value) | |
175 | : "m" (storage) | |
176 | : "memory" | |
177 | ); | |
178 | #endif | |
179 | } | |
180 | else | |
181 | { | |
182 | #if defined(__clang__) | |
183 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics | |
184 | value = __sync_val_compare_and_swap(&storage, (storage_type)0, (storage_type)0); | |
185 | #else | |
186 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
187 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
// The result is read from the eax:edx pair ("=&A"), which the asm overwrites before use.
188 | __asm__ __volatile__ | |
189 | ( | |
190 | "movl %%ebx, %%eax\n\t" | |
191 | "movl %%ecx, %%edx\n\t" | |
192 | "lock; cmpxchg8b %[storage]\n\t" | |
193 | : "=&A" (value) | |
194 | : [storage] "m" (storage) | |
195 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
196 | ); | |
197 | #endif | |
198 | } | |
199 | ||
200 | return value; | |
201 | } | |
202 | ||
// Compares storage with expected and, if they are equal, replaces storage with desired.
// Returns true when the replacement took place. expected is an in/out operand: after the call
// it holds the value that was observed in storage. Both memory_order arguments are unnamed and
// are not consulted.
// Three implementations are selected at preprocessing time: the __sync intrinsic (clang),
// an ebx-preserving asm sequence (__PIC__), and a direct lock cmpxchg8b (everything else).
203 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
204 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
205 | { | |
206 | #if defined(__clang__) | |
b32b8144 | 207 | |
7c673cae FG |
208 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics |
209 | storage_type old_expected = expected; | |
210 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
211 | return expected == old_expected; | |
b32b8144 | 212 | |
7c673cae | 213 | #elif defined(__PIC__) |
b32b8144 | 214 | |
7c673cae FG |
215 | // Make sure ebx is saved and restored properly in case |
216 | // of position independent code. To make this work | |
217 | // setup register constraints such that ebx can not be | |
218 | // used by accident e.g. as base address for the variable | |
219 | // to be modified. Accessing "scratch" should always be okay, | |
220 | // as it can only be placed on the stack (and therefore | |
221 | // accessed through ebp or esp only). | |
222 | // | |
223 | // In theory, could push/pop ebx onto/off the stack, but movs | |
224 | // to a prepared stack slot turn out to be faster. | |
225 | ||
226 | uint32_t scratch; | |
227 | bool success; | |
b32b8144 | 228 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
7c673cae FG |
229 | __asm__ __volatile__ |
230 | ( | |
231 | "movl %%ebx, %[scratch]\n\t" | |
232 | "movl %[desired_lo], %%ebx\n\t" | |
b32b8144 FG |
233 | "lock; cmpxchg8b (%[dest])\n\t" |
234 | "movl %[scratch], %%ebx\n\t" | |
235 | : "+A" (expected), [scratch] "=m" (scratch), [success] "=@ccz" (success) | |
236 | : [desired_lo] "Sm" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
237 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
238 | ); | |
239 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
// Without flag outputs the result is materialized with sete after ebx has been restored;
// this relies on the intervening movl leaving the flags set by cmpxchg8b untouched.
240 | __asm__ __volatile__ | |
241 | ( | |
242 | "movl %%ebx, %[scratch]\n\t" | |
243 | "movl %[desired_lo], %%ebx\n\t" | |
244 | "lock; cmpxchg8b (%[dest])\n\t" | |
7c673cae FG |
245 | "movl %[scratch], %%ebx\n\t" |
246 | "sete %[success]\n\t" | |
247 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
b32b8144 FG |
248 | : "+A,A,A,A,A,A" (expected), [scratch] "=m,m,m,m,m,m" (scratch), [success] "=q,m,q,m,q,m" (success) |
249 | : [desired_lo] "S,S,D,D,m,m" ((uint32_t)desired), "c,c,c,c,c,c" ((uint32_t)(desired >> 32)), [dest] "D,D,S,S,D,D" (&storage) | |
7c673cae | 250 | #else |
b32b8144 FG |
251 | : "+A" (expected), [scratch] "=m" (scratch), [success] "=q" (success) |
252 | : [desired_lo] "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
253 | #endif |
254 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
255 | ); | |
b32b8144 FG |
256 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
257 | ||
7c673cae | 258 | return success; |
b32b8144 FG |
259 | |
260 | #else // defined(__PIC__) | |
261 | ||
7c673cae | 262 | bool success; |
b32b8144 FG |
263 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
264 | __asm__ __volatile__ | |
265 | ( | |
266 | "lock; cmpxchg8b %[dest]\n\t" | |
267 | : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success) | |
268 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
269 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
270 | ); | |
271 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
272 | __asm__ __volatile__ |
273 | ( | |
274 | "lock; cmpxchg8b %[dest]\n\t" | |
275 | "sete %[success]\n\t" | |
276 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
277 | : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) | |
278 | : "b,b" ((uint32_t)desired), "c,c" ((uint32_t)(desired >> 32)) | |
279 | #else | |
280 | : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) | |
281 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
282 | #endif | |
283 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
284 | ); | |
b32b8144 FG |
285 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
286 | ||
7c673cae | 287 | return success; |
b32b8144 FG |
288 | |
289 | #endif // defined(__PIC__) | |
7c673cae FG |
290 | } |
291 | ||
// Weak compare-and-exchange. Forwards every argument unchanged to compare_exchange_strong
// and returns its result, so the two operations behave identically in this implementation.
292 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
293 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
294 | { | |
295 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
296 | } | |
297 | ||
// Replaces the contents of storage with v and returns the contents that were replaced.
// The order argument is named but is not used anywhere in the body.
// clang builds run a __sync_val_compare_and_swap loop seeded with a plain read of storage.
// Other builds run a lock cmpxchg8b loop: eax:edx is loaded with the current contents,
// ecx:ebx holds the new value, and the loop repeats until the exchange succeeds; the old
// value is then left in eax:edx, which is bound to v and returned.
298 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT | |
299 | { | |
300 | #if defined(__clang__) | |
301 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics | |
302 | storage_type old_val = storage; | |
303 | while (true) | |
304 | { | |
305 | storage_type val = __sync_val_compare_and_swap(&storage, old_val, v); | |
306 | if (val == old_val) | |
307 | return val; | |
308 | old_val = val; | |
309 | } | |
310 | #elif !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
311 | #if defined(__PIC__) | |
312 | uint32_t scratch; | |
// v enters in eax:edx ("+A"); the asm saves ebx to scratch, copies eax:edx into ebx:ecx
// as the replacement value, and restores ebx once the loop has finished.
313 | __asm__ __volatile__ | |
314 | ( | |
315 | "movl %%ebx, %[scratch]\n\t" | |
316 | "movl %%eax, %%ebx\n\t" | |
317 | "movl %%edx, %%ecx\n\t" | |
318 | "movl %[dest], %%eax\n\t" | |
319 | "movl 4+%[dest], %%edx\n\t" | |
320 | ".align 16\n\t" | |
321 | "1: lock; cmpxchg8b %[dest]\n\t" | |
322 | "jne 1b\n\t" | |
323 | "movl %[scratch], %%ebx\n\t" | |
324 | : "+A" (v), [scratch] "=m" (scratch), [dest] "+o" (storage) | |
325 | : | |
326 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory" | |
327 | ); | |
328 | return v; | |
329 | #else // defined(__PIC__) | |
330 | __asm__ __volatile__ | |
331 | ( | |
332 | "movl %[dest], %%eax\n\t" | |
333 | "movl 4+%[dest], %%edx\n\t" | |
334 | ".align 16\n\t" | |
335 | "1: lock; cmpxchg8b %[dest]\n\t" | |
336 | "jne 1b\n\t" | |
337 | : "=A" (v), [dest] "+o" (storage) | |
338 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
339 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
340 | ); | |
341 | return v; | |
342 | #endif // defined(__PIC__) | |
343 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
344 | #if defined(__PIC__) | |
345 | uint32_t scratch; | |
346 | __asm__ __volatile__ | |
347 | ( | |
348 | "movl %%ebx, %[scratch]\n\t" | |
349 | "movl %%eax, %%ebx\n\t" | |
350 | "movl %%edx, %%ecx\n\t" | |
351 | "movl 0(%[dest]), %%eax\n\t" | |
352 | "movl 4(%[dest]), %%edx\n\t" | |
353 | ".align 16\n\t" | |
354 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
355 | "jne 1b\n\t" | |
356 | "movl %[scratch], %%ebx\n\t" | |
357 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
358 | : "+A,A" (v), [scratch] "=m,m" (scratch) | |
359 | : [dest] "D,S" (&storage) | |
360 | #else | |
361 | : "+A" (v), [scratch] "=m" (scratch) | |
362 | : [dest] "D" (&storage) | |
363 | #endif | |
364 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory" | |
365 | ); | |
366 | return v; | |
367 | #else // defined(__PIC__) | |
368 | __asm__ __volatile__ | |
369 | ( | |
370 | "movl 0(%[dest]), %%eax\n\t" | |
371 | "movl 4(%[dest]), %%edx\n\t" | |
372 | ".align 16\n\t" | |
373 | "1: lock; cmpxchg8b 0(%[dest])\n\t" | |
374 | "jne 1b\n\t" | |
375 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
376 | : "=A,A" (v) | |
377 | : "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) | |
378 | #else | |
379 | : "=A" (v) | |
380 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
381 | #endif | |
382 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
383 | ); | |
384 | return v; | |
385 | #endif // defined(__PIC__) | |
386 | #endif | |
387 | } | |
7c673cae FG |
388 | }; |
389 | ||
390 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
391 | ||
392 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
393 | ||
394 | template< bool Signed > | |
395 | struct gcc_dcas_x86_64 | |
396 | { | |
397 | typedef typename make_storage_type< 16u, Signed >::type storage_type; | |
398 | typedef typename make_storage_type< 16u, Signed >::aligned aligned_storage_type; | |
399 | ||
400 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; | |
401 | ||
// Stores v into storage. The memory_order argument is unnamed and is not consulted.
// The value is written with a lock cmpxchg16b loop: rax:rdx is preloaded with the current
// contents of storage, rbx and rcx carry the first and second 8 bytes of v, and the loop
// repeats until the exchange succeeds.
402 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
403 | { | |
// Split v into two 64-bit halves by reading it through a uint64_t pointer.
// NOTE(review): this is a type-punning read - confirm it is acceptable under the aliasing
// settings the library is built with.
404 | uint64_t const* p_value = (uint64_t const*)&v; | |
405 | const uint64_t v_lo = p_value[0], v_hi = p_value[1]; | |
406 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
407 | __asm__ __volatile__ | |
408 | ( | |
409 | "movq %[dest], %%rax\n\t" | |
410 | "movq 8+%[dest], %%rdx\n\t" | |
411 | ".align 16\n\t" | |
412 | "1: lock; cmpxchg16b %[dest]\n\t" | |
413 | "jne 1b\n\t" | |
414 | : [dest] "=o" (storage) | |
415 | : "b" (v_lo), "c" (v_hi) | |
416 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" | |
417 | ); | |
418 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
// Same loop, but dest is addressed through a pointer register with explicit displacements.
419 | __asm__ __volatile__ | |
420 | ( | |
421 | "movq 0(%[dest]), %%rax\n\t" | |
422 | "movq 8(%[dest]), %%rdx\n\t" | |
423 | ".align 16\n\t" | |
424 | "1: lock; cmpxchg16b 0(%[dest])\n\t" | |
425 | "jne 1b\n\t" | |
426 | : | |
427 | : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage) | |
428 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" | |
429 | ); | |
430 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
431 | } | |
432 | ||
// Returns the current contents of storage. The memory_order argument is unnamed and is not consulted.
// Every branch reads the value with a compare-and-swap whose comparand and replacement are
// equal, so storage keeps its contents whether or not the comparison succeeds:
//  - clang: __sync_val_compare_and_swap with a value-initialized comparand and replacement;
//  - BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS: asm that copies rax and rdx into value by hand;
//  - otherwise: asm that returns the rax:rdx pair directly through the "=&A" operand.
// NOTE(review): the compare-and-swap operates on storage although it is taken by const
// reference - confirm callers never pass objects placed in read-only memory.
433 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
434 | { | |
435 | #if defined(__clang__) | |
436 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics | |
437 | storage_type value = storage_type(); | |
438 | return __sync_val_compare_and_swap(&storage, value, value); | |
439 | #elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
440 | // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap | |
441 | storage_type value; | |
442 | ||
443 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
444 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
445 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
446 | __asm__ __volatile__ | |
447 | ( | |
448 | "movq %%rbx, %%rax\n\t" | |
449 | "movq %%rcx, %%rdx\n\t" | |
450 | "lock; cmpxchg16b %[storage]\n\t" | |
451 | "movq %%rax, %[value]\n\t" | |
452 | "movq %%rdx, 8+%[value]\n\t" | |
453 | : [value] "=o" (value) | |
454 | : [storage] "m" (storage) | |
455 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
456 | ); | |
457 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
// value is written through a pointer operand here, so it is not listed as an asm output;
// the "memory" clobber is what tells the compiler that it has been modified.
458 | __asm__ __volatile__ | |
459 | ( | |
460 | "movq %%rbx, %%rax\n\t" | |
461 | "movq %%rcx, %%rdx\n\t" | |
462 | "lock; cmpxchg16b %[storage]\n\t" | |
463 | "movq %%rax, 0(%[value])\n\t" | |
464 | "movq %%rdx, 8(%[value])\n\t" | |
465 | : | |
466 | : [storage] "m" (storage), [value] "r" (&value) | |
467 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
468 | ); | |
469 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
470 | ||
471 | return value; | |
472 | #else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
473 | storage_type value; | |
474 | ||
475 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
476 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
477 | __asm__ __volatile__ | |
478 | ( | |
479 | "movq %%rbx, %%rax\n\t" | |
480 | "movq %%rcx, %%rdx\n\t" | |
481 | "lock; cmpxchg16b %[storage]\n\t" | |
482 | : "=&A" (value) | |
483 | : [storage] "m" (storage) | |
484 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
485 | ); | |
486 | ||
487 | return value; | |
488 | #endif | |
489 | } | |
490 | ||
// Compares storage with expected and, if they are equal, replaces storage with desired.
// Returns true when the replacement took place. expected is an in/out operand: after the call
// it holds the value that was observed in storage. Both memory_order arguments are unnamed and
// are not consulted.
// Three implementations are selected at preprocessing time: the __sync intrinsic (clang),
// asm that moves expected to and from rax/rdx by hand (BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS),
// and asm that binds expected to the rax:rdx pair through the "+A" operand (everything else).
491 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
492 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
493 | { | |
494 | #if defined(__clang__) | |
b32b8144 | 495 | |
7c673cae FG |
496 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics |
497 | storage_type old_expected = expected; | |
498 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
499 | return expected == old_expected; | |
b32b8144 | 500 | |
7c673cae | 501 | #elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) |
b32b8144 | 502 | |
7c673cae FG |
503 | // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap |
504 | uint64_t const* p_desired = (uint64_t const*)&desired; | |
505 | const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1]; | |
506 | bool success; | |
507 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
508 | __asm__ __volatile__ | |
509 | ( | |
510 | "movq %[expected], %%rax\n\t" | |
511 | "movq 8+%[expected], %%rdx\n\t" | |
512 | "lock; cmpxchg16b %[dest]\n\t" | |
513 | "sete %[success]\n\t" | |
514 | "movq %%rax, %[expected]\n\t" | |
515 | "movq %%rdx, 8+%[expected]\n\t" | |
516 | : [dest] "+m" (storage), [expected] "+o" (expected), [success] "=q" (success) | |
517 | : "b" (desired_lo), "c" (desired_hi) | |
518 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
519 | ); | |
520 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
521 | __asm__ __volatile__ | |
522 | ( | |
523 | "movq 0(%[expected]), %%rax\n\t" | |
524 | "movq 8(%[expected]), %%rdx\n\t" | |
525 | "lock; cmpxchg16b %[dest]\n\t" | |
526 | "sete %[success]\n\t" | |
527 | "movq %%rax, 0(%[expected])\n\t" | |
528 | "movq %%rdx, 8(%[expected])\n\t" | |
529 | : [dest] "+m" (storage), [success] "=q" (success) | |
530 | : "b" (desired_lo), "c" (desired_hi), [expected] "r" (&expected) | |
531 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
532 | ); | |
533 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
534 | ||
535 | return success; | |
b32b8144 | 536 | |
7c673cae | 537 | #else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) |
b32b8144 | 538 | |
7c673cae FG |
539 | uint64_t const* p_desired = (uint64_t const*)&desired; |
540 | const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1]; | |
541 | bool success; | |
b32b8144 FG |
542 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
543 | __asm__ __volatile__ | |
544 | ( | |
545 | "lock; cmpxchg16b %[dest]\n\t" | |
546 | : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success) | |
547 | : "b" (desired_lo), "c" (desired_hi) | |
548 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
549 | ); | |
550 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
551 | __asm__ __volatile__ |
552 | ( | |
553 | "lock; cmpxchg16b %[dest]\n\t" | |
554 | "sete %[success]\n\t" | |
555 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) | |
556 | : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) | |
557 | : "b,b" (desired_lo), "c,c" (desired_hi) | |
558 | #else | |
559 | : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) | |
560 | : "b" (desired_lo), "c" (desired_hi) | |
561 | #endif | |
562 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
563 | ); | |
b32b8144 FG |
564 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
565 | ||
7c673cae | 566 | return success; |
b32b8144 FG |
567 | |
568 | #endif // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
7c673cae FG |
569 | } |
570 | ||
// Weak compare-and-exchange. Forwards every argument unchanged to compare_exchange_strong
// and returns its result, so the two operations behave identically in this implementation.
571 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
572 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
573 | { | |
574 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
575 | } | |
576 | ||
// Replaces the contents of storage with v and returns the contents that were replaced.
// The memory_order argument is unnamed and is not consulted.
// clang builds run a __sync_val_compare_and_swap loop seeded with a plain read of storage.
// Other builds run a lock cmpxchg16b loop: rax:rdx is loaded with the current contents,
// rbx and rcx carry the first and second 8 bytes of v, and the loop repeats until the
// exchange succeeds; the old value is then taken from rax:rdx.
577 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
578 | { | |
579 | #if defined(__clang__) | |
580 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics | |
581 | storage_type old_val = storage; | |
582 | while (true) | |
583 | { | |
584 | storage_type val = __sync_val_compare_and_swap(&storage, old_val, v); | |
585 | if (val == old_val) | |
586 | return val; | |
587 | old_val = val; | |
588 | } | |
589 | #elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
590 | // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap | |
591 | storage_type old_value; | |
592 | uint64_t const* p_value = (uint64_t const*)&v; | |
593 | const uint64_t v_lo = p_value[0], v_hi = p_value[1]; | |
594 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
595 | __asm__ __volatile__ | |
596 | ( | |
597 | "movq %[dest], %%rax\n\t" | |
598 | "movq 8+%[dest], %%rdx\n\t" | |
599 | ".align 16\n\t" | |
600 | "1: lock; cmpxchg16b %[dest]\n\t" | |
601 | "jne 1b\n\t" | |
602 | "movq %%rax, %[old_value]\n\t" | |
603 | "movq %%rdx, 8+%[old_value]\n\t" | |
604 | : [dest] "+o" (storage), [old_value] "=o" (old_value) | |
605 | : "b" (v_lo), "c" (v_hi) | |
606 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
607 | ); | |
608 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
// Both storage and old_value are reached through pointer operands here, so neither is
// listed as an asm output; the "memory" clobber covers the writes to them.
609 | __asm__ __volatile__ | |
610 | ( | |
611 | "movq 0(%[dest]), %%rax\n\t" | |
612 | "movq 8(%[dest]), %%rdx\n\t" | |
613 | ".align 16\n\t" | |
614 | "1: lock; cmpxchg16b 0(%[dest])\n\t" | |
615 | "jne 1b\n\t" | |
616 | "movq %%rax, 0(%[old_value])\n\t" | |
617 | "movq %%rdx, 8(%[old_value])\n\t" | |
618 | : | |
619 | : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage), [old_value] "r" (&old_value) | |
620 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" | |
621 | ); | |
622 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
623 | ||
624 | return old_value; | |
625 | #else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) | |
// The halves of v are copied out before the asm because v itself is reused as the
// early-clobber output ("=&A") that receives the previous contents of storage.
626 | uint64_t const* p_value = (uint64_t const*)&v; | |
627 | const uint64_t v_lo = p_value[0], v_hi = p_value[1]; | |
628 | #if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
629 | __asm__ __volatile__ | |
630 | ( | |
631 | "movq %[dest], %%rax\n\t" | |
632 | "movq 8+%[dest], %%rdx\n\t" | |
633 | ".align 16\n\t" | |
634 | "1: lock; cmpxchg16b %[dest]\n\t" | |
635 | "jne 1b\n\t" | |
636 | : "=&A" (v), [dest] "+o" (storage) | |
637 | : "b" (v_lo), "c" (v_hi) | |
638 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
639 | ); | |
640 | #else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
641 | __asm__ __volatile__ | |
642 | ( | |
643 | "movq 0(%[dest]), %%rax\n\t" | |
644 | "movq 8(%[dest]), %%rdx\n\t" | |
645 | ".align 16\n\t" | |
646 | "1: lock; cmpxchg16b 0(%[dest])\n\t" | |
647 | "jne 1b\n\t" | |
648 | : "=&A" (v) | |
649 | : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage) | |
650 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
651 | ); | |
652 | #endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) | |
653 | ||
654 | return v; | |
655 | #endif | |
656 | } | |
7c673cae FG |
657 | }; |
658 | ||
659 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
660 | ||
661 | } // namespace detail | |
662 | } // namespace atomics | |
663 | } // namespace boost | |
664 | ||
665 | #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ |