]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/atomic/detail/ops_gcc_x86_dcas.hpp
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / boost / boost / atomic / detail / ops_gcc_x86_dcas.hpp
CommitLineData
7c673cae
FG
1/*
2 * Distributed under the Boost Software License, Version 1.0.
3 * (See accompanying file LICENSE_1_0.txt or copy at
4 * http://www.boost.org/LICENSE_1_0.txt)
5 *
6 * Copyright (c) 2009 Helge Bahmann
7 * Copyright (c) 2012 Tim Blechmann
11fdf7f2 8 * Copyright (c) 2014 - 2018 Andrey Semashev
7c673cae
FG
9 */
10/*!
11 * \file atomic/detail/ops_gcc_x86_dcas.hpp
12 *
13 * This header contains implementation of the double-width CAS primitive for x86.
14 */
15
16#ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
17#define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_
18
19#include <boost/cstdint.hpp>
20#include <boost/memory_order.hpp>
21#include <boost/atomic/detail/config.hpp>
22#include <boost/atomic/detail/storage_type.hpp>
11fdf7f2 23#include <boost/atomic/detail/string_ops.hpp>
7c673cae
FG
24#include <boost/atomic/capabilities.hpp>
25
26#ifdef BOOST_HAS_PRAGMA_ONCE
27#pragma once
28#endif
29
30namespace boost {
31namespace atomics {
32namespace detail {
33
11fdf7f2
TL
34// Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory
35// operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang
36// is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason
37// we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards.
38// Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster.
39// The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer
40// with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it.
41
7c673cae
FG
42#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
43
44template< bool Signed >
45struct gcc_dcas_x86
46{
11fdf7f2
TL
47 typedef typename make_storage_type< 8u >::type storage_type;
48 typedef typename make_storage_type< 8u >::aligned aligned_storage_type;
49 typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t;
7c673cae 50
11fdf7f2 51 static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
7c673cae
FG
52 static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
53
54 static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
55 {
11fdf7f2 56 if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
7c673cae 57 {
11fdf7f2
TL
58#if defined(__SSE__)
59 typedef float xmm_t __attribute__((__vector_size__(16)));
60 xmm_t xmm_scratch;
7c673cae
FG
61 __asm__ __volatile__
62 (
63#if defined(__AVX__)
11fdf7f2
TL
64 "vmovq %[value], %[xmm_scratch]\n\t"
65 "vmovq %[xmm_scratch], %[storage]\n\t"
66#elif defined(__SSE2__)
67 "movq %[value], %[xmm_scratch]\n\t"
68 "movq %[xmm_scratch], %[storage]\n\t"
7c673cae 69#else
11fdf7f2
TL
70 "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
71 "movlps %[value], %[xmm_scratch]\n\t"
72 "movlps %[xmm_scratch], %[storage]\n\t"
7c673cae 73#endif
11fdf7f2
TL
74 : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch)
75 : [value] "m" (v)
76 : "memory"
7c673cae
FG
77 );
78#else
79 __asm__ __volatile__
80 (
11fdf7f2
TL
81 "fildll %[value]\n\t"
82 "fistpll %[storage]\n\t"
83 : [storage] "=m" (storage)
84 : [value] "m" (v)
7c673cae
FG
85 : "memory"
86 );
87#endif
88 }
89 else
90 {
11fdf7f2 91#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae
FG
92 __asm__ __volatile__
93 (
11fdf7f2
TL
94 "xchgl %%ebx, %%esi\n\t"
95 "movl %%eax, %%ebx\n\t"
96 "movl (%[dest]), %%eax\n\t"
7c673cae
FG
97 "movl 4(%[dest]), %%edx\n\t"
98 ".align 16\n\t"
11fdf7f2 99 "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae 100 "jne 1b\n\t"
11fdf7f2
TL
101 "xchgl %%ebx, %%esi\n\t"
102 :
103 : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
7c673cae
FG
104 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory"
105 );
11fdf7f2 106#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae
FG
107 __asm__ __volatile__
108 (
11fdf7f2
TL
109 "movl %[dest_lo], %%eax\n\t"
110 "movl %[dest_hi], %%edx\n\t"
7c673cae 111 ".align 16\n\t"
11fdf7f2 112 "1: lock; cmpxchg8b %[dest_lo]\n\t"
7c673cae 113 "jne 1b\n\t"
11fdf7f2
TL
114 : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
115 : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
7c673cae
FG
116 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory"
117 );
11fdf7f2 118#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae
FG
119 }
120 }
121
122 static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
123 {
124 storage_type value;
125
11fdf7f2 126 if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
7c673cae 127 {
11fdf7f2
TL
128#if defined(__SSE__)
129 typedef float xmm_t __attribute__((__vector_size__(16)));
130 xmm_t xmm_scratch;
7c673cae
FG
131 __asm__ __volatile__
132 (
133#if defined(__AVX__)
11fdf7f2
TL
134 "vmovq %[storage], %[xmm_scratch]\n\t"
135 "vmovq %[xmm_scratch], %[value]\n\t"
136#elif defined(__SSE2__)
137 "movq %[storage], %[xmm_scratch]\n\t"
138 "movq %[xmm_scratch], %[value]\n\t"
7c673cae 139#else
11fdf7f2
TL
140 "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
141 "movlps %[storage], %[xmm_scratch]\n\t"
142 "movlps %[xmm_scratch], %[value]\n\t"
7c673cae 143#endif
11fdf7f2
TL
144 : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch)
145 : [storage] "m" (storage)
146 : "memory"
7c673cae
FG
147 );
148#else
149 __asm__ __volatile__
150 (
11fdf7f2
TL
151 "fildll %[storage]\n\t"
152 "fistpll %[value]\n\t"
153 : [value] "=m" (value)
154 : [storage] "m" (storage)
7c673cae
FG
155 : "memory"
156 );
157#endif
158 }
159 else
160 {
92f5a8d4
TL
161 // Note that despite const qualification cmpxchg8b below may issue a store to the storage. The storage value
162 // will not change, but this prevents the storage to reside in read-only memory.
163
164#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
165
11fdf7f2 166 uint32_t value_bits[2];
92f5a8d4 167
11fdf7f2
TL
168 // We don't care for comparison result here; the previous value will be stored into value anyway.
169 // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
170 __asm__ __volatile__
171 (
172 "movl %%ebx, %%eax\n\t"
173 "movl %%ecx, %%edx\n\t"
174 "lock; cmpxchg8b %[storage]\n\t"
175 : "=&a" (value_bits[0]), "=&d" (value_bits[1])
176 : [storage] "m" (storage)
177 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
178 );
179 BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
92f5a8d4 180
11fdf7f2 181#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
92f5a8d4 182
7c673cae
FG
183 // We don't care for comparison result here; the previous value will be stored into value anyway.
184 // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
185 __asm__ __volatile__
186 (
187 "movl %%ebx, %%eax\n\t"
188 "movl %%ecx, %%edx\n\t"
189 "lock; cmpxchg8b %[storage]\n\t"
190 : "=&A" (value)
191 : [storage] "m" (storage)
192 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
193 );
92f5a8d4 194
11fdf7f2 195#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
7c673cae
FG
196 }
197
198 return value;
199 }
200
201 static BOOST_FORCEINLINE bool compare_exchange_strong(
202 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
203 {
204#if defined(__clang__)
b32b8144 205
7c673cae
FG
206 // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
207 storage_type old_expected = expected;
208 expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
209 return expected == old_expected;
b32b8144 210
11fdf7f2 211#elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae 212
7c673cae 213 bool success;
11fdf7f2 214
b32b8144 215#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
7c673cae
FG
216 __asm__ __volatile__
217 (
11fdf7f2 218 "xchgl %%ebx, %%esi\n\t"
b32b8144 219 "lock; cmpxchg8b (%[dest])\n\t"
11fdf7f2
TL
220 "xchgl %%ebx, %%esi\n\t"
221 : "+A" (expected), [success] "=@ccz" (success)
222 : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
b32b8144
FG
223 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
224 );
225#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
226 __asm__ __volatile__
227 (
11fdf7f2 228 "xchgl %%ebx, %%esi\n\t"
b32b8144 229 "lock; cmpxchg8b (%[dest])\n\t"
11fdf7f2 230 "xchgl %%ebx, %%esi\n\t"
7c673cae 231 "sete %[success]\n\t"
11fdf7f2
TL
232 : "+A" (expected), [success] "=qm" (success)
233 : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
7c673cae
FG
234 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
235 );
b32b8144
FG
236#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
237
7c673cae 238 return success;
b32b8144 239
11fdf7f2 240#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
b32b8144 241
7c673cae 242 bool success;
11fdf7f2 243
b32b8144
FG
244#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
245 __asm__ __volatile__
246 (
247 "lock; cmpxchg8b %[dest]\n\t"
248 : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success)
249 : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
250 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
251 );
252#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
7c673cae
FG
253 __asm__ __volatile__
254 (
255 "lock; cmpxchg8b %[dest]\n\t"
256 "sete %[success]\n\t"
11fdf7f2 257 : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
7c673cae 258 : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
7c673cae
FG
259 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
260 );
b32b8144
FG
261#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
262
7c673cae 263 return success;
b32b8144 264
11fdf7f2 265#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae
FG
266 }
267
268 static BOOST_FORCEINLINE bool compare_exchange_weak(
269 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
270 {
271 return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
272 }
273
92f5a8d4 274 static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
7c673cae 275 {
11fdf7f2
TL
276#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
277#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
278
279 uint32_t old_bits[2];
7c673cae
FG
280 __asm__ __volatile__
281 (
11fdf7f2
TL
282 "xchgl %%ebx, %%esi\n\t"
283 "movl (%[dest]), %%eax\n\t"
284 "movl 4(%[dest]), %%edx\n\t"
7c673cae 285 ".align 16\n\t"
11fdf7f2 286 "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae 287 "jne 1b\n\t"
11fdf7f2
TL
288 "xchgl %%ebx, %%esi\n\t"
289 : "=a" (old_bits[0]), "=d" (old_bits[1])
290 : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
291 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
7c673cae 292 );
11fdf7f2
TL
293
294 storage_type old_value;
295 BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
296 return old_value;
297
298#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
299
300 storage_type old_value;
7c673cae
FG
301 __asm__ __volatile__
302 (
11fdf7f2
TL
303 "xchgl %%ebx, %%esi\n\t"
304 "movl (%[dest]), %%eax\n\t"
305 "movl 4(%[dest]), %%edx\n\t"
7c673cae 306 ".align 16\n\t"
11fdf7f2 307 "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae 308 "jne 1b\n\t"
11fdf7f2
TL
309 "xchgl %%ebx, %%esi\n\t"
310 : "=A" (old_value)
311 : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
7c673cae
FG
312 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
313 );
11fdf7f2
TL
314 return old_value;
315
316#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
317#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
318#if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407
319
320 // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below
321 uint32_t old_bits[2];
7c673cae
FG
322 __asm__ __volatile__
323 (
11fdf7f2 324 "movl (%[dest]), %%eax\n\t"
7c673cae
FG
325 "movl 4(%[dest]), %%edx\n\t"
326 ".align 16\n\t"
11fdf7f2 327 "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae 328 "jne 1b\n\t"
11fdf7f2
TL
329 : "=&a" (old_bits[0]), "=&d" (old_bits[1])
330 : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage)
331 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
7c673cae 332 );
11fdf7f2
TL
333
334 storage_type old_value;
335 BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
336 return old_value;
337
338#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
339
340 uint32_t old_bits[2];
7c673cae
FG
341 __asm__ __volatile__
342 (
11fdf7f2
TL
343 "movl %[dest_lo], %%eax\n\t"
344 "movl %[dest_hi], %%edx\n\t"
7c673cae 345 ".align 16\n\t"
11fdf7f2 346 "1: lock; cmpxchg8b %[dest_lo]\n\t"
7c673cae 347 "jne 1b\n\t"
11fdf7f2
TL
348 : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
349 : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
7c673cae
FG
350 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
351 );
11fdf7f2
TL
352
353 storage_type old_value;
354 BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
355 return old_value;
356
357#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
358
359 storage_type old_value;
360 __asm__ __volatile__
361 (
362 "movl %[dest_lo], %%eax\n\t"
363 "movl %[dest_hi], %%edx\n\t"
364 ".align 16\n\t"
365 "1: lock; cmpxchg8b %[dest_lo]\n\t"
366 "jne 1b\n\t"
367 : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
368 : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
369 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
370 );
371 return old_value;
372
373#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
374#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae 375 }
7c673cae
FG
376};
377
378#endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
379
380#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
381
382template< bool Signed >
383struct gcc_dcas_x86_64
384{
11fdf7f2
TL
385 typedef typename make_storage_type< 16u >::type storage_type;
386 typedef typename make_storage_type< 16u >::aligned aligned_storage_type;
387 typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t;
7c673cae 388
11fdf7f2 389 static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
7c673cae
FG
390 static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
391
392 static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
393 {
7c673cae
FG
394 __asm__ __volatile__
395 (
11fdf7f2
TL
396 "movq %[dest_lo], %%rax\n\t"
397 "movq %[dest_hi], %%rdx\n\t"
7c673cae 398 ".align 16\n\t"
11fdf7f2 399 "1: lock; cmpxchg16b %[dest_lo]\n\t"
7c673cae 400 "jne 1b\n\t"
11fdf7f2
TL
401 : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
402 : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
7c673cae
FG
403 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory"
404 );
7c673cae
FG
405 }
406
407 static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
408 {
92f5a8d4
TL
409 // Note that despite const qualification cmpxchg16b below may issue a store to the storage. The storage value
410 // will not change, but this prevents the storage to reside in read-only memory.
11fdf7f2 411
92f5a8d4 412#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
11fdf7f2 413
11fdf7f2 414 uint64_t value_bits[2];
7c673cae
FG
415
416 // We don't care for comparison result here; the previous value will be stored into value anyway.
417 // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
7c673cae
FG
418 __asm__ __volatile__
419 (
420 "movq %%rbx, %%rax\n\t"
421 "movq %%rcx, %%rdx\n\t"
422 "lock; cmpxchg16b %[storage]\n\t"
11fdf7f2 423 : "=&a" (value_bits[0]), "=&d" (value_bits[1])
7c673cae 424 : [storage] "m" (storage)
11fdf7f2 425 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
7c673cae 426 );
7c673cae 427
11fdf7f2
TL
428 storage_type value;
429 BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
7c673cae 430 return value;
11fdf7f2
TL
431
432#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
433
7c673cae
FG
434 storage_type value;
435
436 // We don't care for comparison result here; the previous value will be stored into value anyway.
437 // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
438 __asm__ __volatile__
439 (
440 "movq %%rbx, %%rax\n\t"
441 "movq %%rcx, %%rdx\n\t"
442 "lock; cmpxchg16b %[storage]\n\t"
443 : "=&A" (value)
444 : [storage] "m" (storage)
445 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
446 );
447
448 return value;
11fdf7f2
TL
449
450#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
7c673cae
FG
451 }
452
453 static BOOST_FORCEINLINE bool compare_exchange_strong(
454 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT
455 {
456#if defined(__clang__)
b32b8144 457
7c673cae
FG
458 // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics
459 storage_type old_expected = expected;
460 expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
461 return expected == old_expected;
b32b8144 462
11fdf7f2 463#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
b32b8144 464
11fdf7f2 465 // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap
7c673cae 466 bool success;
7c673cae
FG
467 __asm__ __volatile__
468 (
7c673cae
FG
469 "lock; cmpxchg16b %[dest]\n\t"
470 "sete %[success]\n\t"
11fdf7f2
TL
471 : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success)
472 : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
473 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
7c673cae 474 );
7c673cae
FG
475
476 return success;
b32b8144 477
11fdf7f2 478#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
b32b8144 479
7c673cae 480 bool success;
11fdf7f2 481
b32b8144
FG
482#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
483 __asm__ __volatile__
484 (
485 "lock; cmpxchg16b %[dest]\n\t"
11fdf7f2
TL
486 : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success)
487 : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
b32b8144
FG
488 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
489 );
490#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
7c673cae
FG
491 __asm__ __volatile__
492 (
493 "lock; cmpxchg16b %[dest]\n\t"
494 "sete %[success]\n\t"
11fdf7f2
TL
495 : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
496 : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
7c673cae
FG
497 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
498 );
b32b8144
FG
499#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
500
7c673cae 501 return success;
b32b8144 502
11fdf7f2 503#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
7c673cae
FG
504 }
505
506 static BOOST_FORCEINLINE bool compare_exchange_weak(
507 storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT
508 {
509 return compare_exchange_strong(storage, expected, desired, success_order, failure_order);
510 }
511
512 static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
513 {
11fdf7f2
TL
514#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
515 uint64_t old_bits[2];
7c673cae
FG
516 __asm__ __volatile__
517 (
11fdf7f2
TL
518 "movq %[dest_lo], %%rax\n\t"
519 "movq %[dest_hi], %%rdx\n\t"
7c673cae 520 ".align 16\n\t"
11fdf7f2 521 "1: lock; cmpxchg16b %[dest_lo]\n\t"
7c673cae 522 "jne 1b\n\t"
11fdf7f2
TL
523 : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1])
524 : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
525 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
7c673cae 526 );
7c673cae 527
11fdf7f2
TL
528 storage_type old_value;
529 BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
7c673cae 530 return old_value;
11fdf7f2
TL
531#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
532 storage_type old_value;
7c673cae
FG
533 __asm__ __volatile__
534 (
11fdf7f2
TL
535 "movq %[dest_lo], %%rax\n\t"
536 "movq %[dest_hi], %%rdx\n\t"
7c673cae 537 ".align 16\n\t"
11fdf7f2 538 "1: lock; cmpxchg16b %[dest_lo]\n\t"
7c673cae 539 "jne 1b\n\t"
11fdf7f2
TL
540 : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
541 : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
7c673cae
FG
542 : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
543 );
7c673cae 544
11fdf7f2
TL
545 return old_value;
546#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
7c673cae 547 }
7c673cae
FG
548};
549
550#endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B)
551
552} // namespace detail
553} // namespace atomics
554} // namespace boost
555
556#endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_