]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Distributed under the Boost Software License, Version 1.0. | |
3 | * (See accompanying file LICENSE_1_0.txt or copy at | |
4 | * http://www.boost.org/LICENSE_1_0.txt) | |
5 | * | |
6 | * Copyright (c) 2009 Helge Bahmann | |
7 | * Copyright (c) 2012 Tim Blechmann | |
11fdf7f2 | 8 | * Copyright (c) 2014 - 2018 Andrey Semashev |
7c673cae FG |
9 | */ |
10 | /*! | |
11 | * \file atomic/detail/ops_gcc_x86_dcas.hpp | |
12 | * | |
13 | * This header contains implementation of the double-width CAS primitive for x86. | |
14 | */ | |
15 | ||
16 | #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
17 | #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
18 | ||
19 | #include <boost/cstdint.hpp> | |
20 | #include <boost/memory_order.hpp> | |
21 | #include <boost/atomic/detail/config.hpp> | |
f67539c2 | 22 | #include <boost/atomic/detail/storage_traits.hpp> |
11fdf7f2 | 23 | #include <boost/atomic/detail/string_ops.hpp> |
7c673cae FG |
24 | #include <boost/atomic/capabilities.hpp> |
25 | ||
26 | #ifdef BOOST_HAS_PRAGMA_ONCE | |
27 | #pragma once | |
28 | #endif | |
29 | ||
30 | namespace boost { | |
31 | namespace atomics { | |
32 | namespace detail { | |
33 | ||
11fdf7f2 TL |
34 | // Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory |
35 | // operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang | |
36 | // is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason | |
37 | // we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards. | |
38 | // Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster. | |
39 | // The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer | |
40 | // with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it. | |
41 | ||
7c673cae FG |
42 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) |
43 | ||
44 | template< bool Signed > | |
45 | struct gcc_dcas_x86 | |
46 | { | |
f67539c2 | 47 | typedef typename storage_traits< 8u >::type storage_type; |
11fdf7f2 | 48 | typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t; |
7c673cae | 49 | |
f67539c2 TL |
50 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 8u; |
51 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 8u; | |
52 | static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed; | |
11fdf7f2 | 53 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; |
7c673cae FG |
54 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; |
55 | ||
56 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
57 | { | |
11fdf7f2 | 58 | if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u)) |
7c673cae | 59 | { |
11fdf7f2 TL |
60 | #if defined(__SSE__) |
61 | typedef float xmm_t __attribute__((__vector_size__(16))); | |
62 | xmm_t xmm_scratch; | |
7c673cae FG |
63 | __asm__ __volatile__ |
64 | ( | |
65 | #if defined(__AVX__) | |
11fdf7f2 TL |
66 | "vmovq %[value], %[xmm_scratch]\n\t" |
67 | "vmovq %[xmm_scratch], %[storage]\n\t" | |
68 | #elif defined(__SSE2__) | |
69 | "movq %[value], %[xmm_scratch]\n\t" | |
70 | "movq %[xmm_scratch], %[storage]\n\t" | |
7c673cae | 71 | #else |
11fdf7f2 TL |
72 | "xorps %[xmm_scratch], %[xmm_scratch]\n\t" |
73 | "movlps %[value], %[xmm_scratch]\n\t" | |
74 | "movlps %[xmm_scratch], %[storage]\n\t" | |
7c673cae | 75 | #endif |
11fdf7f2 TL |
76 | : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch) |
77 | : [value] "m" (v) | |
78 | : "memory" | |
7c673cae FG |
79 | ); |
80 | #else | |
81 | __asm__ __volatile__ | |
82 | ( | |
11fdf7f2 TL |
83 | "fildll %[value]\n\t" |
84 | "fistpll %[storage]\n\t" | |
85 | : [storage] "=m" (storage) | |
86 | : [value] "m" (v) | |
7c673cae FG |
87 | : "memory" |
88 | ); | |
89 | #endif | |
90 | } | |
91 | else | |
92 | { | |
11fdf7f2 | 93 | #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
94 | __asm__ __volatile__ |
95 | ( | |
11fdf7f2 TL |
96 | "xchgl %%ebx, %%esi\n\t" |
97 | "movl %%eax, %%ebx\n\t" | |
98 | "movl (%[dest]), %%eax\n\t" | |
7c673cae FG |
99 | "movl 4(%[dest]), %%edx\n\t" |
100 | ".align 16\n\t" | |
11fdf7f2 | 101 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 102 | "jne 1b\n\t" |
11fdf7f2 TL |
103 | "xchgl %%ebx, %%esi\n\t" |
104 | : | |
105 | : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
106 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" |
107 | ); | |
11fdf7f2 | 108 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
109 | __asm__ __volatile__ |
110 | ( | |
11fdf7f2 TL |
111 | "movl %[dest_lo], %%eax\n\t" |
112 | "movl %[dest_hi], %%edx\n\t" | |
7c673cae | 113 | ".align 16\n\t" |
11fdf7f2 | 114 | "1: lock; cmpxchg8b %[dest_lo]\n\t" |
7c673cae | 115 | "jne 1b\n\t" |
11fdf7f2 TL |
116 | : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) |
117 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
118 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" |
119 | ); | |
11fdf7f2 | 120 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
121 | } |
122 | } | |
123 | ||
// Reads the 8-byte contents of `storage` and returns them; the memory_order argument is unnamed and not used.
// If the address has its low three bits clear (8-byte aligned), the value is copied with a single 8-byte move
// through an XMM register (SSE builds) or through the x87 stack with fildll/fistpll (non-SSE builds).
// Otherwise the value is obtained from edx:eax after a lock; cmpxchg8b.
124 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
125 | { | |
126 | storage_type value; | |
127 | ||
11fdf7f2 | 128 | if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
7c673cae | 129 | {
// SSE path: vmovq with AVX, movq with SSE2, xorps + movlps when only SSE is available.
11fdf7f2 TL |
130 | #if defined(__SSE__) |
131 | typedef float xmm_t __attribute__((__vector_size__(16))); | |
132 | xmm_t xmm_scratch; | |
7c673cae FG |
133 | __asm__ __volatile__ |
134 | ( | |
135 | #if defined(__AVX__) | |
11fdf7f2 TL |
136 | "vmovq %[storage], %[xmm_scratch]\n\t" |
137 | "vmovq %[xmm_scratch], %[value]\n\t" | |
138 | #elif defined(__SSE2__) | |
139 | "movq %[storage], %[xmm_scratch]\n\t" | |
140 | "movq %[xmm_scratch], %[value]\n\t" | |
7c673cae | 141 | #else
11fdf7f2 TL |
142 | "xorps %[xmm_scratch], %[xmm_scratch]\n\t" |
143 | "movlps %[storage], %[xmm_scratch]\n\t" | |
144 | "movlps %[xmm_scratch], %[value]\n\t" | |
7c673cae | 145 | #endif
11fdf7f2 TL |
146 | : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch) |
147 | : [storage] "m" (storage) | |
148 | : "memory" | |
7c673cae FG |
149 | ); |
150 | #else | |
// No SSE: move the 64-bit integer through the x87 stack.
151 | __asm__ __volatile__ | |
152 | ( | |
11fdf7f2 TL |
153 | "fildll %[storage]\n\t" |
154 | "fistpll %[value]\n\t" | |
155 | : [value] "=m" (value) | |
156 | : [storage] "m" (storage) | |
7c673cae FG |
157 | : "memory" |
158 | ); | |
159 | #endif | |
160 | } | |
161 | else | |
162 | { | |
92f5a8d4 TL |
163 | // Note that despite const qualification cmpxchg8b below may issue a store to the storage. The storage value |
164 | // will not change, but this prevents the storage to reside in read-only memory. | |
165 | ||
// Both variants below run the same instructions: eax/edx are set equal to ebx/ecx, so a successful compare
// writes back the value that was already there, and a failed compare loads the current contents into edx:eax.
// They differ only in how the result leaves the asm: two separate 32-bit outputs combined with memcpy here,
// or a single "A" register-pair output in the #else branch.
166 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
167 | ||
11fdf7f2 | 168 | uint32_t value_bits[2];
92f5a8d4 | 169 |
11fdf7f2 TL |
170 | // We don't care for comparison result here; the previous value will be stored into value anyway. |
171 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
172 | __asm__ __volatile__ | |
173 | ( | |
174 | "movl %%ebx, %%eax\n\t" | |
175 | "movl %%ecx, %%edx\n\t" | |
176 | "lock; cmpxchg8b %[storage]\n\t" | |
177 | : "=&a" (value_bits[0]), "=&d" (value_bits[1]) | |
178 | : [storage] "m" (storage) | |
179 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
180 | ); | |
181 | BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); | |
92f5a8d4 | 182 |
11fdf7f2 | 183 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
92f5a8d4 | 184 |
7c673cae FG |
185 | // We don't care for comparison result here; the previous value will be stored into value anyway. |
186 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
187 | __asm__ __volatile__ | |
188 | ( | |
189 | "movl %%ebx, %%eax\n\t" | |
190 | "movl %%ecx, %%edx\n\t" | |
191 | "lock; cmpxchg8b %[storage]\n\t" | |
192 | : "=&A" (value) | |
193 | : [storage] "m" (storage) | |
194 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
195 | ); | |
92f5a8d4 | 196 |
11fdf7f2 | 197 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
7c673cae FG |
198 | } |
199 | ||
200 | return value; | |
201 | } | |
202 | ||
// Compares the 8-byte contents of `storage` with `expected` and, if they are equal, replaces them with `desired`.
// Returns true when the replacement took place. `expected` is an in/out operand ("+A", or assigned from the
// intrinsic result on clang), so after the call it holds the value that was observed in `storage`.
// Both memory_order arguments are unnamed and not used.
203 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
204 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
205 | { | |
206 | #if defined(__clang__) | |
b32b8144 | 207 |
7c673cae FG |
208 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics |
209 | storage_type old_expected = expected; | |
210 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
211 | return expected == old_expected; | |
b32b8144 | 212 |
// ebx-preserving variant: the low half of `desired` is passed in esi and swapped into ebx only around the
// cmpxchg8b; the second xchgl puts both registers back. The storage address is passed in edi.
11fdf7f2 | 213 | #elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae | 214 |
7c673cae | 215 | bool success;
11fdf7f2 | 216 |
// With flag-output support the zero flag is read directly through "=@ccz"; otherwise it is captured with sete.
b32b8144 | 217 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
7c673cae FG |
218 | __asm__ __volatile__ |
219 | ( | |
11fdf7f2 | 220 | "xchgl %%ebx, %%esi\n\t"
b32b8144 | 221 | "lock; cmpxchg8b (%[dest])\n\t"
11fdf7f2 TL |
222 | "xchgl %%ebx, %%esi\n\t" |
223 | : "+A" (expected), [success] "=@ccz" (success) | |
224 | : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
b32b8144 FG |
225 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
226 | ); | |
227 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
228 | __asm__ __volatile__ | |
229 | ( | |
11fdf7f2 | 230 | "xchgl %%ebx, %%esi\n\t"
b32b8144 | 231 | "lock; cmpxchg8b (%[dest])\n\t"
11fdf7f2 | 232 | "xchgl %%ebx, %%esi\n\t"
7c673cae | 233 | "sete %[success]\n\t"
11fdf7f2 TL |
234 | : "+A" (expected), [success] "=qm" (success) |
235 | : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
236 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
237 | ); | |
b32b8144 FG |
238 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
239 | ||
7c673cae | 240 | return success;
b32b8144 | 241 |
// Default variant: `desired` is passed directly in ecx:ebx and storage is an in/out memory operand.
11fdf7f2 | 242 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
b32b8144 | 243 |
7c673cae | 244 | bool success;
11fdf7f2 | 245 |
b32b8144 FG |
246 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
247 | __asm__ __volatile__ | |
248 | ( | |
249 | "lock; cmpxchg8b %[dest]\n\t" | |
250 | : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success) | |
251 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
252 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
253 | ); | |
254 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
255 | __asm__ __volatile__ |
256 | ( | |
257 | "lock; cmpxchg8b %[dest]\n\t" | |
258 | "sete %[success]\n\t" | |
11fdf7f2 | 259 | : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
7c673cae | 260 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
7c673cae FG |
261 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
262 | ); | |
b32b8144 FG |
263 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
264 | ||
7c673cae | 265 | return success;
b32b8144 | 266 |
11fdf7f2 | 267 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
7c673cae FG |
268 | } |
269 | ||
270 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
271 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
272 | { | |
273 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
274 | } | |
275 | ||
// Replaces the 8-byte contents of `storage` with `v` and returns the previous contents.
// Every variant below uses the same scheme: edx:eax is seeded with a plain read of storage, then
// lock; cmpxchg8b is retried at label 1 until the compare succeeds (a failed compare refreshes edx:eax with the
// current contents). When the loop exits, edx:eax holds the value that was replaced.
// The memory_order argument is unnamed and not used.
92f5a8d4 | 276 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
7c673cae | 277 | {
// ebx-preserving variants: the low half of v is passed in esi and swapped into ebx around the loop;
// the storage address is passed in edi.
11fdf7f2 TL |
278 | #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
279 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
280 | ||
// Result returned as two separate 32-bit outputs (eax, edx) and combined with memcpy.
281 | uint32_t old_bits[2]; | |
7c673cae FG |
282 | __asm__ __volatile__ |
283 | ( | |
11fdf7f2 TL |
284 | "xchgl %%ebx, %%esi\n\t" |
285 | "movl (%[dest]), %%eax\n\t" | |
286 | "movl 4(%[dest]), %%edx\n\t" | |
7c673cae | 287 | ".align 16\n\t"
11fdf7f2 | 288 | "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae | 289 | "jne 1b\n\t"
11fdf7f2 TL |
290 | "xchgl %%ebx, %%esi\n\t" |
291 | : "=a" (old_bits[0]), "=d" (old_bits[1]) | |
292 | : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
293 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 294 | );
11fdf7f2 TL |
295 | |
296 | storage_type old_value; | |
297 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
298 | return old_value; | |
299 | ||
300 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
301 | ||
// Result returned through a single "A" (edx:eax pair) output.
302 | storage_type old_value; | |
7c673cae FG |
303 | __asm__ __volatile__ |
304 | ( | |
11fdf7f2 TL |
305 | "xchgl %%ebx, %%esi\n\t" |
306 | "movl (%[dest]), %%eax\n\t" | |
307 | "movl 4(%[dest]), %%edx\n\t" | |
7c673cae | 308 | ".align 16\n\t"
11fdf7f2 | 309 | "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae | 310 | "jne 1b\n\t"
11fdf7f2 TL |
311 | "xchgl %%ebx, %%esi\n\t" |
312 | : "=A" (old_value) | |
313 | : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
314 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
315 | ); | |
11fdf7f2 TL |
316 | return old_value; |
317 | ||
318 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
// Variants where ebx may be used directly: v is passed in ecx:ebx.
319 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) | |
320 | #if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407 | |
321 | ||
322 | // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below | |
// Here the storage address is passed in a register (edi or esi) instead of as a memory operand.
323 | uint32_t old_bits[2]; | |
7c673cae FG |
324 | __asm__ __volatile__ |
325 | ( | |
11fdf7f2 | 326 | "movl (%[dest]), %%eax\n\t"
7c673cae FG |
327 | "movl 4(%[dest]), %%edx\n\t" |
328 | ".align 16\n\t" | |
11fdf7f2 | 329 | "1: lock; cmpxchg8b (%[dest])\n\t"
7c673cae | 330 | "jne 1b\n\t"
11fdf7f2 TL |
331 | : "=&a" (old_bits[0]), "=&d" (old_bits[1]) |
332 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage) | |
333 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 334 | );
11fdf7f2 TL |
335 | |
336 | storage_type old_value; | |
337 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
338 | return old_value; | |
339 | ||
340 | #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
341 | ||
// Storage is addressed through two in/out memory operands (low and high 32-bit halves);
// the result is returned as two separate 32-bit outputs and combined with memcpy.
342 | uint32_t old_bits[2]; | |
7c673cae FG |
343 | __asm__ __volatile__ |
344 | ( | |
11fdf7f2 TL |
345 | "movl %[dest_lo], %%eax\n\t" |
346 | "movl %[dest_hi], %%edx\n\t" | |
7c673cae | 347 | ".align 16\n\t"
11fdf7f2 | 348 | "1: lock; cmpxchg8b %[dest_lo]\n\t"
7c673cae | 349 | "jne 1b\n\t"
11fdf7f2 TL |
350 | : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) |
351 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
352 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
353 | ); | |
11fdf7f2 TL |
354 | |
355 | storage_type old_value; | |
356 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
357 | return old_value; | |
358 | ||
359 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
360 | ||
// Same as the previous variant, but the result is returned through a single "A" (edx:eax pair) output.
361 | storage_type old_value; | |
362 | __asm__ __volatile__ | |
363 | ( | |
364 | "movl %[dest_lo], %%eax\n\t" | |
365 | "movl %[dest_hi], %%edx\n\t" | |
366 | ".align 16\n\t" | |
367 | "1: lock; cmpxchg8b %[dest_lo]\n\t" | |
368 | "jne 1b\n\t" | |
369 | : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) | |
370 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
371 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
372 | ); | |
373 | return old_value; | |
374 | ||
375 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
376 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) | |
7c673cae | 377 | }
7c673cae FG |
378 | }; |
379 | ||
380 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
381 | ||
382 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
383 | ||
384 | template< bool Signed > | |
385 | struct gcc_dcas_x86_64 | |
386 | { | |
f67539c2 | 387 | typedef typename storage_traits< 16u >::type storage_type; |
11fdf7f2 | 388 | typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t; |
7c673cae | 389 | |
f67539c2 TL |
390 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 16u; |
391 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 16u; | |
392 | static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed; | |
11fdf7f2 | 393 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; |
7c673cae FG |
394 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; |
395 | ||
// Stores the 16-byte value `v` into `storage` using a lock; cmpxchg16b retry loop: rdx:rax is seeded with a
// plain read of the two 8-byte halves of storage, and the instruction at label 1 is retried until the compare
// succeeds and rcx:rbx (the two halves of v) is written. rax and rdx are listed as clobbers.
// The memory_order argument is unnamed and not used.
396 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
397 | { | |
7c673cae FG |
398 | __asm__ __volatile__ |
399 | ( | |
11fdf7f2 TL |
400 | "movq %[dest_lo], %%rax\n\t" |
401 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 402 | ".align 16\n\t"
11fdf7f2 | 403 | "1: lock; cmpxchg16b %[dest_lo]\n\t"
7c673cae | 404 | "jne 1b\n\t"
11fdf7f2 TL |
405 | : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) |
406 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
7c673cae FG |
407 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" |
408 | ); | |
7c673cae FG |
409 | } |
410 | ||
// Reads the 16-byte contents of `storage` and returns them; the memory_order argument is unnamed and not used.
// Both variants run the same instructions: rax/rdx are set equal to rbx/rcx and lock; cmpxchg16b is executed once,
// so a successful compare writes back the value that was already there and a failed compare loads the current
// contents into rdx:rax. They differ only in how the result leaves the asm (two 64-bit outputs combined with
// memcpy, or a single "A" output).
411 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
412 | { | |
92f5a8d4 TL |
413 | // Note that despite const qualification cmpxchg16b below may issue a store to the storage. The storage value |
414 | // will not change, but this prevents the storage to reside in read-only memory. | |
11fdf7f2 | 415 |
92f5a8d4 | 416 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
11fdf7f2 | 417 |
11fdf7f2 | 418 | uint64_t value_bits[2];
7c673cae FG |
419 | |
420 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
421 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
7c673cae FG |
422 | __asm__ __volatile__ |
423 | ( | |
424 | "movq %%rbx, %%rax\n\t" | |
425 | "movq %%rcx, %%rdx\n\t" | |
426 | "lock; cmpxchg16b %[storage]\n\t" | |
11fdf7f2 | 427 | : "=&a" (value_bits[0]), "=&d" (value_bits[1])
7c673cae | 428 | : [storage] "m" (storage)
11fdf7f2 | 429 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
7c673cae | 430 | );
7c673cae | 431 |
11fdf7f2 TL |
432 | storage_type value; |
433 | BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); | |
7c673cae | 434 | return value;
11fdf7f2 TL |
435 | |
436 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
437 | ||
7c673cae FG |
438 | storage_type value; |
439 | ||
440 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
441 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
442 | __asm__ __volatile__ | |
443 | ( | |
444 | "movq %%rbx, %%rax\n\t" | |
445 | "movq %%rcx, %%rdx\n\t" | |
446 | "lock; cmpxchg16b %[storage]\n\t" | |
447 | : "=&A" (value) | |
448 | : [storage] "m" (storage) | |
449 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
450 | ); | |
451 | ||
452 | return value; | |
11fdf7f2 TL |
453 | |
454 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
7c673cae FG |
455 | } |
456 | ||
// Compares the 16-byte contents of `storage` with `expected` and, if they are equal, replaces them with `desired`.
// Returns true when the replacement took place. `expected` is an in/out operand in every asm variant (and is
// assigned from the intrinsic result on clang), so after the call it holds the value observed in `storage`.
// Both memory_order arguments are unnamed and not used.
457 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
458 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
459 | { | |
460 | #if defined(__clang__) | |
b32b8144 | 461 |
7c673cae FG |
462 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics |
463 | storage_type old_expected = expected; | |
464 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
465 | return expected == old_expected; | |
b32b8144 | 466 |
// No register-pair constraint available: the two 8-byte halves of `expected` are bound to rax and rdx as
// separate in/out operands, and the zero flag is captured with sete.
11fdf7f2 | 467 | #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
b32b8144 | 468 |
11fdf7f2 | 469 | // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap
7c673cae | 470 | bool success;
7c673cae FG |
471 | __asm__ __volatile__ |
472 | ( | |
7c673cae FG |
473 | "lock; cmpxchg16b %[dest]\n\t" |
474 | "sete %[success]\n\t" | |
11fdf7f2 TL |
475 | : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success) |
476 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
477 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 478 | );
7c673cae FG |
479 | |
480 | return success; | |
b32b8144 | 481 |
// Default variant: `expected` is a single "+A" operand. With flag-output support the zero flag is read
// directly through "=@ccz"; otherwise it is captured with sete.
11fdf7f2 | 482 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
b32b8144 | 483 |
7c673cae | 484 | bool success;
11fdf7f2 | 485 |
b32b8144 FG |
486 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
487 | __asm__ __volatile__ | |
488 | ( | |
489 | "lock; cmpxchg16b %[dest]\n\t" | |
11fdf7f2 TL |
490 | : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success) |
491 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
b32b8144 FG |
492 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
493 | ); | |
494 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
495 | __asm__ __volatile__ |
496 | ( | |
497 | "lock; cmpxchg16b %[dest]\n\t" | |
498 | "sete %[success]\n\t" | |
11fdf7f2 TL |
499 | : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) |
500 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
7c673cae FG |
501 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
502 | ); | |
b32b8144 FG |
503 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
504 | ||
7c673cae | 505 | return success;
b32b8144 | 506 |
11fdf7f2 | 507 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
7c673cae FG |
508 | } |
509 | ||
510 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
511 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
512 | { | |
513 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
514 | } | |
515 | ||
// Replaces the 16-byte contents of `storage` with `v` and returns the previous contents.
// Both variants use the same scheme: rdx:rax is seeded with a plain read of the two 8-byte halves of storage,
// then lock; cmpxchg16b is retried at label 1 until the compare succeeds (a failed compare refreshes rdx:rax
// with the current contents). When the loop exits, rdx:rax holds the value that was replaced.
// The memory_order argument is unnamed and not used.
516 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
517 | { | |
// Variant without a register-pair constraint: rax and rdx are separate outputs combined with memcpy.
11fdf7f2 TL |
518 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
519 | uint64_t old_bits[2]; | |
7c673cae FG |
520 | __asm__ __volatile__ |
521 | ( | |
11fdf7f2 TL |
522 | "movq %[dest_lo], %%rax\n\t" |
523 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 524 | ".align 16\n\t"
11fdf7f2 | 525 | "1: lock; cmpxchg16b %[dest_lo]\n\t"
7c673cae | 526 | "jne 1b\n\t"
11fdf7f2 TL |
527 | : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1]) |
528 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
529 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 530 | );
7c673cae | 531 |
11fdf7f2 TL |
532 | storage_type old_value; |
533 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
7c673cae | 534 | return old_value;
// Default variant: the result is returned through a single "A" output.
11fdf7f2 TL |
535 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
536 | storage_type old_value; | |
7c673cae FG |
537 | __asm__ __volatile__ |
538 | ( | |
11fdf7f2 TL |
539 | "movq %[dest_lo], %%rax\n\t" |
540 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 541 | ".align 16\n\t"
11fdf7f2 | 542 | "1: lock; cmpxchg16b %[dest_lo]\n\t"
7c673cae | 543 | "jne 1b\n\t"
11fdf7f2 TL |
544 | : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) |
545 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
7c673cae FG |
546 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
547 | ); | |
7c673cae | 548 |
11fdf7f2 TL |
549 | return old_value; |
550 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
7c673cae | 551 | }
7c673cae FG |
552 | }; |
553 | ||
554 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
555 | ||
556 | } // namespace detail | |
557 | } // namespace atomics | |
558 | } // namespace boost | |
559 | ||
560 | #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ |