]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Distributed under the Boost Software License, Version 1.0. | |
3 | * (See accompanying file LICENSE_1_0.txt or copy at | |
4 | * http://www.boost.org/LICENSE_1_0.txt) | |
5 | * | |
6 | * Copyright (c) 2009 Helge Bahmann | |
7 | * Copyright (c) 2012 Tim Blechmann | |
11fdf7f2 | 8 | * Copyright (c) 2014 - 2018 Andrey Semashev |
7c673cae FG |
9 | */ |
10 | /*! | |
11 | * \file atomic/detail/ops_gcc_x86_dcas.hpp | |
12 | * | |
13 | * This header contains implementation of the double-width CAS primitive for x86. | |
14 | */ | |
15 | ||
16 | #ifndef BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
17 | #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ | |
18 | ||
19 | #include <boost/cstdint.hpp> | |
20 | #include <boost/memory_order.hpp> | |
21 | #include <boost/atomic/detail/config.hpp> | |
22 | #include <boost/atomic/detail/storage_type.hpp> | |
11fdf7f2 | 23 | #include <boost/atomic/detail/string_ops.hpp> |
7c673cae FG |
24 | #include <boost/atomic/capabilities.hpp> |
25 | ||
26 | #ifdef BOOST_HAS_PRAGMA_ONCE | |
27 | #pragma once | |
28 | #endif | |
29 | ||
30 | namespace boost { | |
31 | namespace atomics { | |
32 | namespace detail { | |
33 | ||
11fdf7f2 TL |
34 | // Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory |
35 | // operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang | |
36 | // is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason | |
37 | // we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards. | |
38 | // Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster. | |
39 | // The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer | |
40 | // with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it. | |
41 | ||
7c673cae FG |
42 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) |
43 | ||
44 | template< bool Signed > | |
45 | struct gcc_dcas_x86 | |
46 | { | |
11fdf7f2 TL |
47 | typedef typename make_storage_type< 8u >::type storage_type; |
48 | typedef typename make_storage_type< 8u >::aligned aligned_storage_type; | |
49 | typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t; | |
7c673cae | 50 | |
11fdf7f2 | 51 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; |
7c673cae FG |
52 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; |
53 | ||
54 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
55 | { | |
11fdf7f2 | 56 | if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u)) |
7c673cae | 57 | { |
11fdf7f2 TL |
58 | #if defined(__SSE__) |
59 | typedef float xmm_t __attribute__((__vector_size__(16))); | |
60 | xmm_t xmm_scratch; | |
7c673cae FG |
61 | __asm__ __volatile__ |
62 | ( | |
63 | #if defined(__AVX__) | |
11fdf7f2 TL |
64 | "vmovq %[value], %[xmm_scratch]\n\t" |
65 | "vmovq %[xmm_scratch], %[storage]\n\t" | |
66 | #elif defined(__SSE2__) | |
67 | "movq %[value], %[xmm_scratch]\n\t" | |
68 | "movq %[xmm_scratch], %[storage]\n\t" | |
7c673cae | 69 | #else |
11fdf7f2 TL |
70 | "xorps %[xmm_scratch], %[xmm_scratch]\n\t" |
71 | "movlps %[value], %[xmm_scratch]\n\t" | |
72 | "movlps %[xmm_scratch], %[storage]\n\t" | |
7c673cae | 73 | #endif |
11fdf7f2 TL |
74 | : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch) |
75 | : [value] "m" (v) | |
76 | : "memory" | |
7c673cae FG |
77 | ); |
78 | #else | |
79 | __asm__ __volatile__ | |
80 | ( | |
11fdf7f2 TL |
81 | "fildll %[value]\n\t" |
82 | "fistpll %[storage]\n\t" | |
83 | : [storage] "=m" (storage) | |
84 | : [value] "m" (v) | |
7c673cae FG |
85 | : "memory" |
86 | ); | |
87 | #endif | |
88 | } | |
89 | else | |
90 | { | |
11fdf7f2 | 91 | #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
92 | __asm__ __volatile__ |
93 | ( | |
11fdf7f2 TL |
94 | "xchgl %%ebx, %%esi\n\t" |
95 | "movl %%eax, %%ebx\n\t" | |
96 | "movl (%[dest]), %%eax\n\t" | |
7c673cae FG |
97 | "movl 4(%[dest]), %%edx\n\t" |
98 | ".align 16\n\t" | |
11fdf7f2 | 99 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 100 | "jne 1b\n\t" |
11fdf7f2 TL |
101 | "xchgl %%ebx, %%esi\n\t" |
102 | : | |
103 | : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
104 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" |
105 | ); | |
11fdf7f2 | 106 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
107 | __asm__ __volatile__ |
108 | ( | |
11fdf7f2 TL |
109 | "movl %[dest_lo], %%eax\n\t" |
110 | "movl %[dest_hi], %%edx\n\t" | |
7c673cae | 111 | ".align 16\n\t" |
11fdf7f2 | 112 | "1: lock; cmpxchg8b %[dest_lo]\n\t" |
7c673cae | 113 | "jne 1b\n\t" |
11fdf7f2 TL |
114 | : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) |
115 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
116 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" |
117 | ); | |
11fdf7f2 | 118 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
119 | } |
120 | } | |
121 | ||
122 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
123 | { | |
124 | storage_type value; | |
125 | ||
11fdf7f2 | 126 | if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u)) |
7c673cae | 127 | { |
11fdf7f2 TL |
128 | #if defined(__SSE__) |
129 | typedef float xmm_t __attribute__((__vector_size__(16))); | |
130 | xmm_t xmm_scratch; | |
7c673cae FG |
131 | __asm__ __volatile__ |
132 | ( | |
133 | #if defined(__AVX__) | |
11fdf7f2 TL |
134 | "vmovq %[storage], %[xmm_scratch]\n\t" |
135 | "vmovq %[xmm_scratch], %[value]\n\t" | |
136 | #elif defined(__SSE2__) | |
137 | "movq %[storage], %[xmm_scratch]\n\t" | |
138 | "movq %[xmm_scratch], %[value]\n\t" | |
7c673cae | 139 | #else |
11fdf7f2 TL |
140 | "xorps %[xmm_scratch], %[xmm_scratch]\n\t" |
141 | "movlps %[storage], %[xmm_scratch]\n\t" | |
142 | "movlps %[xmm_scratch], %[value]\n\t" | |
7c673cae | 143 | #endif |
11fdf7f2 TL |
144 | : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch) |
145 | : [storage] "m" (storage) | |
146 | : "memory" | |
7c673cae FG |
147 | ); |
148 | #else | |
149 | __asm__ __volatile__ | |
150 | ( | |
11fdf7f2 TL |
151 | "fildll %[storage]\n\t" |
152 | "fistpll %[value]\n\t" | |
153 | : [value] "=m" (value) | |
154 | : [storage] "m" (storage) | |
7c673cae FG |
155 | : "memory" |
156 | ); | |
157 | #endif | |
158 | } | |
159 | else | |
160 | { | |
92f5a8d4 TL |
161 | // Note that despite const qualification cmpxchg8b below may issue a store to the storage. The storage value |
162 | // will not change, but this prevents the storage to reside in read-only memory. | |
163 | ||
164 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
165 | ||
11fdf7f2 | 166 | uint32_t value_bits[2]; |
92f5a8d4 | 167 | |
11fdf7f2 TL |
168 | // We don't care for comparison result here; the previous value will be stored into value anyway. |
169 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
170 | __asm__ __volatile__ | |
171 | ( | |
172 | "movl %%ebx, %%eax\n\t" | |
173 | "movl %%ecx, %%edx\n\t" | |
174 | "lock; cmpxchg8b %[storage]\n\t" | |
175 | : "=&a" (value_bits[0]), "=&d" (value_bits[1]) | |
176 | : [storage] "m" (storage) | |
177 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
178 | ); | |
179 | BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); | |
92f5a8d4 | 180 | |
11fdf7f2 | 181 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
92f5a8d4 | 182 | |
7c673cae FG |
183 | // We don't care for comparison result here; the previous value will be stored into value anyway. |
184 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
185 | __asm__ __volatile__ | |
186 | ( | |
187 | "movl %%ebx, %%eax\n\t" | |
188 | "movl %%ecx, %%edx\n\t" | |
189 | "lock; cmpxchg8b %[storage]\n\t" | |
190 | : "=&A" (value) | |
191 | : [storage] "m" (storage) | |
192 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
193 | ); | |
92f5a8d4 | 194 | |
11fdf7f2 | 195 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
7c673cae FG |
196 | } |
197 | ||
198 | return value; | |
199 | } | |
200 | ||
201 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
202 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
203 | { | |
204 | #if defined(__clang__) | |
b32b8144 | 205 | |
7c673cae FG |
206 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics |
207 | storage_type old_expected = expected; | |
208 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
209 | return expected == old_expected; | |
b32b8144 | 210 | |
11fdf7f2 | 211 | #elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae | 212 | |
7c673cae | 213 | bool success; |
11fdf7f2 | 214 | |
b32b8144 | 215 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
7c673cae FG |
216 | __asm__ __volatile__ |
217 | ( | |
11fdf7f2 | 218 | "xchgl %%ebx, %%esi\n\t" |
b32b8144 | 219 | "lock; cmpxchg8b (%[dest])\n\t" |
11fdf7f2 TL |
220 | "xchgl %%ebx, %%esi\n\t" |
221 | : "+A" (expected), [success] "=@ccz" (success) | |
222 | : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
b32b8144 FG |
223 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
224 | ); | |
225 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
226 | __asm__ __volatile__ | |
227 | ( | |
11fdf7f2 | 228 | "xchgl %%ebx, %%esi\n\t" |
b32b8144 | 229 | "lock; cmpxchg8b (%[dest])\n\t" |
11fdf7f2 | 230 | "xchgl %%ebx, %%esi\n\t" |
7c673cae | 231 | "sete %[success]\n\t" |
11fdf7f2 TL |
232 | : "+A" (expected), [success] "=qm" (success) |
233 | : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
234 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
235 | ); | |
b32b8144 FG |
236 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
237 | ||
7c673cae | 238 | return success; |
b32b8144 | 239 | |
11fdf7f2 | 240 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
b32b8144 | 241 | |
7c673cae | 242 | bool success; |
11fdf7f2 | 243 | |
b32b8144 FG |
244 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
245 | __asm__ __volatile__ | |
246 | ( | |
247 | "lock; cmpxchg8b %[dest]\n\t" | |
248 | : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success) | |
249 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
250 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
251 | ); | |
252 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
253 | __asm__ __volatile__ |
254 | ( | |
255 | "lock; cmpxchg8b %[dest]\n\t" | |
256 | "sete %[success]\n\t" | |
11fdf7f2 | 257 | : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) |
7c673cae | 258 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) |
7c673cae FG |
259 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
260 | ); | |
b32b8144 FG |
261 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
262 | ||
7c673cae | 263 | return success; |
b32b8144 | 264 | |
11fdf7f2 | 265 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
266 | } |
267 | ||
268 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
269 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
270 | { | |
271 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
272 | } | |
273 | ||
92f5a8d4 | 274 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT |
7c673cae | 275 | { |
11fdf7f2 TL |
276 | #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
277 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
278 | ||
279 | uint32_t old_bits[2]; | |
7c673cae FG |
280 | __asm__ __volatile__ |
281 | ( | |
11fdf7f2 TL |
282 | "xchgl %%ebx, %%esi\n\t" |
283 | "movl (%[dest]), %%eax\n\t" | |
284 | "movl 4(%[dest]), %%edx\n\t" | |
7c673cae | 285 | ".align 16\n\t" |
11fdf7f2 | 286 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 287 | "jne 1b\n\t" |
11fdf7f2 TL |
288 | "xchgl %%ebx, %%esi\n\t" |
289 | : "=a" (old_bits[0]), "=d" (old_bits[1]) | |
290 | : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
291 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 292 | ); |
11fdf7f2 TL |
293 | |
294 | storage_type old_value; | |
295 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
296 | return old_value; | |
297 | ||
298 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
299 | ||
300 | storage_type old_value; | |
7c673cae FG |
301 | __asm__ __volatile__ |
302 | ( | |
11fdf7f2 TL |
303 | "xchgl %%ebx, %%esi\n\t" |
304 | "movl (%[dest]), %%eax\n\t" | |
305 | "movl 4(%[dest]), %%edx\n\t" | |
7c673cae | 306 | ".align 16\n\t" |
11fdf7f2 | 307 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 308 | "jne 1b\n\t" |
11fdf7f2 TL |
309 | "xchgl %%ebx, %%esi\n\t" |
310 | : "=A" (old_value) | |
311 | : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
312 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
313 | ); | |
11fdf7f2 TL |
314 | return old_value; |
315 | ||
316 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
317 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) | |
318 | #if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407 | |
319 | ||
320 | // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below | |
321 | uint32_t old_bits[2]; | |
7c673cae FG |
322 | __asm__ __volatile__ |
323 | ( | |
11fdf7f2 | 324 | "movl (%[dest]), %%eax\n\t" |
7c673cae FG |
325 | "movl 4(%[dest]), %%edx\n\t" |
326 | ".align 16\n\t" | |
11fdf7f2 | 327 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 328 | "jne 1b\n\t" |
11fdf7f2 TL |
329 | : "=&a" (old_bits[0]), "=&d" (old_bits[1]) |
330 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage) | |
331 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 332 | ); |
11fdf7f2 TL |
333 | |
334 | storage_type old_value; | |
335 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
336 | return old_value; | |
337 | ||
338 | #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
339 | ||
340 | uint32_t old_bits[2]; | |
7c673cae FG |
341 | __asm__ __volatile__ |
342 | ( | |
11fdf7f2 TL |
343 | "movl %[dest_lo], %%eax\n\t" |
344 | "movl %[dest_hi], %%edx\n\t" | |
7c673cae | 345 | ".align 16\n\t" |
11fdf7f2 | 346 | "1: lock; cmpxchg8b %[dest_lo]\n\t" |
7c673cae | 347 | "jne 1b\n\t" |
11fdf7f2 TL |
348 | : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) |
349 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
350 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
351 | ); | |
11fdf7f2 TL |
352 | |
353 | storage_type old_value; | |
354 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
355 | return old_value; | |
356 | ||
357 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
358 | ||
359 | storage_type old_value; | |
360 | __asm__ __volatile__ | |
361 | ( | |
362 | "movl %[dest_lo], %%eax\n\t" | |
363 | "movl %[dest_hi], %%edx\n\t" | |
364 | ".align 16\n\t" | |
365 | "1: lock; cmpxchg8b %[dest_lo]\n\t" | |
366 | "jne 1b\n\t" | |
367 | : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) | |
368 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
369 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
370 | ); | |
371 | return old_value; | |
372 | ||
373 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
374 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) | |
7c673cae | 375 | } |
7c673cae FG |
376 | }; |
377 | ||
378 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
379 | ||
380 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
381 | ||
382 | template< bool Signed > | |
383 | struct gcc_dcas_x86_64 | |
384 | { | |
11fdf7f2 TL |
385 | typedef typename make_storage_type< 16u >::type storage_type; |
386 | typedef typename make_storage_type< 16u >::aligned aligned_storage_type; | |
387 | typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t; | |
7c673cae | 388 | |
11fdf7f2 | 389 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; |
7c673cae FG |
390 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; |
391 | ||
392 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
393 | { | |
7c673cae FG |
394 | __asm__ __volatile__ |
395 | ( | |
11fdf7f2 TL |
396 | "movq %[dest_lo], %%rax\n\t" |
397 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 398 | ".align 16\n\t" |
11fdf7f2 | 399 | "1: lock; cmpxchg16b %[dest_lo]\n\t" |
7c673cae | 400 | "jne 1b\n\t" |
11fdf7f2 TL |
401 | : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) |
402 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
7c673cae FG |
403 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" |
404 | ); | |
7c673cae FG |
405 | } |
406 | ||
407 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
408 | { | |
92f5a8d4 TL |
409 | // Note that despite const qualification cmpxchg16b below may issue a store to the storage. The storage value |
410 | // will not change, but this prevents the storage to reside in read-only memory. | |
11fdf7f2 | 411 | |
92f5a8d4 | 412 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
11fdf7f2 | 413 | |
11fdf7f2 | 414 | uint64_t value_bits[2]; |
7c673cae FG |
415 | |
416 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
417 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
7c673cae FG |
418 | __asm__ __volatile__ |
419 | ( | |
420 | "movq %%rbx, %%rax\n\t" | |
421 | "movq %%rcx, %%rdx\n\t" | |
422 | "lock; cmpxchg16b %[storage]\n\t" | |
11fdf7f2 | 423 | : "=&a" (value_bits[0]), "=&d" (value_bits[1]) |
7c673cae | 424 | : [storage] "m" (storage) |
11fdf7f2 | 425 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
7c673cae | 426 | ); |
7c673cae | 427 | |
11fdf7f2 TL |
428 | storage_type value; |
429 | BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); | |
7c673cae | 430 | return value; |
11fdf7f2 TL |
431 | |
432 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
433 | ||
7c673cae FG |
434 | storage_type value; |
435 | ||
436 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
437 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
438 | __asm__ __volatile__ | |
439 | ( | |
440 | "movq %%rbx, %%rax\n\t" | |
441 | "movq %%rcx, %%rdx\n\t" | |
442 | "lock; cmpxchg16b %[storage]\n\t" | |
443 | : "=&A" (value) | |
444 | : [storage] "m" (storage) | |
445 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
446 | ); | |
447 | ||
448 | return value; | |
11fdf7f2 TL |
449 | |
450 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
7c673cae FG |
451 | } |
452 | ||
453 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
454 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
455 | { | |
456 | #if defined(__clang__) | |
b32b8144 | 457 | |
7c673cae FG |
458 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics |
459 | storage_type old_expected = expected; | |
460 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
461 | return expected == old_expected; | |
b32b8144 | 462 | |
11fdf7f2 | 463 | #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
b32b8144 | 464 | |
11fdf7f2 | 465 | // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap |
7c673cae | 466 | bool success; |
7c673cae FG |
467 | __asm__ __volatile__ |
468 | ( | |
7c673cae FG |
469 | "lock; cmpxchg16b %[dest]\n\t" |
470 | "sete %[success]\n\t" | |
11fdf7f2 TL |
471 | : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success) |
472 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
473 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 474 | ); |
7c673cae FG |
475 | |
476 | return success; | |
b32b8144 | 477 | |
11fdf7f2 | 478 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
b32b8144 | 479 | |
7c673cae | 480 | bool success; |
11fdf7f2 | 481 | |
b32b8144 FG |
482 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
483 | __asm__ __volatile__ | |
484 | ( | |
485 | "lock; cmpxchg16b %[dest]\n\t" | |
11fdf7f2 TL |
486 | : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success) |
487 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
b32b8144 FG |
488 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
489 | ); | |
490 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
491 | __asm__ __volatile__ |
492 | ( | |
493 | "lock; cmpxchg16b %[dest]\n\t" | |
494 | "sete %[success]\n\t" | |
11fdf7f2 TL |
495 | : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) |
496 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
7c673cae FG |
497 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
498 | ); | |
b32b8144 FG |
499 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
500 | ||
7c673cae | 501 | return success; |
b32b8144 | 502 | |
11fdf7f2 | 503 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
7c673cae FG |
504 | } |
505 | ||
506 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
507 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
508 | { | |
509 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
510 | } | |
511 | ||
512 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
513 | { | |
11fdf7f2 TL |
514 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
515 | uint64_t old_bits[2]; | |
7c673cae FG |
516 | __asm__ __volatile__ |
517 | ( | |
11fdf7f2 TL |
518 | "movq %[dest_lo], %%rax\n\t" |
519 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 520 | ".align 16\n\t" |
11fdf7f2 | 521 | "1: lock; cmpxchg16b %[dest_lo]\n\t" |
7c673cae | 522 | "jne 1b\n\t" |
11fdf7f2 TL |
523 | : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1]) |
524 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
525 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 526 | ); |
7c673cae | 527 | |
11fdf7f2 TL |
528 | storage_type old_value; |
529 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
7c673cae | 530 | return old_value; |
11fdf7f2 TL |
531 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
532 | storage_type old_value; | |
7c673cae FG |
533 | __asm__ __volatile__ |
534 | ( | |
11fdf7f2 TL |
535 | "movq %[dest_lo], %%rax\n\t" |
536 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 537 | ".align 16\n\t" |
11fdf7f2 | 538 | "1: lock; cmpxchg16b %[dest_lo]\n\t" |
7c673cae | 539 | "jne 1b\n\t" |
11fdf7f2 TL |
540 | : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) |
541 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
7c673cae FG |
542 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
543 | ); | |
7c673cae | 544 | |
11fdf7f2 TL |
545 | return old_value; |
546 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
7c673cae | 547 | } |
7c673cae FG |
548 | }; |
549 | ||
550 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
551 | ||
552 | } // namespace detail | |
553 | } // namespace atomics | |
554 | } // namespace boost | |
555 | ||
556 | #endif // BOOST_ATOMIC_DETAIL_OPS_GCC_X86_DCAS_HPP_INCLUDED_ |