]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * Distributed under the Boost Software License, Version 1.0. | |
3 | * (See accompanying file LICENSE_1_0.txt or copy at | |
4 | * http://www.boost.org/LICENSE_1_0.txt) | |
5 | * | |
6 | * Copyright (c) 2009 Helge Bahmann | |
7 | * Copyright (c) 2012 Tim Blechmann | |
20effc67 | 8 | * Copyright (c) 2014 Andrey Semashev |
7c673cae FG |
9 | */ |
10 | /*! | |
20effc67 | 11 | * \file atomic/detail/core_arch_ops_gcc_x86.hpp |
7c673cae | 12 | * |
20effc67 | 13 | * This header contains implementation of the \c core_arch_operations template. |
7c673cae FG |
14 | */ |
15 | ||
20effc67 TL |
16 | #ifndef BOOST_ATOMIC_DETAIL_CORE_ARCH_OPS_GCC_X86_HPP_INCLUDED_ |
17 | #define BOOST_ATOMIC_DETAIL_CORE_ARCH_OPS_GCC_X86_HPP_INCLUDED_ | |
7c673cae | 18 | |
20effc67 | 19 | #include <cstddef> |
7c673cae FG |
20 | #include <boost/memory_order.hpp> |
21 | #include <boost/atomic/detail/config.hpp> | |
f67539c2 | 22 | #include <boost/atomic/detail/storage_traits.hpp> |
20effc67 TL |
23 | #include <boost/atomic/detail/core_arch_operations_fwd.hpp> |
24 | #include <boost/atomic/detail/capabilities.hpp> | |
25 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) || defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
26 | #include <boost/cstdint.hpp> | |
27 | #include <boost/atomic/detail/intptr.hpp> | |
11fdf7f2 | 28 | #include <boost/atomic/detail/string_ops.hpp> |
20effc67 TL |
29 | #include <boost/atomic/detail/core_ops_cas_based.hpp> |
30 | #endif | |
31 | #include <boost/atomic/detail/header.hpp> | |
7c673cae FG |
32 | |
33 | #ifdef BOOST_HAS_PRAGMA_ONCE | |
34 | #pragma once | |
35 | #endif | |
36 | ||
37 | namespace boost { | |
38 | namespace atomics { | |
39 | namespace detail { | |
40 | ||
20effc67 TL |
// Common base of the x86 operation classes in this header. It provides two traits
// (full_cas_based = false, is_always_lock_free = true) and two helpers that emit a
// compiler-level barrier: an empty asm statement with a "memory" clobber.
41 | struct core_arch_operations_gcc_x86_base
42 | { | |
43 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = false; | |
44 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; | |
45 | ||
// Emits the barrier only when `order` has the memory_order_release bit set.
// The asm template is empty, so the statement contributes no instruction text.
46 | static BOOST_FORCEINLINE void fence_before(memory_order order) BOOST_NOEXCEPT | |
47 | { | |
48 | if ((static_cast< unsigned int >(order) & static_cast< unsigned int >(memory_order_release)) != 0u) | |
49 | __asm__ __volatile__ ("" ::: "memory"); | |
50 | } | |
51 | ||
// Emits the barrier only when `order` has any of the memory_order_consume or
// memory_order_acquire bits set.
52 | static BOOST_FORCEINLINE void fence_after(memory_order order) BOOST_NOEXCEPT | |
53 | { | |
54 | if ((static_cast< unsigned int >(order) & (static_cast< unsigned int >(memory_order_consume) | static_cast< unsigned int >(memory_order_acquire))) != 0u) | |
55 | __asm__ __volatile__ ("" ::: "memory"); | |
56 | } | |
57 | }; | |
58 | ||
// Size-generic part of the x86 operations. The operations defined here are expressed
// through primitives looked up on `Derived`: exchange, fetch_add and
// compare_exchange_strong.
//   Size         - storage size in bytes; also used as the storage alignment.
//   Signed       - exposed as is_signed.
//   Interprocess - exposed as is_interprocess.
//   Derived      - class that supplies the primitives listed above.
59 | template< std::size_t Size, bool Signed, bool Interprocess, typename Derived > | |
60 | struct core_arch_operations_gcc_x86 : | |
61 | public core_arch_operations_gcc_x86_base | |
62 | { | |
63 | typedef typename storage_traits< Size >::type storage_type; | |
64 | ||
65 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = Size; | |
66 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = Size; | |
67 | static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed; | |
68 | static BOOST_CONSTEXPR_OR_CONST bool is_interprocess = Interprocess; | |
69 | ||
// Stores v into storage. For every order except memory_order_seq_cst this is a plain
// volatile assignment bracketed by fence_before/fence_after. For seq_cst the store is
// performed through Derived::exchange and the returned previous value is discarded.
70 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT | |
71 | { | |
72 | if (order != memory_order_seq_cst) | |
73 | { | |
74 | fence_before(order); | |
75 | storage = v; | |
76 | fence_after(order); | |
77 | } | |
78 | else | |
79 | { | |
80 | Derived::exchange(storage, v, order); | |
81 | } | |
82 | } | |
83 | ||
// Reads storage with a plain volatile read, then applies fence_after(order).
84 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order order) BOOST_NOEXCEPT | |
85 | { | |
86 | storage_type v = storage; | |
87 | fence_after(order); | |
88 | return v; | |
89 | } | |
90 | ||
// Subtraction is implemented as Derived::fetch_add of the negated operand.
91 | static BOOST_FORCEINLINE storage_type fetch_sub(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT | |
92 | { | |
93 | return Derived::fetch_add(storage, -v, order); | |
94 | } | |
95 | ||
// The weak form simply forwards to Derived::compare_exchange_strong.
96 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
97 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
98 | { | |
99 | return Derived::compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
100 | } | |
101 | ||
// Exchanges 1 into storage and returns true if the value returned by the exchange was non-zero.
102 | static BOOST_FORCEINLINE bool test_and_set(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT | |
103 | { | |
104 | return !!Derived::exchange(storage, (storage_type)1, order); | |
105 | } | |
106 | ||
// Resets storage to 0 via store(), so the seq_cst/non-seq_cst split of store() applies.
107 | static BOOST_FORCEINLINE void clear(storage_type volatile& storage, memory_order order) BOOST_NOEXCEPT | |
108 | { | |
109 | store(storage, (storage_type)0, order); | |
110 | } | |
111 | }; | |
112 | ||
// Specialization for 1-byte storage. The primitives are written as inline asm using
// byte-sized instructions (xaddb, xchgb, cmpxchgb). The memory_order parameters of
// these primitives are unnamed and not consulted; every asm statement carries a
// "memory" clobber.
113 | template< bool Signed, bool Interprocess > | |
114 | struct core_arch_operations< 1u, Signed, Interprocess > : | |
115 | public core_arch_operations_gcc_x86< 1u, Signed, Interprocess, core_arch_operations< 1u, Signed, Interprocess > > | |
116 | { | |
117 | typedef core_arch_operations_gcc_x86< 1u, Signed, Interprocess, core_arch_operations< 1u, Signed, Interprocess > > base_type; | |
118 | typedef typename base_type::storage_type storage_type; | |
// 4-byte scratch type; the CAS loop macro below declares its temporary with this type
// and addresses its low byte with the %b operand modifier.
119 | typedef typename storage_traits< 4u >::type temp_storage_type; | |
120 | ||
// Atomically adds v to storage with "lock; xaddb" and returns v as left by the
// instruction (xadd exchanges its operands while adding, so the register ends up
// holding the pre-addition storage value).
121 | static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
122 | { | |
123 | __asm__ __volatile__ | |
124 | ( | |
125 | "lock; xaddb %0, %1" | |
126 | : "+q" (v), "+m" (storage) | |
127 | : | |
128 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
129 | ); | |
130 | return v; | |
131 | } | |
132 | ||
// Swaps v with storage using xchgb and returns the value that was swapped out.
// No lock prefix is written and no flags clobber is declared for this statement.
133 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
134 | { | |
135 | __asm__ __volatile__ | |
136 | ( | |
137 | "xchgb %0, %1" | |
138 | : "+q" (v), "+m" (storage) | |
139 | : | |
140 | : "memory" | |
141 | ); | |
142 | return v; | |
143 | } | |
144 | ||
// Compare-and-swap via "lock; cmpxchgb". `expected` is copied into the accumulator
// operand ("+a"); after the instruction the accumulator is written back to `expected`.
// The success flag is taken from ZF, either as a flag output operand ("=@ccz") when
// BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS is defined, or through an explicit sete.
145 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
146 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
147 | { | |
148 | storage_type previous = expected; | |
149 | bool success; | |
150 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
151 | __asm__ __volatile__ | |
152 | ( | |
153 | "lock; cmpxchgb %3, %1" | |
154 | : "+a" (previous), "+m" (storage), "=@ccz" (success) | |
155 | : "q" (desired) | |
156 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
157 | ); | |
158 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
159 | __asm__ __volatile__ | |
160 | ( | |
161 | "lock; cmpxchgb %3, %1\n\t" | |
162 | "sete %2" | |
163 | : "+a" (previous), "+m" (storage), "=q" (success) | |
164 | : "q" (desired) | |
165 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
166 | ); | |
167 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
168 | expected = previous; | |
169 | return success; | |
170 | } | |
171 | ||
// CAS loop used by fetch_and/fetch_or/fetch_xor. Each iteration copies `argument` into
// the temporary, combines it with al using `op`, and attempts to install the result
// with "lock; cmpxchgb"; "jne 1b" repeats the iteration while the compare fails.
// `result` is bound to the accumulator ("+a"): it supplies the initial comparand and
// receives the accumulator contents when the loop exits.
// The macro declares a local named new_val, so it is expanded once per function body.
172 | #define BOOST_ATOMIC_DETAIL_CAS_LOOP(op, argument, result)\ | |
173 | temp_storage_type new_val;\ | |
174 | __asm__ __volatile__\ | |
175 | (\ | |
176 | ".align 16\n\t"\ | |
177 | "1: mov %[arg], %2\n\t"\ | |
178 | op " %%al, %b2\n\t"\ | |
179 | "lock; cmpxchgb %b2, %[storage]\n\t"\ | |
180 | "jne 1b"\ | |
181 | : [res] "+a" (result), [storage] "+m" (storage), "=&q" (new_val)\ | |
182 | : [arg] "ir" ((temp_storage_type)argument)\ | |
183 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"\ | |
184 | ) | |
185 | ||
// Bitwise AND of storage with v; res is seeded with a plain read of storage and
// returned after the CAS loop completes.
186 | static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
187 | { | |
188 | storage_type res = storage; | |
189 | BOOST_ATOMIC_DETAIL_CAS_LOOP("andb", v, res); | |
190 | return res; | |
191 | } | |
192 | ||
// Bitwise OR of storage with v; same structure as fetch_and.
193 | static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
194 | { | |
195 | storage_type res = storage; | |
196 | BOOST_ATOMIC_DETAIL_CAS_LOOP("orb", v, res); | |
197 | return res; | |
198 | } | |
199 | ||
// Bitwise XOR of storage with v; same structure as fetch_and.
200 | static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
201 | { | |
202 | storage_type res = storage; | |
203 | BOOST_ATOMIC_DETAIL_CAS_LOOP("xorb", v, res); | |
204 | return res; | |
205 | } | |
206 | ||
207 | #undef BOOST_ATOMIC_DETAIL_CAS_LOOP | |
208 | }; | |
209 | ||
// Specialization for 2-byte storage. The primitives are written as inline asm using
// word-sized instructions (xaddw, xchgw, cmpxchgw). The memory_order parameters of
// these primitives are unnamed and not consulted; every asm statement carries a
// "memory" clobber.
210 | template< bool Signed, bool Interprocess > | |
211 | struct core_arch_operations< 2u, Signed, Interprocess > : | |
212 | public core_arch_operations_gcc_x86< 2u, Signed, Interprocess, core_arch_operations< 2u, Signed, Interprocess > > | |
213 | { | |
214 | typedef core_arch_operations_gcc_x86< 2u, Signed, Interprocess, core_arch_operations< 2u, Signed, Interprocess > > base_type; | |
215 | typedef typename base_type::storage_type storage_type; | |
// 4-byte scratch type; the CAS loop macro below declares its temporary with this type
// and addresses its low word with the %w operand modifier.
216 | typedef typename storage_traits< 4u >::type temp_storage_type; | |
217 | ||
// Atomically adds v to storage with "lock; xaddw" and returns v as left by the
// instruction (xadd exchanges its operands while adding, so the register ends up
// holding the pre-addition storage value).
218 | static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
219 | { | |
220 | __asm__ __volatile__ | |
221 | ( | |
222 | "lock; xaddw %0, %1" | |
223 | : "+q" (v), "+m" (storage) | |
224 | : | |
225 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
226 | ); | |
227 | return v; | |
228 | } | |
229 | ||
// Swaps v with storage using xchgw and returns the value that was swapped out.
// No lock prefix is written and no flags clobber is declared for this statement.
230 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
231 | { | |
232 | __asm__ __volatile__ | |
233 | ( | |
234 | "xchgw %0, %1" | |
235 | : "+q" (v), "+m" (storage) | |
236 | : | |
237 | : "memory" | |
238 | ); | |
239 | return v; | |
240 | } | |
241 | ||
// Compare-and-swap via "lock; cmpxchgw". `expected` is copied into the accumulator
// operand ("+a"); after the instruction the accumulator is written back to `expected`.
// The success flag is taken from ZF, either as a flag output operand ("=@ccz") when
// BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS is defined, or through an explicit sete.
242 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
243 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
244 | { | |
245 | storage_type previous = expected; | |
246 | bool success; | |
247 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
248 | __asm__ __volatile__ | |
249 | ( | |
250 | "lock; cmpxchgw %3, %1" | |
251 | : "+a" (previous), "+m" (storage), "=@ccz" (success) | |
252 | : "q" (desired) | |
253 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
254 | ); | |
255 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
256 | __asm__ __volatile__ | |
257 | ( | |
258 | "lock; cmpxchgw %3, %1\n\t" | |
259 | "sete %2" | |
260 | : "+a" (previous), "+m" (storage), "=q" (success) | |
261 | : "q" (desired) | |
262 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
263 | ); | |
264 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
265 | expected = previous; | |
266 | return success; | |
267 | } | |
268 | ||
// CAS loop used by fetch_and/fetch_or/fetch_xor. Each iteration copies `argument` into
// the temporary, combines it with ax using `op`, and attempts to install the result
// with "lock; cmpxchgw"; "jne 1b" repeats the iteration while the compare fails.
// `result` is bound to the accumulator ("+a"): it supplies the initial comparand and
// receives the accumulator contents when the loop exits.
// The macro declares a local named new_val, so it is expanded once per function body.
269 | #define BOOST_ATOMIC_DETAIL_CAS_LOOP(op, argument, result)\ | |
270 | temp_storage_type new_val;\ | |
271 | __asm__ __volatile__\ | |
272 | (\ | |
273 | ".align 16\n\t"\ | |
274 | "1: mov %[arg], %2\n\t"\ | |
275 | op " %%ax, %w2\n\t"\ | |
276 | "lock; cmpxchgw %w2, %[storage]\n\t"\ | |
277 | "jne 1b"\ | |
278 | : [res] "+a" (result), [storage] "+m" (storage), "=&q" (new_val)\ | |
279 | : [arg] "ir" ((temp_storage_type)argument)\ | |
280 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"\ | |
281 | ) | |
282 | ||
// Bitwise AND of storage with v; res is seeded with a plain read of storage and
// returned after the CAS loop completes.
283 | static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
284 | { | |
285 | storage_type res = storage; | |
286 | BOOST_ATOMIC_DETAIL_CAS_LOOP("andw", v, res); | |
287 | return res; | |
288 | } | |
289 | ||
// Bitwise OR of storage with v; same structure as fetch_and.
290 | static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
291 | { | |
292 | storage_type res = storage; | |
293 | BOOST_ATOMIC_DETAIL_CAS_LOOP("orw", v, res); | |
294 | return res; | |
295 | } | |
296 | ||
// Bitwise XOR of storage with v; same structure as fetch_and.
297 | static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
298 | { | |
299 | storage_type res = storage; | |
300 | BOOST_ATOMIC_DETAIL_CAS_LOOP("xorw", v, res); | |
301 | return res; | |
302 | } | |
303 | ||
304 | #undef BOOST_ATOMIC_DETAIL_CAS_LOOP | |
305 | }; | |
306 | ||
// Specialization for 4-byte storage. The primitives are written as inline asm using
// long-sized instructions (xaddl, xchgl, cmpxchgl). Register operands use the general
// "r" constraint, and the CAS loop temporary has storage_type itself. The memory_order
// parameters of these primitives are unnamed and not consulted.
307 | template< bool Signed, bool Interprocess > | |
308 | struct core_arch_operations< 4u, Signed, Interprocess > : | |
309 | public core_arch_operations_gcc_x86< 4u, Signed, Interprocess, core_arch_operations< 4u, Signed, Interprocess > > | |
310 | { | |
311 | typedef core_arch_operations_gcc_x86< 4u, Signed, Interprocess, core_arch_operations< 4u, Signed, Interprocess > > base_type; | |
312 | typedef typename base_type::storage_type storage_type; | |
313 | ||
// Atomically adds v to storage with "lock; xaddl" and returns v as left by the
// instruction (xadd exchanges its operands while adding, so the register ends up
// holding the pre-addition storage value).
314 | static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
315 | { | |
316 | __asm__ __volatile__ | |
317 | ( | |
318 | "lock; xaddl %0, %1" | |
319 | : "+r" (v), "+m" (storage) | |
320 | : | |
321 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
322 | ); | |
323 | return v; | |
324 | } | |
325 | ||
// Swaps v with storage using xchgl and returns the value that was swapped out.
// No lock prefix is written and no flags clobber is declared for this statement.
326 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
327 | { | |
328 | __asm__ __volatile__ | |
329 | ( | |
330 | "xchgl %0, %1" | |
331 | : "+r" (v), "+m" (storage) | |
332 | : | |
333 | : "memory" | |
334 | ); | |
335 | return v; | |
336 | } | |
337 | ||
// Compare-and-swap via "lock; cmpxchgl". `expected` is copied into the accumulator
// operand ("+a"); after the instruction the accumulator is written back to `expected`.
// The success flag is taken from ZF, either as a flag output operand ("=@ccz") when
// BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS is defined, or through an explicit sete.
338 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
339 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
340 | { | |
341 | storage_type previous = expected; | |
342 | bool success; | |
343 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
344 | __asm__ __volatile__ | |
345 | ( | |
346 | "lock; cmpxchgl %3, %1" | |
347 | : "+a" (previous), "+m" (storage), "=@ccz" (success) | |
348 | : "r" (desired) | |
349 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
350 | ); | |
351 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
352 | __asm__ __volatile__ | |
353 | ( | |
354 | "lock; cmpxchgl %3, %1\n\t" | |
355 | "sete %2" | |
356 | : "+a" (previous), "+m" (storage), "=q" (success) | |
357 | : "r" (desired) | |
358 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
359 | ); | |
360 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
361 | expected = previous; | |
362 | return success; | |
363 | } | |
364 | ||
// CAS loop used by fetch_and/fetch_or/fetch_xor. Each iteration copies `argument` into
// new_val, combines it with eax using `op`, and attempts to install the result with
// "lock; cmpxchgl"; "jne 1b" repeats the iteration while the compare fails.
// `result` is bound to the accumulator ("+a"): it supplies the initial comparand and
// receives the accumulator contents when the loop exits.
// The macro declares a local named new_val, so it is expanded once per function body.
365 | #define BOOST_ATOMIC_DETAIL_CAS_LOOP(op, argument, result)\ | |
366 | storage_type new_val;\ | |
367 | __asm__ __volatile__\ | |
368 | (\ | |
369 | ".align 16\n\t"\ | |
370 | "1: mov %[arg], %[new_val]\n\t"\ | |
371 | op " %%eax, %[new_val]\n\t"\ | |
372 | "lock; cmpxchgl %[new_val], %[storage]\n\t"\ | |
373 | "jne 1b"\ | |
374 | : [res] "+a" (result), [storage] "+m" (storage), [new_val] "=&r" (new_val)\ | |
375 | : [arg] "ir" (argument)\ | |
376 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"\ | |
377 | ) | |
378 | ||
// Bitwise AND of storage with v; res is seeded with a plain read of storage and
// returned after the CAS loop completes.
379 | static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
380 | { | |
381 | storage_type res = storage; | |
382 | BOOST_ATOMIC_DETAIL_CAS_LOOP("andl", v, res); | |
383 | return res; | |
384 | } | |
385 | ||
// Bitwise OR of storage with v; same structure as fetch_and.
386 | static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
387 | { | |
388 | storage_type res = storage; | |
389 | BOOST_ATOMIC_DETAIL_CAS_LOOP("orl", v, res); | |
390 | return res; | |
391 | } | |
392 | ||
// Bitwise XOR of storage with v; same structure as fetch_and.
393 | static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
394 | { | |
395 | storage_type res = storage; | |
396 | BOOST_ATOMIC_DETAIL_CAS_LOOP("xorl", v, res); | |
397 | return res; | |
398 | } | |
399 | ||
400 | #undef BOOST_ATOMIC_DETAIL_CAS_LOOP | |
401 | }; | |
402 | ||
403 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) | |
404 | ||
11fdf7f2 TL |
405 | // Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory |
406 | // operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang | |
407 | // is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason | |
408 | // we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards. | |
409 | // Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster. | |
410 | // The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer | |
411 | // with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it. | |
412 | ||
// 64-bit (8-byte) operations for targets where BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B is
// defined. Only store, load, compare_exchange_strong/weak and exchange are provided;
// full_cas_based is true. All memory_order parameters are unnamed or merely forwarded.
20effc67 | 413 | template< bool Signed, bool Interprocess > |
7c673cae FG |
414 | struct gcc_dcas_x86 |
415 | { | |
f67539c2 | 416 | typedef typename storage_traits< 8u >::type storage_type; |
// 32-bit alias-permitted type used to name the upper half of a 64-bit storage as a
// separate memory operand.
11fdf7f2 | 417 | typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t; |
7c673cae | 418 | |
f67539c2 TL |
419 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 8u; |
420 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 8u; | |
421 | static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed; | |
20effc67 | 422 | static BOOST_CONSTEXPR_OR_CONST bool is_interprocess = Interprocess; |
11fdf7f2 | 423 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; |
7c673cae FG |
424 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; |
425 | ||
// Stores v into storage.
// - If the address of storage is a multiple of 8, the value is moved with a single
//   8-byte instruction: through an xmm scratch register when __SSE__ is defined
//   (vmovq / movq / xorps+movlps depending on __AVX__ and __SSE2__), otherwise through
//   the x87 stack (fildll + fistpll).
// - Otherwise the current contents are loaded into edx:eax and "lock; cmpxchg8b" is
//   repeated until it succeeds in replacing them with v (low half in ebx, high in ecx).
//   With BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX the low half arrives in eax, ebx is
//   swapped with esi around the loop and storage is addressed through edi.
// NOTE(review): in the non-PRESERVE_EBX branch dest_lo/dest_hi are declared "=m"
// although the asm also reads them; exchange() below uses "+m" for the same operands.
// The "memory" clobber is present in both - confirm whether "=m" is intentional.
426 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
427 | { | |
20effc67 | 428 | if (BOOST_LIKELY((((uintptr_t)&storage) & 0x00000007) == 0u)) |
7c673cae | 429 | { |
11fdf7f2 TL |
430 | #if defined(__SSE__) |
431 | typedef float xmm_t __attribute__((__vector_size__(16))); | |
432 | xmm_t xmm_scratch; | |
7c673cae FG |
433 | __asm__ __volatile__ |
434 | ( | |
435 | #if defined(__AVX__) | |
11fdf7f2 TL |
436 | "vmovq %[value], %[xmm_scratch]\n\t" |
437 | "vmovq %[xmm_scratch], %[storage]\n\t" | |
438 | #elif defined(__SSE2__) | |
439 | "movq %[value], %[xmm_scratch]\n\t" | |
440 | "movq %[xmm_scratch], %[storage]\n\t" | |
7c673cae | 441 | #else |
11fdf7f2 TL |
442 | "xorps %[xmm_scratch], %[xmm_scratch]\n\t" |
443 | "movlps %[value], %[xmm_scratch]\n\t" | |
444 | "movlps %[xmm_scratch], %[storage]\n\t" | |
7c673cae | 445 | #endif |
11fdf7f2 TL |
446 | : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch) |
447 | : [value] "m" (v) | |
448 | : "memory" | |
7c673cae FG |
449 | ); |
450 | #else | |
451 | __asm__ __volatile__ | |
452 | ( | |
11fdf7f2 TL |
453 | "fildll %[value]\n\t" |
454 | "fistpll %[storage]\n\t" | |
455 | : [storage] "=m" (storage) | |
456 | : [value] "m" (v) | |
7c673cae FG |
457 | : "memory" |
458 | ); | |
459 | #endif | |
460 | } | |
461 | else | |
462 | { | |
11fdf7f2 | 463 | #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
464 | __asm__ __volatile__ |
465 | ( | |
11fdf7f2 TL |
466 | "xchgl %%ebx, %%esi\n\t" |
467 | "movl %%eax, %%ebx\n\t" | |
468 | "movl (%[dest]), %%eax\n\t" | |
7c673cae FG |
469 | "movl 4(%[dest]), %%edx\n\t" |
470 | ".align 16\n\t" | |
11fdf7f2 | 471 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 472 | "jne 1b\n\t" |
11fdf7f2 TL |
473 | "xchgl %%ebx, %%esi\n\t" |
474 | : | |
475 | : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
476 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" |
477 | ); | |
11fdf7f2 | 478 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
479 | __asm__ __volatile__ |
480 | ( | |
11fdf7f2 TL |
481 | "movl %[dest_lo], %%eax\n\t" |
482 | "movl %[dest_hi], %%edx\n\t" | |
7c673cae | 483 | ".align 16\n\t" |
11fdf7f2 | 484 | "1: lock; cmpxchg8b %[dest_lo]\n\t" |
7c673cae | 485 | "jne 1b\n\t" |
11fdf7f2 TL |
486 | : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) |
487 | : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
488 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" |
489 | ); | |
11fdf7f2 | 490 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
491 | } |
492 | } | |
493 | ||
// Loads and returns the 64-bit value of storage.
// - If the address of storage is a multiple of 8, the value is moved with a single
//   8-byte instruction, mirroring the aligned path of store() (xmm scratch or x87).
// - Otherwise a single "lock; cmpxchg8b" is issued with edx:eax preset to ecx:ebx; the
//   value left in edx:eax is returned, either as a register pair ("=&A") or as two
//   32-bit halves copied into `value` when
//   BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS is defined.
494 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
495 | { | |
496 | storage_type value; | |
497 | ||
20effc67 | 498 | if (BOOST_LIKELY((((uintptr_t)&storage) & 0x00000007) == 0u)) |
7c673cae | 499 | { |
11fdf7f2 TL |
500 | #if defined(__SSE__) |
501 | typedef float xmm_t __attribute__((__vector_size__(16))); | |
502 | xmm_t xmm_scratch; | |
7c673cae FG |
503 | __asm__ __volatile__ |
504 | ( | |
505 | #if defined(__AVX__) | |
11fdf7f2 TL |
506 | "vmovq %[storage], %[xmm_scratch]\n\t" |
507 | "vmovq %[xmm_scratch], %[value]\n\t" | |
508 | #elif defined(__SSE2__) | |
509 | "movq %[storage], %[xmm_scratch]\n\t" | |
510 | "movq %[xmm_scratch], %[value]\n\t" | |
7c673cae | 511 | #else |
11fdf7f2 TL |
512 | "xorps %[xmm_scratch], %[xmm_scratch]\n\t" |
513 | "movlps %[storage], %[xmm_scratch]\n\t" | |
514 | "movlps %[xmm_scratch], %[value]\n\t" | |
7c673cae | 515 | #endif |
11fdf7f2 TL |
516 | : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch) |
517 | : [storage] "m" (storage) | |
518 | : "memory" | |
7c673cae FG |
519 | ); |
520 | #else | |
521 | __asm__ __volatile__ | |
522 | ( | |
11fdf7f2 TL |
523 | "fildll %[storage]\n\t" |
524 | "fistpll %[value]\n\t" | |
525 | : [value] "=m" (value) | |
526 | : [storage] "m" (storage) | |
7c673cae FG |
527 | : "memory" |
528 | ); | |
529 | #endif | |
530 | } | |
531 | else | |
532 | { | |
92f5a8d4 TL |
533 | // Note that despite const qualification cmpxchg8b below may issue a store to the storage. The storage value | |
534 | // will not change, but this prevents the storage from residing in read-only memory. | |
535 | ||
536 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
537 | ||
11fdf7f2 | 538 | uint32_t value_bits[2]; |
92f5a8d4 | 539 | |
11fdf7f2 TL |
540 | // We don't care for comparison result here; the previous value will be stored into value anyway. |
541 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
542 | __asm__ __volatile__ | |
543 | ( | |
544 | "movl %%ebx, %%eax\n\t" | |
545 | "movl %%ecx, %%edx\n\t" | |
546 | "lock; cmpxchg8b %[storage]\n\t" | |
547 | : "=&a" (value_bits[0]), "=&d" (value_bits[1]) | |
548 | : [storage] "m" (storage) | |
549 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
550 | ); | |
551 | BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); | |
92f5a8d4 | 552 | |
11fdf7f2 | 553 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
92f5a8d4 | 554 | |
7c673cae FG |
555 | // We don't care for comparison result here; the previous value will be stored into value anyway. |
556 | // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. | |
557 | __asm__ __volatile__ | |
558 | ( | |
559 | "movl %%ebx, %%eax\n\t" | |
560 | "movl %%ecx, %%edx\n\t" | |
561 | "lock; cmpxchg8b %[storage]\n\t" | |
562 | : "=&A" (value) | |
563 | : [storage] "m" (storage) | |
564 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
565 | ); | |
92f5a8d4 | 566 | |
11fdf7f2 | 567 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
7c673cae FG |
568 | } |
569 | ||
570 | return value; | |
571 | } | |
572 | ||
// 64-bit compare-and-swap. Returns true if storage was replaced with desired;
// `expected` receives the value left in edx:eax (asm variants) or the value returned
// by the intrinsic (clang variant).
// Three compile-time variants:
//   - __clang__: uses __sync_val_compare_and_swap and compares its result with the
//     original `expected`.
//   - BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX: the low half of desired is passed in
//     esi and swapped into ebx only around cmpxchg8b; storage is addressed through edi.
//   - otherwise: desired is passed directly in ebx/ecx and storage is a "+m" operand.
// In the asm variants the success flag comes from ZF, as a flag output ("=@ccz") or
// through sete, depending on BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS.
573 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
574 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
575 | { | |
576 | #if defined(__clang__) | |
b32b8144 | 577 | |
7c673cae FG |
578 | // Clang cannot allocate eax:edx register pairs but it has sync intrinsics |
579 | storage_type old_expected = expected; | |
580 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
581 | return expected == old_expected; | |
b32b8144 | 582 | |
11fdf7f2 | 583 | #elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae | 584 | |
7c673cae | 585 | bool success; |
11fdf7f2 | 586 | |
b32b8144 | 587 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
7c673cae FG |
588 | __asm__ __volatile__ |
589 | ( | |
11fdf7f2 | 590 | "xchgl %%ebx, %%esi\n\t" |
b32b8144 | 591 | "lock; cmpxchg8b (%[dest])\n\t" |
11fdf7f2 TL |
592 | "xchgl %%ebx, %%esi\n\t" |
593 | : "+A" (expected), [success] "=@ccz" (success) | |
594 | : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
b32b8144 FG |
595 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
596 | ); | |
597 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
598 | __asm__ __volatile__ | |
599 | ( | |
11fdf7f2 | 600 | "xchgl %%ebx, %%esi\n\t" |
b32b8144 | 601 | "lock; cmpxchg8b (%[dest])\n\t" |
11fdf7f2 | 602 | "xchgl %%ebx, %%esi\n\t" |
7c673cae | 603 | "sete %[success]\n\t" |
11fdf7f2 TL |
604 | : "+A" (expected), [success] "=qm" (success) |
605 | : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
606 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
607 | ); | |
b32b8144 FG |
608 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
609 | ||
7c673cae | 610 | return success; |
b32b8144 | 611 | |
11fdf7f2 | 612 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
b32b8144 | 613 | |
7c673cae | 614 | bool success; |
11fdf7f2 | 615 | |
b32b8144 FG |
616 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
617 | __asm__ __volatile__ | |
618 | ( | |
619 | "lock; cmpxchg8b %[dest]\n\t" | |
620 | : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success) | |
621 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) | |
622 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
623 | ); | |
624 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
625 | __asm__ __volatile__ |
626 | ( | |
627 | "lock; cmpxchg8b %[dest]\n\t" | |
628 | "sete %[success]\n\t" | |
11fdf7f2 | 629 | : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) |
7c673cae | 630 | : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) |
7c673cae FG |
631 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
632 | ); | |
b32b8144 FG |
633 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
634 | ||
7c673cae | 635 | return success; |
b32b8144 | 636 | |
11fdf7f2 | 637 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
7c673cae FG |
638 | } |
639 | ||
// The weak form forwards to compare_exchange_strong with the same arguments.
640 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
641 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
642 | { | |
643 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
644 | } | |
645 | ||
// Replaces the contents of storage with v and returns the value left in edx:eax once
// the loop succeeds. Every variant loads the current contents into edx:eax and repeats
// "lock; cmpxchg8b" until it succeeds. The variants differ only in how operands are
// passed:
//   - PRESERVE_EBX: low half of v in esi (swapped into ebx around the loop), storage
//     addressed through edi.
//   - MinGW gcc older than 4.7: v in ebx/ecx, storage addressed through a pointer in
//     edi or esi ("DS").
//   - otherwise: v in ebx/ecx, storage given as two "+m" operands (low and high half).
// Where BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS is defined (and in the MinGW
// variant) the result is read as two 32-bit halves and copied into old_value;
// otherwise it is read as an "A" register pair.
92f5a8d4 | 646 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT |
7c673cae | 647 | { |
11fdf7f2 TL |
648 | #if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) |
649 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
650 | ||
651 | uint32_t old_bits[2]; | |
7c673cae FG |
652 | __asm__ __volatile__ |
653 | ( | |
11fdf7f2 TL |
654 | "xchgl %%ebx, %%esi\n\t" |
655 | "movl (%[dest]), %%eax\n\t" | |
656 | "movl 4(%[dest]), %%edx\n\t" | |
7c673cae | 657 | ".align 16\n\t" |
11fdf7f2 | 658 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 659 | "jne 1b\n\t" |
11fdf7f2 TL |
660 | "xchgl %%ebx, %%esi\n\t" |
661 | : "=a" (old_bits[0]), "=d" (old_bits[1]) | |
662 | : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
663 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 664 | ); |
11fdf7f2 TL |
665 | |
666 | storage_type old_value; | |
667 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
668 | return old_value; | |
669 | ||
670 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
671 | ||
672 | storage_type old_value; | |
7c673cae FG |
673 | __asm__ __volatile__ |
674 | ( | |
11fdf7f2 TL |
675 | "xchgl %%ebx, %%esi\n\t" |
676 | "movl (%[dest]), %%eax\n\t" | |
677 | "movl 4(%[dest]), %%edx\n\t" | |
7c673cae | 678 | ".align 16\n\t" |
11fdf7f2 | 679 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 680 | "jne 1b\n\t" |
11fdf7f2 TL |
681 | "xchgl %%ebx, %%esi\n\t" |
682 | : "=A" (old_value) | |
683 | : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) | |
7c673cae FG |
684 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
685 | ); | |
11fdf7f2 TL |
686 | return old_value; |
687 | ||
688 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
689 | #else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) | |
690 | #if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407 | |
691 | ||
692 | // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below | |
693 | uint32_t old_bits[2]; | |
7c673cae FG |
694 | __asm__ __volatile__ |
695 | ( | |
11fdf7f2 | 696 | "movl (%[dest]), %%eax\n\t" |
7c673cae FG |
697 | "movl 4(%[dest]), %%edx\n\t" |
698 | ".align 16\n\t" | |
11fdf7f2 | 699 | "1: lock; cmpxchg8b (%[dest])\n\t" |
7c673cae | 700 | "jne 1b\n\t" |
11fdf7f2 TL |
701 | : "=&a" (old_bits[0]), "=&d" (old_bits[1]) |
702 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage) | |
703 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 704 | ); |
11fdf7f2 TL |
705 | |
706 | storage_type old_value; | |
707 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
708 | return old_value; | |
709 | ||
710 | #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
711 | ||
712 | uint32_t old_bits[2]; | |
7c673cae FG |
713 | __asm__ __volatile__ |
714 | ( | |
11fdf7f2 TL |
715 | "movl %[dest_lo], %%eax\n\t" |
716 | "movl %[dest_hi], %%edx\n\t" | |
7c673cae | 717 | ".align 16\n\t" |
11fdf7f2 | 718 | "1: lock; cmpxchg8b %[dest_lo]\n\t" |
7c673cae | 719 | "jne 1b\n\t" |
11fdf7f2 TL |
720 | : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) |
721 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
7c673cae FG |
722 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
723 | ); | |
11fdf7f2 TL |
724 | |
725 | storage_type old_value; | |
726 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
727 | return old_value; | |
728 | ||
729 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
730 | ||
731 | storage_type old_value; | |
732 | __asm__ __volatile__ | |
733 | ( | |
734 | "movl %[dest_lo], %%eax\n\t" | |
735 | "movl %[dest_hi], %%edx\n\t" | |
736 | ".align 16\n\t" | |
737 | "1: lock; cmpxchg8b %[dest_lo]\n\t" | |
738 | "jne 1b\n\t" | |
739 | : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) | |
740 | : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) | |
741 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
742 | ); | |
743 | return old_value; | |
744 | ||
745 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
746 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) | |
7c673cae | 747 | } |
7c673cae FG |
748 | }; |
749 | ||
20effc67 TL |
750 | template< bool Signed, bool Interprocess > |
751 | struct core_arch_operations< 8u, Signed, Interprocess > : | |
752 | public core_operations_cas_based< gcc_dcas_x86< Signed, Interprocess > > | |
753 | { | |
754 | }; | |
755 | ||
756 | #elif defined(__x86_64__) | |
757 | ||
// 8-byte core operations for the `#elif defined(__x86_64__)` branch.
// The struct passes its own type as the last template argument of core_arch_operations_gcc_x86 and takes
// storage_type from that base. Every operation defined here is either a single 64-bit instruction or a
// `lock cmpxchgq` retry loop. None of the functions reads its memory_order argument(s); they are unnamed.
758 | template< bool Signed, bool Interprocess > | |
759 | struct core_arch_operations< 8u, Signed, Interprocess > : | |
760 | public core_arch_operations_gcc_x86< 8u, Signed, Interprocess, core_arch_operations< 8u, Signed, Interprocess > > | |
761 | { | |
762 | typedef core_arch_operations_gcc_x86< 8u, Signed, Interprocess, core_arch_operations< 8u, Signed, Interprocess > > base_type; | |
763 | typedef typename base_type::storage_type storage_type; | |
764 | ||
// Atomically adds v to storage and returns the value storage held before the addition.
// xadd leaves the previous destination value in its register operand, which is bound to v ("+r").
765 | static BOOST_FORCEINLINE storage_type fetch_add(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
766 | { | |
767 | __asm__ __volatile__ | |
768 | ( | |
769 | "lock; xaddq %0, %1" | |
770 | : "+r" (v), "+m" (storage) | |
771 | : | |
772 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
773 | ); | |
774 | return v; | |
775 | } | |
776 | ||
// Atomically swaps v with storage and returns the previous contents of storage.
// No lock prefix is written and the flags are not listed as clobbered: xchg with a memory operand is
// locked implicitly and does not modify flags.
777 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
778 | { | |
779 | __asm__ __volatile__ | |
780 | ( | |
781 | "xchgq %0, %1" | |
782 | : "+r" (v), "+m" (storage) | |
783 | : | |
784 | : "memory" | |
785 | ); | |
786 | return v; | |
787 | } | |
788 | ||
// Atomically replaces storage with desired if storage equals expected. Returns true on success.
// expected is always overwritten with the value cmpxchgq leaves in rax: unchanged on success,
// the value found in storage on failure.
789 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
790 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
791 | { | |
// previous is bound to rax ("+a"), the implicit comparand/result register of cmpxchg.
792 | storage_type previous = expected; | |
793 | bool success; | |
794 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
// ZF is read directly through the "=@ccz" flag output operand; %3 is desired, %1 is storage.
795 | __asm__ __volatile__ | |
796 | ( | |
797 | "lock; cmpxchgq %3, %1" | |
798 | : "+a" (previous), "+m" (storage), "=@ccz" (success) | |
799 | : "r" (desired) | |
800 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
801 | ); | |
802 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
// No flag output operands: ZF is materialized into a byte register ("=q") with sete.
803 | __asm__ __volatile__ | |
804 | ( | |
805 | "lock; cmpxchgq %3, %1\n\t" | |
806 | "sete %2" | |
807 | : "+a" (previous), "+m" (storage), "=q" (success) | |
808 | : "r" (desired) | |
809 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
810 | ); | |
811 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
812 | expected = previous; | |
813 | return success; | |
814 | } | |
815 | ||
// Retry loop shared by fetch_and, fetch_or and fetch_xor.
// `result` is bound to rax and is expected to be preloaded with a value read from storage. Each iteration
// computes new_val = argument <op> rax and tries to install it with `lock cmpxchgq`; on failure cmpxchgq
// reloads rax from storage and `jne 1b` retries. When the loop exits, `result` holds the value storage had
// immediately before the successful update. `.align 16` aligns the loop head.
// The macro declares the local `new_val`, so it can be expanded only once per scope.
816 | #define BOOST_ATOMIC_DETAIL_CAS_LOOP(op, argument, result)\ | |
817 | storage_type new_val;\ | |
818 | __asm__ __volatile__\ | |
819 | (\ | |
820 | ".align 16\n\t"\ | |
821 | "1: movq %[arg], %[new_val]\n\t"\ | |
822 | op " %%rax, %[new_val]\n\t"\ | |
823 | "lock; cmpxchgq %[new_val], %[storage]\n\t"\ | |
824 | "jne 1b"\ | |
825 | : [res] "+a" (result), [storage] "+m" (storage), [new_val] "=&r" (new_val)\ | |
826 | : [arg] "r" (argument)\ | |
827 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"\ | |
828 | ) | |
829 | ||
// Atomically performs storage &= v and returns the previous value of storage.
// The plain read of storage only seeds the loop; a stale value costs one extra iteration.
830 | static BOOST_FORCEINLINE storage_type fetch_and(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
831 | { | |
832 | storage_type res = storage; | |
833 | BOOST_ATOMIC_DETAIL_CAS_LOOP("andq", v, res); | |
834 | return res; | |
835 | } | |
836 | ||
// Atomically performs storage |= v and returns the previous value of storage.
837 | static BOOST_FORCEINLINE storage_type fetch_or(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
838 | { | |
839 | storage_type res = storage; | |
840 | BOOST_ATOMIC_DETAIL_CAS_LOOP("orq", v, res); | |
841 | return res; | |
842 | } | |
843 | ||
// Atomically performs storage ^= v and returns the previous value of storage.
844 | static BOOST_FORCEINLINE storage_type fetch_xor(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
845 | { | |
846 | storage_type res = storage; | |
847 | BOOST_ATOMIC_DETAIL_CAS_LOOP("xorq", v, res); | |
848 | return res; | |
849 | } | |
850 | ||
// The helper macro is local to this struct; drop it so it does not leak to includers.
851 | #undef BOOST_ATOMIC_DETAIL_CAS_LOOP | |
852 | }; | |
853 | ||
854 | #endif | |
7c673cae FG |
855 | |
856 | #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) | |
857 | ||
// 16-byte primitive operations built on `lock cmpxchg16b`.
// Only store, load, compare_exchange_strong, compare_exchange_weak and exchange are defined here, and
// full_cas_based is set to true.
// cmpxchg16b compares rdx:rax with the 16-byte memory operand; if they are equal it stores rcx:rbx there
// and sets ZF, otherwise it clears ZF and loads the memory operand into rdx:rax.
// The memory_order arguments do not affect the generated code: they are unnamed everywhere except
// compare_exchange_weak, which only forwards them.
20effc67 | 858 | template< bool Signed, bool Interprocess > |
7c673cae FG |
859 | struct gcc_dcas_x86_64 |
860 | { | |
f67539c2 | 861 | typedef typename storage_traits< 16u >::type storage_type; |
// uint64_t tagged with BOOST_ATOMIC_DETAIL_MAY_ALIAS; used below to address the two 64-bit halves of a
// storage_type (index 0 is passed in rbx/rax, index 1 in rcx/rdx).
11fdf7f2 | 862 | typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t; |
7c673cae | 863 | |
f67539c2 TL |
864 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_size = 16u; |
865 | static BOOST_CONSTEXPR_OR_CONST std::size_t storage_alignment = 16u; | |
866 | static BOOST_CONSTEXPR_OR_CONST bool is_signed = Signed; | |
20effc67 | 867 | static BOOST_CONSTEXPR_OR_CONST bool is_interprocess = Interprocess; |
11fdf7f2 | 868 | static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; |
7c673cae FG |
869 | static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; |
870 | ||
// Atomically writes v to storage.
// The current contents are first read into rdx:rax with two separate movq loads. The pair of loads is not
// atomic, which is harmless: a mismatch only makes cmpxchg16b fail and refresh rdx:rax. cmpxchg16b is then
// retried (`jne 1b`) until it installs rcx:rbx = v.
// rax and rdx are listed as clobbers rather than outputs because the previous value is discarded.
871 | static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
872 | { | |
7c673cae FG |
873 | __asm__ __volatile__ |
874 | ( | |
11fdf7f2 TL |
875 | "movq %[dest_lo], %%rax\n\t" |
876 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 877 | ".align 16\n\t" |
11fdf7f2 | 878 | "1: lock; cmpxchg16b %[dest_lo]\n\t" |
7c673cae | 879 | "jne 1b\n\t" |
11fdf7f2 TL |
880 | : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) |
881 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
7c673cae FG |
882 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" |
883 | ); | |
7c673cae FG |
884 | } |
885 | ||
// Atomically reads storage and returns its value.
// rdx:rax is seeded with whatever rcx:rbx currently hold, so if the comparison happens to succeed the
// instruction writes back the value storage already contains; in both outcomes rdx:rax ends up holding the
// contents of storage. The two preprocessor branches differ only in how rdx:rax is bound to C++ objects.
886 | static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT | |
887 | { | |
92f5a8d4 TL |
888 | // Note that despite const qualification cmpxchg16b below may issue a store to the storage. The storage value |
889 | // will not change, but this prevents the storage from residing in read-only memory. | |
11fdf7f2 | 890 | |
92f5a8d4 | 891 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
11fdf7f2 | 892 | |
// rax and rdx are captured separately into the two array elements and reassembled with MEMCPY below.
11fdf7f2 | 893 | uint64_t value_bits[2]; |
7c673cae FG |
894 | |
895 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
896 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
7c673cae FG |
897 | __asm__ __volatile__ |
898 | ( | |
899 | "movq %%rbx, %%rax\n\t" | |
900 | "movq %%rcx, %%rdx\n\t" | |
901 | "lock; cmpxchg16b %[storage]\n\t" | |
11fdf7f2 | 902 | : "=&a" (value_bits[0]), "=&d" (value_bits[1]) |
7c673cae | 903 | : [storage] "m" (storage) |
11fdf7f2 | 904 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
7c673cae | 905 | ); |
7c673cae | 906 | |
11fdf7f2 TL |
907 | storage_type value; |
908 | BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); | |
7c673cae | 909 | return value; |
11fdf7f2 TL |
910 | |
911 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
912 | ||
// The "A" constraint binds the whole 16-byte value to the rdx:rax pair.
7c673cae FG |
913 | storage_type value; |
914 | ||
915 | // We don't care for comparison result here; the previous value will be stored into value anyway. | |
916 | // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. | |
917 | __asm__ __volatile__ | |
918 | ( | |
919 | "movq %%rbx, %%rax\n\t" | |
920 | "movq %%rcx, %%rdx\n\t" | |
921 | "lock; cmpxchg16b %[storage]\n\t" | |
922 | : "=&A" (value) | |
923 | : [storage] "m" (storage) | |
924 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
925 | ); | |
926 | ||
927 | return value; | |
11fdf7f2 TL |
928 | |
929 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
7c673cae FG |
930 | } |
931 | ||
// Atomically replaces storage with desired if storage equals expected. Returns true on success.
// On failure expected receives the value found in storage. Three implementations are selected by the
// preprocessor: a compiler intrinsic for Clang, inline asm with separate rax/rdx operands, and inline asm
// with an "A" register-pair operand.
932 | static BOOST_FORCEINLINE bool compare_exchange_strong( | |
933 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order, memory_order) BOOST_NOEXCEPT | |
934 | { | |
935 | #if defined(__clang__) | |
b32b8144 | 936 | |
7c673cae FG |
937 | // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics |
// The intrinsic returns the value found in storage; success is detected by comparing it with the old expected.
938 | storage_type old_expected = expected; | |
939 | expected = __sync_val_compare_and_swap(&storage, old_expected, desired); | |
940 | return expected == old_expected; | |
b32b8144 | 941 | |
11fdf7f2 | 942 | #elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
b32b8144 | 943 | |
11fdf7f2 | 944 | // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap |
// The halves of expected are bound straight to rax/rdx as read-write operands, so expected is updated in place.
7c673cae | 945 | bool success; |
7c673cae FG |
946 | __asm__ __volatile__ |
947 | ( | |
7c673cae FG |
948 | "lock; cmpxchg16b %[dest]\n\t" |
949 | "sete %[success]\n\t" | |
11fdf7f2 TL |
950 | : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success) |
951 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
952 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 953 | ); |
7c673cae FG |
954 | |
955 | return success; | |
b32b8144 | 956 | |
11fdf7f2 | 957 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
b32b8144 | 958 | |
// expected is bound to the rdx:rax pair as a read-write "A" operand; ZF is obtained either through a flag
// output operand or with sete, depending on BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS.
7c673cae | 959 | bool success; |
11fdf7f2 | 960 | |
b32b8144 FG |
961 | #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
962 | __asm__ __volatile__ | |
963 | ( | |
964 | "lock; cmpxchg16b %[dest]\n\t" | |
11fdf7f2 TL |
965 | : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success) |
966 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
b32b8144 FG |
967 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
968 | ); | |
969 | #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) | |
7c673cae FG |
970 | __asm__ __volatile__ |
971 | ( | |
972 | "lock; cmpxchg16b %[dest]\n\t" | |
973 | "sete %[success]\n\t" | |
11fdf7f2 TL |
974 | : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) |
975 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) | |
7c673cae FG |
976 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
977 | ); | |
b32b8144 FG |
978 | #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) |
979 | ||
7c673cae | 980 | return success; |
b32b8144 | 981 | |
11fdf7f2 | 982 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
7c673cae FG |
983 | } |
984 | ||
// Forwards to compare_exchange_strong with the same arguments; there is no separate weak implementation.
985 | static BOOST_FORCEINLINE bool compare_exchange_weak( | |
986 | storage_type volatile& storage, storage_type& expected, storage_type desired, memory_order success_order, memory_order failure_order) BOOST_NOEXCEPT | |
987 | { | |
988 | return compare_exchange_strong(storage, expected, desired, success_order, failure_order); | |
989 | } | |
990 | ||
// Atomically replaces storage with v and returns the previous value of storage.
// Uses the same retry loop as store(), except that rdx:rax is captured as an output: as two 64-bit halves
// reassembled with MEMCPY when BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS is defined, otherwise as a
// single "A" register-pair operand.
991 | static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT | |
992 | { | |
11fdf7f2 TL |
993 | #if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
994 | uint64_t old_bits[2]; | |
7c673cae FG |
995 | __asm__ __volatile__ |
996 | ( | |
11fdf7f2 TL |
997 | "movq %[dest_lo], %%rax\n\t" |
998 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 999 | ".align 16\n\t" |
11fdf7f2 | 1000 | "1: lock; cmpxchg16b %[dest_lo]\n\t" |
7c673cae | 1001 | "jne 1b\n\t" |
11fdf7f2 TL |
1002 | : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1]) |
1003 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
1004 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" | |
7c673cae | 1005 | ); |
7c673cae | 1006 | |
11fdf7f2 TL |
1007 | storage_type old_value; |
1008 | BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); | |
7c673cae | 1009 | return old_value; |
11fdf7f2 TL |
1010 | #else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) |
1011 | storage_type old_value; | |
7c673cae FG |
1012 | __asm__ __volatile__ |
1013 | ( | |
11fdf7f2 TL |
1014 | "movq %[dest_lo], %%rax\n\t" |
1015 | "movq %[dest_hi], %%rdx\n\t" | |
7c673cae | 1016 | ".align 16\n\t" |
11fdf7f2 | 1017 | "1: lock; cmpxchg16b %[dest_lo]\n\t" |
7c673cae | 1018 | "jne 1b\n\t" |
11fdf7f2 TL |
1019 | : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) |
1020 | : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) | |
7c673cae FG |
1021 | : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" |
1022 | ); | |
7c673cae | 1023 | |
11fdf7f2 TL |
1024 | return old_value; |
1025 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) | |
7c673cae | 1026 | } |
7c673cae FG |
1027 | }; |
1028 | ||
// 16-byte core operations, compiled only when BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B is defined.
// The struct adds no members of its own: its entire interface is inherited from
// core_operations_cas_based instantiated over gcc_dcas_x86_64 (the cmpxchg16b-based primitives above).
// NOTE(review): core_operations_cas_based is declared in core_ops_cas_based.hpp and is not visible here;
// presumably it derives the remaining operations from compare-and-swap - confirm there.
20effc67 TL |
1029 | template< bool Signed, bool Interprocess > |
1030 | struct core_arch_operations< 16u, Signed, Interprocess > : | |
1031 | public core_operations_cas_based< gcc_dcas_x86_64< Signed, Interprocess > > | |
1032 | { | |
1033 | }; | |
1034 | ||
7c673cae FG |
1035 | #endif // defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG16B) |
1036 | ||
1037 | } // namespace detail | |
1038 | } // namespace atomics | |
1039 | } // namespace boost | |
1040 | ||
20effc67 TL |
1041 | #include <boost/atomic/detail/footer.hpp> |
1042 | ||
1043 | #endif // BOOST_ATOMIC_DETAIL_CORE_ARCH_OPS_GCC_X86_HPP_INCLUDED_ |