// SPDX-License-Identifier: Apache-2.0 OR MIT

// Atomic{I,U}128 implementation on AArch64.
//
// There are a few ways to implement 128-bit atomic operations in AArch64.
//
// - LDXP/STXP loop (DW LL/SC)
// - CASP (DWCAS) added as FEAT_LSE (mandatory from armv8.1-a)
// - LDP/STP (DW load/store) if FEAT_LSE2 (optional from armv8.2-a, mandatory from armv8.4-a) is available
// - LDIAPP/STILP (DW acquire-load/release-store) added as FEAT_LRCPC3 (optional from armv8.9-a/armv9.4-a) (if FEAT_LSE2 is also available)
// - LDCLRP/LDSETP/SWPP (DW RMW) added as FEAT_LSE128 (optional from armv9.4-a)
//
// If outline-atomics is not enabled and FEAT_LSE is not available at
// compile-time, we use the LDXP/STXP loop.
// If outline-atomics is enabled and FEAT_LSE is not available at
// compile-time, we use CASP for CAS if FEAT_LSE is available at run-time;
// otherwise, we use the LDXP/STXP loop.
// If FEAT_LSE is available at compile-time, we use CASP for load/store/CAS/RMW.
// However, when the portable_atomic_ll_sc_rmw cfg is set, we use an LDXP/STXP
// loop instead of a CASP loop for RMW (by default, it is set on Apple hardware;
// see the build script for details).
// If FEAT_LSE2 is available at compile-time, we use LDP/STP for load/store.
// If FEAT_LSE128 is available at compile-time, we use LDCLRP/LDSETP/SWPP for fetch_and/fetch_or/swap/{release,seqcst}-store.
// If FEAT_LSE2 and FEAT_LRCPC3 are available at compile-time, we use LDIAPP/STILP for acquire-load/release-store.
//
// Note: FEAT_LSE2 doesn't imply FEAT_LSE. FEAT_LSE128 implies FEAT_LSE but not FEAT_LSE2.
//
// Note that we do not split LL and SC into separate functions, but handle
// them within a single asm block. This is because it is theoretically possible
// for the compiler to insert operations that might clear the reservation between
// LL and SC. Considering the kinds of operations we are providing and the fact
// that [progress64](https://github.com/ARM-software/progress64) uses such code,
// this is probably not a problem for aarch64, but aarch64 doesn't appear to
// guarantee it, and hexagon is the only architecture with a hardware guarantee
// that such code works. See also:
//
// - https://yarchive.net/comp/linux/cmpxchg_ll_sc_portability.html
// - https://lists.llvm.org/pipermail/llvm-dev/2016-May/099490.html
// - https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html
//
// Also, even when using a CAS loop to implement atomic RMW, we include the loop itself
// in the asm block because it is more efficient for some codegen backends.
// https://github.com/rust-lang/compiler-builtins/issues/339#issuecomment-1191260474
//
// Note: On Miri and ThreadSanitizer, which do not support inline assembly, we don't use
// this module and use intrinsics.rs instead.
//
// Refs:
// - ARM Compiler armasm User Guide
//   https://developer.arm.com/documentation/dui0801/latest
// - Arm A-profile A64 Instruction Set Architecture
//   https://developer.arm.com/documentation/ddi0602/latest
// - Arm Architecture Reference Manual for A-profile architecture
//   https://developer.arm.com/documentation/ddi0487/latest
// - atomic-maybe-uninit https://github.com/taiki-e/atomic-maybe-uninit
//
// Generated asm:
// - aarch64 https://godbolt.org/z/5Mz1E33vz
// - aarch64 msvc https://godbolt.org/z/P53d1MsGY
// - aarch64 (+lse) https://godbolt.org/z/qvaE8n79K
// - aarch64 msvc (+lse) https://godbolt.org/z/dj4aYerfr
// - aarch64 (+lse,+lse2) https://godbolt.org/z/1E15jjxah
// - aarch64 (+lse,+lse2,+rcpc3) https://godbolt.org/z/YreM4n84o
// - aarch64 (+lse2,+lse128) https://godbolt.org/z/Kfeqs54ox
// - aarch64 (+lse2,+lse128,+rcpc3) https://godbolt.org/z/n6zhjE77s

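// A test-only sketch (not used by the implementation) of the compile-time
// half of the instruction-selection strategy described above. The run-time
// half (outline-atomics) is handled per operation below via detect::detect()
// and the ifunc helper macro; the name and return values here are illustrative.
#[cfg(test)]
#[allow(dead_code)]
fn _doc_load_store_instruction_selection() -> &'static str {
    if cfg!(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")) {
        "ldp/stp" // FEAT_LSE2: 16-byte aligned LDP/STP is single-copy atomic
    } else if cfg!(any(target_feature = "lse", portable_atomic_target_feature = "lse")) {
        "casp" // FEAT_LSE: CAS-based load/store
    } else {
        "ldxp/stxp" // baseline: LL/SC loop
    }
}
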
include!("macros.rs");

// On musl with static linking, it seems that getauxval is not always available.
// See detect/auxv.rs for more.
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(
    test,
    not(all(
        any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
    )),
))]
#[cfg(any(
    all(
        target_os = "linux",
        any(
            target_env = "gnu",
            all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")),
            portable_atomic_outline_atomics,
        ),
    ),
    target_os = "android",
    target_os = "freebsd",
))]
#[path = "detect/auxv.rs"]
mod detect;
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg_attr(
    target_os = "netbsd",
    cfg(any(
        test,
        not(all(
            any(target_feature = "lse2", portable_atomic_target_feature = "lse2"),
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
        )),
    ))
)]
#[cfg_attr(
    target_os = "openbsd",
    cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))
)]
#[cfg(any(target_os = "netbsd", target_os = "openbsd"))]
#[path = "detect/aarch64_aa64reg.rs"]
mod detect;
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
#[cfg(target_os = "fuchsia")]
#[path = "detect/aarch64_fuchsia.rs"]
mod detect;
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
#[cfg(target_os = "windows")]
#[path = "detect/aarch64_windows.rs"]
mod detect;

// test only
#[cfg(test)]
#[cfg(not(qemu))]
#[cfg(not(valgrind))]
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(any(target_os = "linux", target_os = "android", target_os = "freebsd"))]
#[path = "detect/aarch64_aa64reg.rs"]
mod detect_aa64reg;
#[cfg(test)]
#[cfg(not(portable_atomic_no_outline_atomics))]
#[cfg(target_os = "macos")]
#[path = "detect/aarch64_macos.rs"]
mod detect_macos;

#[cfg(not(portable_atomic_no_asm))]
use core::arch::asm;
use core::sync::atomic::Ordering;

use crate::utils::{Pair, U128};

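// For reference, a test-only sketch of the shape of the `Pair`/`U128` helpers
// assumed throughout this module: a 128-bit value viewed either whole or as
// two 64-bit halves, which is how values are passed to the asm blocks in
// register pairs. The `Sketch` names are hypothetical; the real,
// endianness-aware definitions live in utils.rs.
#[cfg(test)]
mod _u128_shape_sketch {
    #[derive(Clone, Copy)]
    #[repr(C)]
    #[allow(dead_code)]
    struct PairSketch {
        lo: u64,
        hi: u64,
    }
    #[repr(C)]
    union U128Sketch {
        whole: u128,
        pair: PairSketch,
    }
    #[test]
    fn split_and_rejoin() {
        let x = 0x1122_3344_5566_7788_99AA_BBCC_DDEE_FF00_u128;
        // SAFETY: both union fields are plain integers, so any bit pattern is valid.
        let pair = unsafe { U128Sketch { whole: x }.pair };
        // SAFETY: as above.
        let rejoined = unsafe { U128Sketch { pair }.whole };
        assert_eq!(rejoined, x);
    }
}
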
781aab86
FG
141#[cfg(any(
142 target_feature = "lse",
143 portable_atomic_target_feature = "lse",
144 not(portable_atomic_no_outline_atomics),
145))]
146macro_rules! debug_assert_lse {
147 () => {
148 #[cfg(all(
149 not(portable_atomic_no_outline_atomics),
150 any(
151 all(
152 target_os = "linux",
153 any(
154 target_env = "gnu",
155 all(
156 any(target_env = "musl", target_env = "ohos"),
157 not(target_feature = "crt-static"),
158 ),
159 portable_atomic_outline_atomics,
160 ),
161 ),
162 target_os = "android",
163 target_os = "freebsd",
ed00b5ec 164 target_os = "netbsd",
781aab86
FG
165 target_os = "openbsd",
166 target_os = "fuchsia",
167 target_os = "windows",
168 ),
169 ))]
170 #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
171 {
172 debug_assert!(detect::detect().has_lse());
173 }
174 };
175}
#[rustfmt::skip]
#[cfg(any(
    target_feature = "lse2",
    portable_atomic_target_feature = "lse2",
    not(portable_atomic_no_outline_atomics),
))]
macro_rules! debug_assert_lse2 {
    () => {
        #[cfg(all(
            not(portable_atomic_no_outline_atomics),
            any(
                all(
                    target_os = "linux",
                    any(
                        target_env = "gnu",
                        all(
                            any(target_env = "musl", target_env = "ohos"),
                            not(target_feature = "crt-static"),
                        ),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                target_os = "freebsd",
                target_os = "netbsd",
                // These don't support detection of FEAT_LSE2.
                // target_os = "openbsd",
                // target_os = "fuchsia",
                // target_os = "windows",
            ),
        ))]
        #[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
        {
            debug_assert!(detect::detect().has_lse2());
        }
    };
}

// Refs: https://developer.arm.com/documentation/100067/0612/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
//
// This is similar to #[target_feature(enable = "lse")], except that there are
// no compiler guarantees regarding (un)inlining, and the scope is within an asm
// block rather than a function. We use this directive to support outline-atomics
// on pre-1.61 rustc (aarch64_target_feature was stabilized in Rust 1.61).
//
// The .arch_extension directive is effective until the end of the assembly block and
// is not propagated to subsequent code, so an end_lse macro is unneeded.
// https://godbolt.org/z/4oMEW8vWc
// https://github.com/torvalds/linux/commit/e0d5896bd356cd577f9710a02d7a474cdf58426b
// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
// (It seems that GCC effectively ignores this directive and always allows FEAT_LSE instructions: https://godbolt.org/z/W9W6rensG)
//
// The .arch directive has a similar effect, but we don't use it due to the following issue:
// https://github.com/torvalds/linux/commit/dd1f6308b28edf0452dd5dc7877992903ec61e69
//
// This is also needed for compatibility with rustc_codegen_cranelift:
// https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400#issuecomment-1774599775
//
// Note: If FEAT_LSE is not available at compile-time, we must guarantee that
// the function that uses it is not inlined into a function where it is not
// clear whether FEAT_LSE is available. Otherwise, (even if we checked whether
// FEAT_LSE is available at run-time) optimizations that reorder its
// instructions across the if condition might introduce undefined behavior.
// (see also https://rust-lang.github.io/rfcs/2045-target-feature.html#safely-inlining-target_feature-functions-on-more-contexts)
// However, our code uses the ifunc helper macro that works with function pointers,
// so we don't have to worry about this unless calling without the helper macro.
#[cfg(any(
    target_feature = "lse",
    portable_atomic_target_feature = "lse",
    not(portable_atomic_no_outline_atomics),
))]
macro_rules! start_lse {
    () => {
        ".arch_extension lse"
    };
}

#[cfg(target_endian = "little")]
macro_rules! select_le_or_be {
    ($le:expr, $be:expr) => {
        $le
    };
}
#[cfg(target_endian = "big")]
macro_rules! select_le_or_be {
    ($le:expr, $be:expr) => {
        $be
    };
}

macro_rules! atomic_rmw {
    ($op:ident, $order:ident) => {
        atomic_rmw!($op, $order, write = $order)
    };
    ($op:ident, $order:ident, write = $write:ident) => {
        match $order {
            Ordering::Relaxed => $op!("", "", ""),
            Ordering::Acquire => $op!("a", "", ""),
            Ordering::Release => $op!("", "l", ""),
            Ordering::AcqRel => $op!("a", "l", ""),
            // In MSVC environments, SeqCst stores/writes need a fence after the write.
            // https://reviews.llvm.org/D141748
            #[cfg(target_env = "msvc")]
            Ordering::SeqCst if $write == Ordering::SeqCst => $op!("a", "l", "dmb ish"),
            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
            Ordering::SeqCst => $op!("a", "l", ""),
            _ => unreachable!("{:?}", $order),
        }
    };
}

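// A test-only demonstration of the ordering-to-suffix mapping implemented by
// `atomic_rmw!` above: "a" selects the acquire form and "l" the release form
// of the instruction (e.g. casp -> caspa/caspl/caspal).
#[cfg(test)]
#[test]
fn _doc_atomic_rmw_suffixes() {
    fn suffixes(order: Ordering) -> (&'static str, &'static str) {
        macro_rules! record {
            ($acquire:tt, $release:tt, $fence:tt) => {
                ($acquire, $release)
            };
        }
        atomic_rmw!(record, order)
    }
    assert_eq!(suffixes(Ordering::Relaxed), ("", ""));
    assert_eq!(suffixes(Ordering::Acquire), ("a", ""));
    assert_eq!(suffixes(Ordering::Release), ("", "l"));
    assert_eq!(suffixes(Ordering::AcqRel), ("a", "l"));
}
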
// cfg guarantees that the CPU supports FEAT_LSE2.
#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
use _atomic_load_ldp as atomic_load;
#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
#[inline]
unsafe fn atomic_load(src: *mut u128, order: Ordering) -> u128 {
    #[inline]
    unsafe fn atomic_load_no_lse2(src: *mut u128, order: Ordering) -> u128 {
        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
        // SAFETY: the caller must uphold the safety contract.
        // cfg guarantees that the CPU supports FEAT_LSE.
        unsafe {
            _atomic_load_casp(src, order)
        }
        #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
        // SAFETY: the caller must uphold the safety contract.
        unsafe {
            _atomic_load_ldxp_stxp(src, order)
        }
    }
    #[cfg(not(all(
        not(portable_atomic_no_outline_atomics),
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(
                        any(target_env = "musl", target_env = "ohos"),
                        not(target_feature = "crt-static"),
                    ),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            // These don't support detection of FEAT_LSE2.
            // target_os = "openbsd",
            // target_os = "fuchsia",
            // target_os = "windows",
        ),
    )))]
    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        atomic_load_no_lse2(src, order)
    }
    #[cfg(all(
        not(portable_atomic_no_outline_atomics),
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(
                        any(target_env = "musl", target_env = "ohos"),
                        not(target_feature = "crt-static"),
                    ),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            // These don't support detection of FEAT_LSE2.
            // target_os = "openbsd",
            // target_os = "fuchsia",
            // target_os = "windows",
        ),
    ))]
    {
        fn_alias! {
            // inline(never) is just a hint and is not strictly necessary
            // because we use the ifunc helper macro, but it is used for clarity.
            #[inline(never)]
            unsafe fn(src: *mut u128) -> u128;
            atomic_load_lse2_relaxed = _atomic_load_ldp(Ordering::Relaxed);
            atomic_load_lse2_acquire = _atomic_load_ldp(Ordering::Acquire);
            atomic_load_lse2_seqcst = _atomic_load_ldp(Ordering::SeqCst);
        }
        fn_alias! {
            unsafe fn(src: *mut u128) -> u128;
            atomic_load_no_lse2_relaxed = atomic_load_no_lse2(Ordering::Relaxed);
            atomic_load_no_lse2_acquire = atomic_load_no_lse2(Ordering::Acquire);
            atomic_load_no_lse2_seqcst = atomic_load_no_lse2(Ordering::SeqCst);
        }
        // SAFETY: the caller must uphold the safety contract,
        // and we've checked if FEAT_LSE2 is available.
        unsafe {
            match order {
                Ordering::Relaxed => {
                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
                        let cpuinfo = detect::detect();
                        if cpuinfo.has_lse2() {
                            atomic_load_lse2_relaxed
                        } else {
                            atomic_load_no_lse2_relaxed
                        }
                    })
                }
                Ordering::Acquire => {
                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
                        let cpuinfo = detect::detect();
                        if cpuinfo.has_lse2() {
                            atomic_load_lse2_acquire
                        } else {
                            atomic_load_no_lse2_acquire
                        }
                    })
                }
                Ordering::SeqCst => {
                    ifunc!(unsafe fn(src: *mut u128) -> u128 {
                        let cpuinfo = detect::detect();
                        if cpuinfo.has_lse2() {
                            atomic_load_lse2_seqcst
                        } else {
                            atomic_load_no_lse2_seqcst
                        }
                    })
                }
                _ => unreachable!("{:?}", order),
            }
        }
    }
}
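
// A test-only model of the run-time dispatch pattern used above: `ifunc!`
// (defined in macros.rs) resolves the best implementation once, caches the
// function pointer, and calls through it afterwards. This stand-in uses a
// hypothetical resolver and compile-time detection instead of detect::detect().
#[cfg(test)]
#[test]
fn _doc_ifunc_model() {
    use core::sync::atomic::AtomicPtr;
    type RetFn = fn() -> u32;
    fn with_lse2() -> u32 {
        1
    }
    fn without_lse2() -> u32 {
        2
    }
    fn resolve() -> RetFn {
        if cfg!(target_feature = "lse2") { with_lse2 } else { without_lse2 }
    }
    static FUNC: AtomicPtr<()> = AtomicPtr::new(core::ptr::null_mut());
    let mut f = FUNC.load(Ordering::Relaxed);
    if f.is_null() {
        // First call: resolve and cache the chosen implementation.
        f = resolve() as *mut ();
        FUNC.store(f, Ordering::Relaxed);
    }
    // SAFETY: FUNC only ever holds null or a pointer created from a RetFn.
    let f = unsafe { core::mem::transmute::<*mut (), RetFn>(f) };
    assert!(f() == 1 || f() == 2);
}
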
// If the CPU supports FEAT_LSE2, LDP/LDIAPP is a single-copy atomic read;
// otherwise, it is two single-copy atomic reads.
// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
#[cfg(any(
    target_feature = "lse2",
    portable_atomic_target_feature = "lse2",
    not(portable_atomic_no_outline_atomics),
))]
#[inline]
unsafe fn _atomic_load_ldp(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_lse2!();

    // SAFETY: the caller must guarantee that `src` is valid for reads,
    // is 16-byte aligned, and that there are no concurrent non-atomic operations.
    //
    // Refs:
    // - LDP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDP--A64-
    unsafe {
        let (out_lo, out_hi);
        macro_rules! atomic_load_relaxed {
            ($acquire:tt $(, $readonly:tt)?) => {
                asm!(
                    "ldp {out_lo}, {out_hi}, [{src}]",
                    $acquire,
                    src = in(reg) ptr_reg!(src),
                    out_hi = lateout(reg) out_hi,
                    out_lo = lateout(reg) out_lo,
                    options(nostack, preserves_flags $(, $readonly)?),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_load_relaxed!("", readonly),
            #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
            Ordering::Acquire => {
                // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
                // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDIAPP--Load-Acquire-RCpc-ordered-Pair-of-registers-
                asm!(
                    "ldiapp {out_lo}, {out_hi}, [{src}]",
                    src = in(reg) ptr_reg!(src),
                    out_hi = lateout(reg) out_hi,
                    out_lo = lateout(reg) out_lo,
                    options(nostack, preserves_flags),
                );
            }
            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
            Ordering::Acquire => atomic_load_relaxed!("dmb ishld"),
            Ordering::SeqCst => {
                asm!(
                    // ldar (or dmb ishld) is required to prevent reordering with a preceding stlxp.
                    // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108891 for details.
                    "ldar {tmp}, [{src}]",
                    "ldp {out_lo}, {out_hi}, [{src}]",
                    "dmb ishld",
                    src = in(reg) ptr_reg!(src),
                    out_hi = lateout(reg) out_hi,
                    out_lo = lateout(reg) out_lo,
                    tmp = out(reg) _,
                    options(nostack, preserves_flags),
                );
            }
            _ => unreachable!("{:?}", order),
        }
        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
    }
}
// Do not use _atomic_compare_exchange_casp because it needs an extra MOV to implement load.
#[cfg(any(test, not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))))]
#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
#[inline]
unsafe fn _atomic_load_casp(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);
    debug_assert_lse!();

    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that the CPU supports FEAT_LSE.
    unsafe {
        let (out_lo, out_hi);
        macro_rules! atomic_load {
            ($acquire:tt, $release:tt) => {
                asm!(
                    start_lse!(),
                    concat!("casp", $acquire, $release, " x2, x3, x2, x3, [{src}]"),
                    src = in(reg) ptr_reg!(src),
                    // must be allocated to even/odd register pair
                    inout("x2") 0_u64 => out_lo,
                    inout("x3") 0_u64 => out_hi,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_load!("", ""),
            Ordering::Acquire => atomic_load!("a", ""),
            Ordering::SeqCst => atomic_load!("a", "l"),
            _ => unreachable!("{:?}", order),
        }
        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
    }
}
#[cfg(any(
    test,
    all(
        not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")),
        not(any(target_feature = "lse", portable_atomic_target_feature = "lse")),
    ),
))]
#[inline]
unsafe fn _atomic_load_ldxp_stxp(src: *mut u128, order: Ordering) -> u128 {
    debug_assert!(src as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let (mut out_lo, mut out_hi);
        macro_rules! atomic_load {
            ($acquire:tt, $release:tt) => {
                asm!(
                    "2:",
                    concat!("ld", $acquire, "xp {out_lo}, {out_hi}, [{src}]"),
                    concat!("st", $release, "xp {r:w}, {out_lo}, {out_hi}, [{src}]"),
                    // 0 if the store was successful, 1 if no store was performed
                    "cbnz {r:w}, 2b",
                    src = in(reg) ptr_reg!(src),
                    out_lo = out(reg) out_lo,
                    out_hi = out(reg) out_hi,
                    r = out(reg) _,
                    options(nostack, preserves_flags),
                )
            };
        }
        match order {
            Ordering::Relaxed => atomic_load!("", ""),
            Ordering::Acquire => atomic_load!("a", ""),
            Ordering::SeqCst => atomic_load!("a", "l"),
            _ => unreachable!("{:?}", order),
        }
        U128 { pair: Pair { lo: out_lo, hi: out_hi } }.whole
    }
}

// cfg guarantees that the CPU supports FEAT_LSE2.
#[cfg(any(target_feature = "lse2", portable_atomic_target_feature = "lse2"))]
use _atomic_store_stp as atomic_store;
#[cfg(not(any(target_feature = "lse2", portable_atomic_target_feature = "lse2")))]
#[inline]
unsafe fn atomic_store(dst: *mut u128, val: u128, order: Ordering) {
    #[inline]
    unsafe fn atomic_store_no_lse2(dst: *mut u128, val: u128, order: Ordering) {
        // If FEAT_LSE is available at compile-time and the portable_atomic_ll_sc_rmw cfg is not set,
        // we use CAS-based atomic RMW.
        #[cfg(all(
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
            not(portable_atomic_ll_sc_rmw),
        ))]
        // SAFETY: the caller must uphold the safety contract.
        // cfg guarantees that the CPU supports FEAT_LSE.
        unsafe {
            _atomic_swap_casp(dst, val, order);
        }
        #[cfg(not(all(
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
            not(portable_atomic_ll_sc_rmw),
        )))]
        // SAFETY: the caller must uphold the safety contract.
        unsafe {
            _atomic_store_ldxp_stxp(dst, val, order);
        }
    }
    #[cfg(not(all(
        not(portable_atomic_no_outline_atomics),
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(
                        any(target_env = "musl", target_env = "ohos"),
                        not(target_feature = "crt-static"),
                    ),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            // These don't support detection of FEAT_LSE2.
            // target_os = "openbsd",
            // target_os = "fuchsia",
            // target_os = "windows",
        ),
    )))]
    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        atomic_store_no_lse2(dst, val, order);
    }
    #[cfg(all(
        not(portable_atomic_no_outline_atomics),
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(
                        any(target_env = "musl", target_env = "ohos"),
                        not(target_feature = "crt-static"),
                    ),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            // These don't support detection of FEAT_LSE2.
            // target_os = "openbsd",
            // target_os = "fuchsia",
            // target_os = "windows",
        ),
    ))]
    {
        fn_alias! {
            // inline(never) is just a hint and is not strictly necessary
            // because we use the ifunc helper macro, but it is used for clarity.
            #[inline(never)]
            unsafe fn(dst: *mut u128, val: u128);
            atomic_store_lse2_relaxed = _atomic_store_stp(Ordering::Relaxed);
            atomic_store_lse2_release = _atomic_store_stp(Ordering::Release);
            atomic_store_lse2_seqcst = _atomic_store_stp(Ordering::SeqCst);
        }
        fn_alias! {
            unsafe fn(dst: *mut u128, val: u128);
            atomic_store_no_lse2_relaxed = atomic_store_no_lse2(Ordering::Relaxed);
            atomic_store_no_lse2_release = atomic_store_no_lse2(Ordering::Release);
            atomic_store_no_lse2_seqcst = atomic_store_no_lse2(Ordering::SeqCst);
        }
        // SAFETY: the caller must uphold the safety contract,
        // and we've checked if FEAT_LSE2 is available.
        unsafe {
            match order {
                Ordering::Relaxed => {
                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                        let cpuinfo = detect::detect();
                        if cpuinfo.has_lse2() {
                            atomic_store_lse2_relaxed
                        } else {
                            atomic_store_no_lse2_relaxed
                        }
                    });
                }
                Ordering::Release => {
                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                        let cpuinfo = detect::detect();
                        if cpuinfo.has_lse2() {
                            atomic_store_lse2_release
                        } else {
                            atomic_store_no_lse2_release
                        }
                    });
                }
                Ordering::SeqCst => {
                    ifunc!(unsafe fn(dst: *mut u128, val: u128) {
                        let cpuinfo = detect::detect();
                        if cpuinfo.has_lse2() {
                            atomic_store_lse2_seqcst
                        } else {
                            atomic_store_no_lse2_seqcst
                        }
                    });
                }
                _ => unreachable!("{:?}", order),
            }
        }
    }
}
// If the CPU supports FEAT_LSE2, STP/STILP is a single-copy atomic write;
// otherwise, it is two single-copy atomic writes.
// Refs: B2.2.1 of the Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile
#[cfg(any(
    target_feature = "lse2",
    portable_atomic_target_feature = "lse2",
    not(portable_atomic_no_outline_atomics),
))]
#[inline]
unsafe fn _atomic_store_stp(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_lse2!();

    // SAFETY: the caller must guarantee that `dst` is valid for writes,
    // is 16-byte aligned, and that there are no concurrent non-atomic operations.
    //
    // Refs:
    // - STP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STP--A64-
    unsafe {
        #[rustfmt::skip]
        macro_rules! atomic_store {
            ($acquire:tt, $release:tt) => {{
                let val = U128 { whole: val };
                asm!(
                    $release,
                    "stp {val_lo}, {val_hi}, [{dst}]",
                    $acquire,
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = in(reg) val.pair.lo,
                    val_hi = in(reg) val.pair.hi,
                    options(nostack, preserves_flags),
                );
            }};
        }
        match order {
            Ordering::Relaxed => atomic_store!("", ""),
            #[cfg(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3"))]
            Ordering::Release => {
                let val = U128 { whole: val };
                // SAFETY: cfg guarantees that the CPU supports FEAT_LRCPC3.
                // Refs: https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/STILP--Store-Release-ordered-Pair-of-registers-
                asm!(
                    "stilp {val_lo}, {val_hi}, [{dst}]",
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = in(reg) val.pair.lo,
                    val_hi = in(reg) val.pair.hi,
                    options(nostack, preserves_flags),
                );
            }
            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
            #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
            Ordering::Release => {
                // Use swpp if stp requires fences.
                // https://reviews.llvm.org/D143506
                // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
                _atomic_swap_swpp(dst, val, order);
            }
            #[cfg(not(any(target_feature = "rcpc3", portable_atomic_target_feature = "rcpc3")))]
            #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
            Ordering::Release => atomic_store!("", "dmb ish"),
            #[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
            Ordering::SeqCst => {
                // Use swpp if stp requires fences.
                // https://reviews.llvm.org/D143506
                // SAFETY: cfg guarantees that the CPU supports FEAT_LSE128.
                _atomic_swap_swpp(dst, val, order);
            }
            #[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
            Ordering::SeqCst => atomic_store!("dmb ish", "dmb ish"),
            _ => unreachable!("{:?}", order),
        }
    }
}
// Do not use _atomic_swap_ldxp_stxp because it needs extra registers to implement store.
#[cfg(any(
    test,
    not(all(
        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
        not(portable_atomic_ll_sc_rmw),
    ))
))]
#[inline]
unsafe fn _atomic_store_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let val = U128 { whole: val };
        macro_rules! store {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    "2:",
                    concat!("ld", $acquire, "xp xzr, {tmp}, [{dst}]"),
                    concat!("st", $release, "xp {tmp:w}, {val_lo}, {val_hi}, [{dst}]"),
                    // 0 if the store was successful, 1 if no store was performed
                    "cbnz {tmp:w}, 2b",
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = in(reg) val.pair.lo,
                    val_hi = in(reg) val.pair.hi,
                    tmp = out(reg) _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(store, order);
    }
}

#[inline]
unsafe fn atomic_compare_exchange(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> Result<u128, u128> {
    #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that the CPU supports FEAT_LSE.
    let prev = unsafe { _atomic_compare_exchange_casp(dst, old, new, success, failure) };
    #[cfg(not(all(
        not(portable_atomic_no_outline_atomics),
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(
                        any(target_env = "musl", target_env = "ohos"),
                        not(target_feature = "crt-static"),
                    ),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            target_os = "openbsd",
            target_os = "fuchsia",
            target_os = "windows",
        ),
    )))]
    #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
    // SAFETY: the caller must uphold the safety contract.
    let prev = unsafe { _atomic_compare_exchange_ldxp_stxp(dst, old, new, success, failure) };
    #[cfg(all(
        not(portable_atomic_no_outline_atomics),
        any(
            all(
                target_os = "linux",
                any(
                    target_env = "gnu",
                    all(
                        any(target_env = "musl", target_env = "ohos"),
                        not(target_feature = "crt-static"),
                    ),
                    portable_atomic_outline_atomics,
                ),
            ),
            target_os = "android",
            target_os = "freebsd",
            target_os = "netbsd",
            target_os = "openbsd",
            target_os = "fuchsia",
            target_os = "windows",
        ),
    ))]
    #[cfg(not(any(target_feature = "lse", portable_atomic_target_feature = "lse")))]
    let prev = {
        fn_alias! {
            // inline(never) is just a hint and is not strictly necessary
            // because we use the ifunc helper macro, but it is used for clarity.
            #[inline(never)]
            unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
            atomic_compare_exchange_casp_relaxed
                = _atomic_compare_exchange_casp(Ordering::Relaxed, Ordering::Relaxed);
            atomic_compare_exchange_casp_acquire
                = _atomic_compare_exchange_casp(Ordering::Acquire, Ordering::Acquire);
            atomic_compare_exchange_casp_release
                = _atomic_compare_exchange_casp(Ordering::Release, Ordering::Relaxed);
            atomic_compare_exchange_casp_acqrel
                = _atomic_compare_exchange_casp(Ordering::AcqRel, Ordering::Acquire);
            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
            #[cfg(target_env = "msvc")]
            atomic_compare_exchange_casp_seqcst
                = _atomic_compare_exchange_casp(Ordering::SeqCst, Ordering::SeqCst);
        }
        fn_alias! {
            unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128;
            atomic_compare_exchange_ldxp_stxp_relaxed
                = _atomic_compare_exchange_ldxp_stxp(Ordering::Relaxed, Ordering::Relaxed);
            atomic_compare_exchange_ldxp_stxp_acquire
                = _atomic_compare_exchange_ldxp_stxp(Ordering::Acquire, Ordering::Acquire);
            atomic_compare_exchange_ldxp_stxp_release
                = _atomic_compare_exchange_ldxp_stxp(Ordering::Release, Ordering::Relaxed);
            atomic_compare_exchange_ldxp_stxp_acqrel
                = _atomic_compare_exchange_ldxp_stxp(Ordering::AcqRel, Ordering::Acquire);
            // AcqRel and SeqCst RMWs are equivalent in non-MSVC environments.
            #[cfg(target_env = "msvc")]
            atomic_compare_exchange_ldxp_stxp_seqcst
                = _atomic_compare_exchange_ldxp_stxp(Ordering::SeqCst, Ordering::SeqCst);
        }
        // SAFETY: the caller must guarantee that `dst` is valid for both writes and
        // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
        // and we've checked if FEAT_LSE is available.
        unsafe {
            let success = crate::utils::upgrade_success_ordering(success, failure);
            match success {
                Ordering::Relaxed => {
                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
                        if detect::detect().has_lse() {
                            atomic_compare_exchange_casp_relaxed
                        } else {
                            atomic_compare_exchange_ldxp_stxp_relaxed
                        }
                    })
                }
                Ordering::Acquire => {
                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
                        if detect::detect().has_lse() {
                            atomic_compare_exchange_casp_acquire
                        } else {
                            atomic_compare_exchange_ldxp_stxp_acquire
                        }
                    })
                }
                Ordering::Release => {
                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
                        if detect::detect().has_lse() {
                            atomic_compare_exchange_casp_release
                        } else {
                            atomic_compare_exchange_ldxp_stxp_release
                        }
                    })
                }
                // AcqRel and SeqCst RMWs are equivalent in both implementations in non-MSVC environments.
                #[cfg(not(target_env = "msvc"))]
                Ordering::AcqRel | Ordering::SeqCst => {
                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
                        if detect::detect().has_lse() {
                            atomic_compare_exchange_casp_acqrel
                        } else {
                            atomic_compare_exchange_ldxp_stxp_acqrel
                        }
                    })
                }
                #[cfg(target_env = "msvc")]
                Ordering::AcqRel => {
                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
                        if detect::detect().has_lse() {
                            atomic_compare_exchange_casp_acqrel
                        } else {
                            atomic_compare_exchange_ldxp_stxp_acqrel
                        }
                    })
                }
                #[cfg(target_env = "msvc")]
                Ordering::SeqCst => {
                    ifunc!(unsafe fn(dst: *mut u128, old: u128, new: u128) -> u128 {
                        if detect::detect().has_lse() {
                            atomic_compare_exchange_casp_seqcst
                        } else {
                            atomic_compare_exchange_ldxp_stxp_seqcst
                        }
                    })
                }
                _ => unreachable!("{:?}", success),
            }
        }
    };
    if prev == old {
        Ok(prev)
    } else {
        Err(prev)
    }
}
#[cfg(any(
    target_feature = "lse",
    portable_atomic_target_feature = "lse",
    not(portable_atomic_no_outline_atomics),
))]
#[inline]
unsafe fn _atomic_compare_exchange_casp(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_lse!();
    let order = crate::utils::upgrade_success_ordering(success, failure);

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
    // and the CPU supports FEAT_LSE.
    //
    // Refs:
    // - https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/CASPA--CASPAL--CASP--CASPL--CASPAL--CASP--CASPL--A64-
    // - https://developer.arm.com/documentation/ddi0602/2023-06/Base-Instructions/CASP--CASPA--CASPAL--CASPL--Compare-and-Swap-Pair-of-words-or-doublewords-in-memory-
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (prev_lo, prev_hi);
        macro_rules! cmpxchg {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    start_lse!(),
                    concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    // must be allocated to even/odd register pair
                    inout("x6") old.pair.lo => prev_lo,
                    inout("x7") old.pair.hi => prev_hi,
                    // must be allocated to even/odd register pair
                    in("x4") new.pair.lo,
                    in("x5") new.pair.hi,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(cmpxchg, order, write = success);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}
#[cfg(any(test, not(any(target_feature = "lse", portable_atomic_target_feature = "lse"))))]
#[inline]
unsafe fn _atomic_compare_exchange_ldxp_stxp(
    dst: *mut u128,
    old: u128,
    new: u128,
    success: Ordering,
    failure: Ordering,
) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    let order = crate::utils::upgrade_success_ordering(success, failure);

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, and that there are no concurrent non-atomic operations.
    //
    // Refs:
    // - LDXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDXP--A64-
    // - LDAXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/LDAXP--A64-
    // - STXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STXP--A64-
    // - STLXP: https://developer.arm.com/documentation/dui0801/l/A64-Data-Transfer-Instructions/STLXP--A64-
    //
    // Note: a Load-Exclusive pair (by itself) does not guarantee atomicity; to complete an atomic
    // operation (even load/store), a corresponding Store-Exclusive pair must succeed.
    // See the Arm Architecture Reference Manual for A-profile architecture,
    // Section B2.2.1 "Requirements for single-copy atomicity" and
    // Section B2.9 "Synchronization and semaphores" for more.
    unsafe {
        let old = U128 { whole: old };
        let new = U128 { whole: new };
        let (mut prev_lo, mut prev_hi);
        macro_rules! cmpxchg {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    "2:",
                    concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
                    "cmp {prev_lo}, {old_lo}",
                    "cset {r:w}, ne",
                    "cmp {prev_hi}, {old_hi}",
                    "cinc {r:w}, {r:w}, ne",
                    "cbz {r:w}, 3f",
                    concat!("st", $release, "xp {r:w}, {prev_lo}, {prev_hi}, [{dst}]"),
                    // 0 if the store was successful, 1 if no store was performed
                    "cbnz {r:w}, 2b",
                    "b 4f",
                    "3:",
                    concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
                    // 0 if the store was successful, 1 if no store was performed
                    "cbnz {r:w}, 2b",
                    "4:",
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    old_lo = in(reg) old.pair.lo,
                    old_hi = in(reg) old.pair.hi,
                    new_lo = in(reg) new.pair.lo,
                    new_hi = in(reg) new.pair.hi,
                    prev_lo = out(reg) prev_lo,
                    prev_hi = out(reg) prev_hi,
                    r = out(reg) _,
                    // Do not use `preserves_flags` because CMP modifies the condition flags.
                    options(nostack),
                )
            };
        }
        atomic_rmw!(cmpxchg, order, write = success);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}

// casp is always strong, and ldxp requires a corresponding (successful) stxp for
// its atomicity (see the code comment in _atomic_compare_exchange_ldxp_stxp).
// (i.e., aarch64 doesn't have a 128-bit weak CAS)
use self::atomic_compare_exchange as atomic_compare_exchange_weak;

// If FEAT_LSE is available at compile-time and the portable_atomic_ll_sc_rmw cfg is not set,
// we use CAS-based atomic RMW.
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
#[cfg(all(
    any(target_feature = "lse", portable_atomic_target_feature = "lse"),
    not(portable_atomic_ll_sc_rmw),
))]
use _atomic_swap_casp as atomic_swap;
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
#[cfg(not(all(
    any(target_feature = "lse", portable_atomic_target_feature = "lse"),
    not(portable_atomic_ll_sc_rmw),
)))]
use _atomic_swap_ldxp_stxp as atomic_swap;
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
use _atomic_swap_swpp as atomic_swap;
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
#[inline]
unsafe fn _atomic_swap_swpp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
    // and the CPU supports FEAT_LSE128.
    //
    // Refs:
    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/SWPP--SWPPA--SWPPAL--SWPPL--Swap-quadword-in-memory-?lang=en
    unsafe {
        let val = U128 { whole: val };
        let (prev_lo, prev_hi);
        macro_rules! swap {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    concat!("swpp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = inout(reg) val.pair.lo => prev_lo,
                    val_hi = inout(reg) val.pair.hi => prev_hi,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}
// Do not use atomic_rmw_cas_3 because it needs an extra MOV to implement swap.
#[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
#[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
#[inline]
unsafe fn _atomic_swap_casp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);
    debug_assert_lse!();

    // SAFETY: the caller must uphold the safety contract.
    // cfg guarantees that the CPU supports FEAT_LSE.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_lo, mut prev_hi);
        macro_rules! swap {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    start_lse!(),
                    // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
                    // This is not a single-copy atomic read, but that is okay because the
                    // subsequent CAS will check for consistency.
                    "ldp x4, x5, [{dst}]",
                    "2:",
                    // casp writes the current value to the first register pair,
                    // so copy the `out`'s value for later comparison.
                    "mov {tmp_lo}, x4",
                    "mov {tmp_hi}, x5",
                    concat!("casp", $acquire, $release, " x4, x5, x2, x3, [{dst}]"),
                    "cmp {tmp_hi}, x5",
                    "ccmp {tmp_lo}, x4, #0, eq",
                    "b.ne 2b",
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    tmp_lo = out(reg) _,
                    tmp_hi = out(reg) _,
                    // must be allocated to even/odd register pair
                    out("x4") prev_lo,
                    out("x5") prev_hi,
                    // must be allocated to even/odd register pair
                    in("x2") val.pair.lo,
                    in("x3") val.pair.hi,
                    // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
                    options(nostack),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}
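
// A test-only model of the CASP retry loop above: read an initial guess,
// then CAS until the value actually observed equals the guess; on failure
// the observed value becomes the next guess. The mov/cmp/ccmp/b.ne sequence
// implements exactly this for the two 64-bit halves of the register pairs.
#[cfg(test)]
#[test]
fn _doc_casp_swap_loop_model() {
    use core::sync::atomic::AtomicU64;
    let dst = AtomicU64::new(5); // 64-bit stand-in for the 128-bit pair
    let mut guess = dst.load(Ordering::Relaxed); // "ldp x4, x5, [{dst}]"
    let prev = loop {
        // "casp x4, x5, x2, x3, [{dst}]" + "cmp"/"ccmp"/"b.ne 2b"
        match dst.compare_exchange_weak(guess, 7, Ordering::Relaxed, Ordering::Relaxed) {
            Ok(prev) => break prev,
            Err(observed) => guess = observed,
        }
    };
    assert_eq!(prev, 5); // the swap returns the previous value
    assert_eq!(dst.load(Ordering::Relaxed), 7);
}
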
// Do not use atomic_rmw_ll_sc_3 because it needs an extra MOV to implement swap.
#[cfg(any(
    test,
    not(all(
        any(target_feature = "lse", portable_atomic_target_feature = "lse"),
        not(portable_atomic_ll_sc_rmw),
    ))
))]
#[inline]
unsafe fn _atomic_swap_ldxp_stxp(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must uphold the safety contract.
    unsafe {
        let val = U128 { whole: val };
        let (mut prev_lo, mut prev_hi);
        macro_rules! swap {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    "2:",
                    concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
                    concat!("st", $release, "xp {r:w}, {val_lo}, {val_hi}, [{dst}]"),
                    // 0 if the store was successful, 1 if no store was performed
                    "cbnz {r:w}, 2b",
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = in(reg) val.pair.lo,
                    val_hi = in(reg) val.pair.hi,
                    prev_lo = out(reg) prev_lo,
                    prev_hi = out(reg) prev_hi,
                    r = out(reg) _,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(swap, order);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}

/// Atomic RMW by LL/SC loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - val_lo/val_hi pair: val argument (read-only for `$op`)
/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
/// - new_lo/new_hi pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_3 {
    ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
        // If FEAT_LSE is available at compile-time and the portable_atomic_ll_sc_rmw cfg is not set,
        // we use the CAS-based atomic RMW generated by the atomic_rmw_cas_3! macro instead.
        #[cfg(not(all(
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
            not(portable_atomic_ll_sc_rmw),
        )))]
        use $name as $reexport_name;
        #[cfg(any(
            test,
            not(all(
                any(target_feature = "lse", portable_atomic_target_feature = "lse"),
                not(portable_atomic_ll_sc_rmw),
            ))
        ))]
        #[inline]
        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let val = U128 { whole: val };
                let (mut prev_lo, mut prev_hi);
                macro_rules! op {
                    ($acquire:tt, $release:tt, $fence:tt) => {
                        asm!(
                            "2:",
                            concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
                            $($op)*
                            concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
                            // 0 if the store was successful, 1 if no store was performed
                            "cbnz {r:w}, 2b",
                            $fence,
                            dst = in(reg) ptr_reg!(dst),
                            val_lo = in(reg) val.pair.lo,
                            val_hi = in(reg) val.pair.hi,
                            prev_lo = out(reg) prev_lo,
                            prev_hi = out(reg) prev_hi,
                            new_lo = out(reg) _,
                            new_hi = out(reg) _,
                            r = out(reg) _,
                            options(nostack $(, $preserves_flags)?),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
            }
        }
    };
}
/// Atomic RMW by CAS loop (3 arguments)
/// `unsafe fn(dst: *mut u128, val: u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - val_lo/val_hi pair: val argument (read-only for `$op`)
/// - x6/x7 pair: previous value loaded (read-only for `$op`)
/// - x4/x5 pair: new value that will be stored
macro_rules! atomic_rmw_cas_3 {
    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
        // If FEAT_LSE is not available at compile-time or the portable_atomic_ll_sc_rmw cfg is set,
        // we use the LL/SC-based atomic RMW generated by the atomic_rmw_ll_sc_3! macro instead.
        #[cfg(all(
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
            not(portable_atomic_ll_sc_rmw),
        ))]
        use $name as $reexport_name;
        #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
        #[inline]
        unsafe fn $name(dst: *mut u128, val: u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_lse!();
            // SAFETY: the caller must uphold the safety contract.
            // cfg guarantees that the CPU supports FEAT_LSE.
            unsafe {
                let val = U128 { whole: val };
                let (mut prev_lo, mut prev_hi);
                macro_rules! op {
                    ($acquire:tt, $release:tt, $fence:tt) => {
                        asm!(
                            start_lse!(),
                            // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
                            // This is not a single-copy atomic read, but that is okay because the
                            // subsequent CAS will check for consistency.
                            "ldp x6, x7, [{dst}]",
                            "2:",
                            // casp writes the current value to the first register pair,
                            // so copy the `out`'s value for later comparison.
                            "mov {tmp_lo}, x6",
                            "mov {tmp_hi}, x7",
                            $($op)*
                            concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
                            "cmp {tmp_hi}, x7",
                            "ccmp {tmp_lo}, x6, #0, eq",
                            "b.ne 2b",
                            $fence,
                            dst = in(reg) ptr_reg!(dst),
                            val_lo = in(reg) val.pair.lo,
                            val_hi = in(reg) val.pair.hi,
                            tmp_lo = out(reg) _,
                            tmp_hi = out(reg) _,
                            // must be allocated to even/odd register pair
                            out("x6") prev_lo,
                            out("x7") prev_hi,
                            // must be allocated to even/odd register pair
                            out("x4") _,
                            out("x5") _,
                            // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
                            options(nostack),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
            }
        }
    };
}

/// Atomic RMW by LL/SC loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - prev_lo/prev_hi pair: previous value loaded by ll (read-only for `$op`)
/// - new_lo/new_hi pair: new value that will be stored by sc
macro_rules! atomic_rmw_ll_sc_2 {
    ($name:ident as $reexport_name:ident $(($preserves_flags:tt))?, $($op:tt)*) => {
        // If FEAT_LSE is available at compile-time and the portable_atomic_ll_sc_rmw cfg is not set,
        // we use the CAS-based atomic RMW generated by the atomic_rmw_cas_2! macro instead.
        #[cfg(not(all(
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
            not(portable_atomic_ll_sc_rmw),
        )))]
        use $name as $reexport_name;
        #[cfg(any(
            test,
            not(all(
                any(target_feature = "lse", portable_atomic_target_feature = "lse"),
                not(portable_atomic_ll_sc_rmw),
            ))
        ))]
        #[inline]
        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            // SAFETY: the caller must uphold the safety contract.
            unsafe {
                let (mut prev_lo, mut prev_hi);
                macro_rules! op {
                    ($acquire:tt, $release:tt, $fence:tt) => {
                        asm!(
                            "2:",
                            concat!("ld", $acquire, "xp {prev_lo}, {prev_hi}, [{dst}]"),
                            $($op)*
                            concat!("st", $release, "xp {r:w}, {new_lo}, {new_hi}, [{dst}]"),
                            // 0 if the store was successful, 1 if no store was performed
                            "cbnz {r:w}, 2b",
                            $fence,
                            dst = in(reg) ptr_reg!(dst),
                            prev_lo = out(reg) prev_lo,
                            prev_hi = out(reg) prev_hi,
                            new_lo = out(reg) _,
                            new_hi = out(reg) _,
                            r = out(reg) _,
                            options(nostack $(, $preserves_flags)?),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
            }
        }
    };
}
/// Atomic RMW by CAS loop (2 arguments)
/// `unsafe fn(dst: *mut u128, order: Ordering) -> u128;`
///
/// `$op` can use the following registers:
/// - x6/x7 pair: previous value loaded (read-only for `$op`)
/// - x4/x5 pair: new value that will be stored
macro_rules! atomic_rmw_cas_2 {
    ($name:ident as $reexport_name:ident, $($op:tt)*) => {
        // If FEAT_LSE is not available at compile-time or the portable_atomic_ll_sc_rmw cfg is set,
        // we use the LL/SC-based atomic RMW generated by the atomic_rmw_ll_sc_2! macro instead.
        #[cfg(all(
            any(target_feature = "lse", portable_atomic_target_feature = "lse"),
            not(portable_atomic_ll_sc_rmw),
        ))]
        use $name as $reexport_name;
        #[cfg(any(test, not(portable_atomic_ll_sc_rmw)))]
        #[cfg(any(target_feature = "lse", portable_atomic_target_feature = "lse"))]
        #[inline]
        unsafe fn $name(dst: *mut u128, order: Ordering) -> u128 {
            debug_assert!(dst as usize % 16 == 0);
            debug_assert_lse!();
            // SAFETY: the caller must uphold the safety contract.
            // cfg guarantees that the CPU supports FEAT_LSE.
            unsafe {
                let (mut prev_lo, mut prev_hi);
                macro_rules! op {
                    ($acquire:tt, $release:tt, $fence:tt) => {
                        asm!(
                            start_lse!(),
                            // If FEAT_LSE2 is not supported, this works like byte-wise atomic.
                            // This is not a single-copy atomic read, but that is okay because the
                            // subsequent CAS will check for consistency.
                            "ldp x6, x7, [{dst}]",
                            "2:",
                            // casp writes the current value to the first register pair,
                            // so copy the `out`'s value for later comparison.
                            "mov {tmp_lo}, x6",
                            "mov {tmp_hi}, x7",
                            $($op)*
                            concat!("casp", $acquire, $release, " x6, x7, x4, x5, [{dst}]"),
                            "cmp {tmp_hi}, x7",
                            "ccmp {tmp_lo}, x6, #0, eq",
                            "b.ne 2b",
                            $fence,
                            dst = in(reg) ptr_reg!(dst),
                            tmp_lo = out(reg) _,
                            tmp_hi = out(reg) _,
                            // must be allocated to even/odd register pair
                            out("x6") prev_lo,
                            out("x7") prev_hi,
                            // must be allocated to even/odd register pair
                            out("x4") _,
                            out("x5") _,
                            // Do not use `preserves_flags` because CMP and CCMP modify the condition flags.
                            options(nostack),
                        )
                    };
                }
                atomic_rmw!(op, order);
                U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
            }
        }
    };
}

// Do not use `preserves_flags` because ADDS modifies the condition flags.
atomic_rmw_ll_sc_3! {
    _atomic_add_ldxp_stxp as atomic_add,
    select_le_or_be!("adds {new_lo}, {prev_lo}, {val_lo}", "adds {new_hi}, {prev_hi}, {val_hi}"),
    select_le_or_be!("adc {new_hi}, {prev_hi}, {val_hi}", "adc {new_lo}, {prev_lo}, {val_lo}"),
}
atomic_rmw_cas_3! {
    _atomic_add_casp as atomic_add,
    select_le_or_be!("adds x4, x6, {val_lo}", "adds x5, x7, {val_hi}"),
    select_le_or_be!("adc x5, x7, {val_hi}", "adc x4, x6, {val_lo}"),
}

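// A test-only model of the adds/adc pairs above: 128-bit addition performed
// as two 64-bit limb additions with carry, which is what the ADDS/ADC
// sequence computes (select_le_or_be! picks which half produces the carry).
#[cfg(test)]
#[test]
fn _doc_two_limb_add_model() {
    fn add_limbs(a: u128, b: u128) -> u128 {
        let (a_lo, a_hi) = (a as u64, (a >> 64) as u64);
        let (b_lo, b_hi) = (b as u64, (b >> 64) as u64);
        let (lo, carry) = a_lo.overflowing_add(b_lo); // "adds"
        let hi = a_hi.wrapping_add(b_hi).wrapping_add(carry as u64); // "adc"
        ((hi as u128) << 64) | lo as u128
    }
    assert_eq!(add_limbs(u64::MAX as u128, 1), 1 << 64); // carry crosses the limbs
    assert_eq!(add_limbs(u128::MAX, 1), 0); // wraps like fetch_add
}
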
// Do not use `preserves_flags` because SUBS modifies the condition flags.
atomic_rmw_ll_sc_3! {
    _atomic_sub_ldxp_stxp as atomic_sub,
    select_le_or_be!("subs {new_lo}, {prev_lo}, {val_lo}", "subs {new_hi}, {prev_hi}, {val_hi}"),
    select_le_or_be!("sbc {new_hi}, {prev_hi}, {val_hi}", "sbc {new_lo}, {prev_lo}, {val_lo}"),
}
atomic_rmw_cas_3! {
    _atomic_sub_casp as atomic_sub,
    select_le_or_be!("subs x4, x6, {val_lo}", "subs x5, x7, {val_hi}"),
    select_le_or_be!("sbc x5, x7, {val_hi}", "sbc x4, x6, {val_lo}"),
}

#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
atomic_rmw_ll_sc_3! {
    _atomic_and_ldxp_stxp as atomic_and (preserves_flags),
    "and {new_lo}, {prev_lo}, {val_lo}",
    "and {new_hi}, {prev_hi}, {val_hi}",
}
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
atomic_rmw_cas_3! {
    _atomic_and_casp as atomic_and,
    "and x4, x6, {val_lo}",
    "and x5, x7, {val_hi}",
}
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
#[inline]
unsafe fn atomic_and(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
    // and the CPU supports FEAT_LSE128.
    //
    // Refs:
    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDCLRP--LDCLRPA--LDCLRPAL--LDCLRPL--Atomic-bit-clear-on-quadword-in-memory-?lang=en
    unsafe {
        let val = U128 { whole: !val };
        let (prev_lo, prev_hi);
        macro_rules! and {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    concat!("ldclrp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = inout(reg) val.pair.lo => prev_lo,
                    val_hi = inout(reg) val.pair.hi => prev_hi,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(and, order);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}
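
// A test-only note on the `!val` above: LDCLRP is an atomic bit-clear
// (memory &= !mask), so fetch_and is implemented by passing the complement
// of the operand as the mask.
#[cfg(test)]
#[test]
fn _doc_and_via_bit_clear() {
    let (x, val) = (0b1100_u128, 0b1010_u128);
    let mask = !val; // the value handed to ldclrp
    assert_eq!(x & !mask, x & val); // bit-clear with !val computes AND with val
}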

atomic_rmw_ll_sc_3! {
    _atomic_nand_ldxp_stxp as atomic_nand (preserves_flags),
    "and {new_lo}, {prev_lo}, {val_lo}",
    "mvn {new_lo}, {new_lo}",
    "and {new_hi}, {prev_hi}, {val_hi}",
    "mvn {new_hi}, {new_hi}",
}
atomic_rmw_cas_3! {
    _atomic_nand_casp as atomic_nand,
    "and x4, x6, {val_lo}",
    "mvn x4, x4",
    "and x5, x7, {val_hi}",
    "mvn x5, x5",
}

#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
atomic_rmw_ll_sc_3! {
    _atomic_or_ldxp_stxp as atomic_or (preserves_flags),
    "orr {new_lo}, {prev_lo}, {val_lo}",
    "orr {new_hi}, {prev_hi}, {val_hi}",
}
#[cfg(not(any(target_feature = "lse128", portable_atomic_target_feature = "lse128")))]
atomic_rmw_cas_3! {
    _atomic_or_casp as atomic_or,
    "orr x4, x6, {val_lo}",
    "orr x5, x7, {val_hi}",
}
#[cfg(any(target_feature = "lse128", portable_atomic_target_feature = "lse128"))]
#[inline]
unsafe fn atomic_or(dst: *mut u128, val: u128, order: Ordering) -> u128 {
    debug_assert!(dst as usize % 16 == 0);

    // SAFETY: the caller must guarantee that `dst` is valid for both writes and
    // reads, 16-byte aligned, that there are no concurrent non-atomic operations,
    // and the CPU supports FEAT_LSE128.
    //
    // Refs:
    // - https://developer.arm.com/documentation/ddi0602/2023-03/Base-Instructions/LDSETP--LDSETPA--LDSETPAL--LDSETPL--Atomic-bit-set-on-quadword-in-memory-?lang=en
    unsafe {
        let val = U128 { whole: val };
        let (prev_lo, prev_hi);
        macro_rules! or {
            ($acquire:tt, $release:tt, $fence:tt) => {
                asm!(
                    concat!("ldsetp", $acquire, $release, " {val_lo}, {val_hi}, [{dst}]"),
                    $fence,
                    dst = in(reg) ptr_reg!(dst),
                    val_lo = inout(reg) val.pair.lo => prev_lo,
                    val_hi = inout(reg) val.pair.hi => prev_hi,
                    options(nostack, preserves_flags),
                )
            };
        }
        atomic_rmw!(or, order);
        U128 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole
    }
}

atomic_rmw_ll_sc_3! {
    _atomic_xor_ldxp_stxp as atomic_xor (preserves_flags),
    "eor {new_lo}, {prev_lo}, {val_lo}",
    "eor {new_hi}, {prev_hi}, {val_hi}",
}
atomic_rmw_cas_3! {
    _atomic_xor_casp as atomic_xor,
    "eor x4, x6, {val_lo}",
    "eor x5, x7, {val_hi}",
}

atomic_rmw_ll_sc_2! {
    _atomic_not_ldxp_stxp as atomic_not (preserves_flags),
    "mvn {new_lo}, {prev_lo}",
    "mvn {new_hi}, {prev_hi}",
}
atomic_rmw_cas_2! {
    _atomic_not_casp as atomic_not,
    "mvn x4, x6",
    "mvn x5, x7",
}

// Do not use `preserves_flags` because NEGS modifies the condition flags.
atomic_rmw_ll_sc_2! {
    _atomic_neg_ldxp_stxp as atomic_neg,
    select_le_or_be!("negs {new_lo}, {prev_lo}", "negs {new_hi}, {prev_hi}"),
    select_le_or_be!("ngc {new_hi}, {prev_hi}", "ngc {new_lo}, {prev_lo}"),
}
atomic_rmw_cas_2! {
    _atomic_neg_casp as atomic_neg,
    select_le_or_be!("negs x4, x6", "negs x5, x7"),
    select_le_or_be!("ngc x5, x7", "ngc x4, x6"),
}

// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
atomic_rmw_ll_sc_3! {
    _atomic_max_ldxp_stxp as atomic_max,
    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
    "csel {new_hi}, {prev_hi}, {val_hi}, lt", // select hi 64-bit
    "csel {new_lo}, {prev_lo}, {val_lo}, lt", // select lo 64-bit
}
atomic_rmw_cas_3! {
    _atomic_max_casp as atomic_max,
    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
    "csel x5, x7, {val_hi}, lt", // select hi 64-bit
    "csel x4, x6, {val_lo}, lt", // select lo 64-bit
}

// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
atomic_rmw_ll_sc_3! {
    _atomic_umax_ldxp_stxp as atomic_umax,
    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
    "csel {new_hi}, {prev_hi}, {val_hi}, lo", // select hi 64-bit
    "csel {new_lo}, {prev_lo}, {val_lo}, lo", // select lo 64-bit
}
atomic_rmw_cas_3! {
    _atomic_umax_casp as atomic_umax,
    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
    "csel x5, x7, {val_hi}, lo", // select hi 64-bit
    "csel x4, x6, {val_lo}, lo", // select lo 64-bit
}

// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
atomic_rmw_ll_sc_3! {
    _atomic_min_ldxp_stxp as atomic_min,
    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
    "csel {new_hi}, {prev_hi}, {val_hi}, ge", // select hi 64-bit
    "csel {new_lo}, {prev_lo}, {val_lo}, ge", // select lo 64-bit
}
atomic_rmw_cas_3! {
    _atomic_min_casp as atomic_min,
    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
    "csel x5, x7, {val_hi}, ge", // select hi 64-bit
    "csel x4, x6, {val_lo}, ge", // select lo 64-bit
}

// Do not use `preserves_flags` because CMP and SBCS modify the condition flags.
atomic_rmw_ll_sc_3! {
    _atomic_umin_ldxp_stxp as atomic_umin,
    select_le_or_be!("cmp {val_lo}, {prev_lo}", "cmp {val_hi}, {prev_hi}"),
    select_le_or_be!("sbcs xzr, {val_hi}, {prev_hi}", "sbcs xzr, {val_lo}, {prev_lo}"),
    "csel {new_hi}, {prev_hi}, {val_hi}, hs", // select hi 64-bit
    "csel {new_lo}, {prev_lo}, {val_lo}, hs", // select lo 64-bit
}
atomic_rmw_cas_3! {
    _atomic_umin_casp as atomic_umin,
    select_le_or_be!("cmp {val_lo}, x6", "cmp {val_hi}, x7"),
    select_le_or_be!("sbcs xzr, {val_hi}, x7", "sbcs xzr, {val_lo}, x6"),
    "csel x5, x7, {val_hi}, hs", // select hi 64-bit
    "csel x4, x6, {val_lo}, hs", // select lo 64-bit
}

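// A test-only model of the cmp/sbcs comparison used by the min/max family
// above: a 128-bit unsigned "<" computed limb-wise, subtracting the low
// halves first (cmp) and then the high halves with borrow (sbcs, with xzr
// discarding the difference so that only the flags feed the csel selects).
#[cfg(test)]
#[test]
fn _doc_two_limb_unsigned_compare_model() {
    fn lt(a: u128, b: u128) -> bool {
        let (a_lo, a_hi) = (a as u64, (a >> 64) as u64);
        let (b_lo, b_hi) = (b as u64, (b >> 64) as u64);
        let borrow = a_lo < b_lo; // "cmp {val_lo}, {prev_lo}"
        // "sbcs xzr, {val_hi}, {prev_hi}": the final borrow is the unsigned "<".
        (a_hi as u128) < (b_hi as u128) + (borrow as u128)
    }
    for &(a, b) in &[(0, 1), (1 << 64, 1), (u128::MAX, u128::MAX - 1), (5, 5)] {
        assert_eq!(lt(a, b), a < b);
    }
}
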
#[inline]
const fn is_lock_free() -> bool {
    IS_ALWAYS_LOCK_FREE
}
const IS_ALWAYS_LOCK_FREE: bool = true;

atomic128!(AtomicI128, i128, atomic_max, atomic_min);
atomic128!(AtomicU128, u128, atomic_umax, atomic_umin);

#[cfg(test)]
mod tests {
    use super::*;

    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    stress_test!(u128);
}