// compile-flags: -O
// only-x86_64
// ignore-debug: the debug assertions get in the way

#![crate_type = "lib"]

use std::mem::swap;
use std::ptr::{copy_nonoverlapping, read, write};

type KeccakBuffer = [[u64; 5]; 5];

// A basic read+copy+write swap implementation ends up copying one of the values
// to the stack for large types, which is completely unnecessary as the lack of
// overlap means we can just do whatever fits in registers at a time.

// CHECK-LABEL: @swap_basic
#[no_mangle]
pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
// CHECK: alloca [5 x [5 x i64]]

    // SAFETY: exclusive references are always valid to read/write,
    // are non-overlapping, and nothing here panics so it's drop-safe.
    unsafe {
        let z = read(x);
        copy_nonoverlapping(y, x, 1);
        write(y, z);
    }
}
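
// Purely as an illustration of the comment above (a sketch under the assumption
// that register-sized pieces are enough; the function name is hypothetical, this
// is not the standard library's actual implementation, and it is not covered by
// any CHECK lines in this test): since the two referents cannot overlap, the
// whole buffer can be swapped one u64 at a time with no full-size temporary.
pub fn swap_one_u64_at_a_time(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
    for (a, b) in x.iter_mut().flatten().zip(y.iter_mut().flatten()) {
        // Each iteration only moves a single u64 through registers.
        swap(a, b);
    }
}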

// This test verifies that the library does something smarter, and thus
// doesn't need any scratch space on the stack.

// CHECK-LABEL: @swap_std
#[no_mangle]
pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
    swap(x, y)
}

// Verify that types with usize alignment are swapped via vectored usizes,
// rather than falling back to byte-level code.

// CHECK-LABEL: @swap_slice
#[no_mangle]
pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
    if x.len() == y.len() {
        x.swap_with_slice(y);
    }
}

// But for a large align-1 type, vectorized byte copying is what we want.

type OneKilobyteBuffer = [u8; 1024];

// CHECK-LABEL: @swap_1kb_slices
#[no_mangle]
pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
    if x.len() == y.len() {
        x.swap_with_slice(y);
    }
}

// This verifies that the 2×read + 2×write optimizes to just 3 memcpys
// for an unusual type like this. It's not clear whether we should do anything
// smarter in Rust for these, so for now it's fine to leave them up to the backend.
// That's not as bad as it might seem, since, for example, LLVM will lower the
// memcpys below to VMOVAPS on YMMs if one enables the AVX target feature.
// Eventually we'll be able to pass `align_of::<T>` to a const generic and
// thus pick a smarter chunk size ourselves without huge code duplication.

#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);
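
// For illustration only (a sketch of the "2×read + 2×write" shape described in
// the comment above; the function name is hypothetical, this is not necessarily
// how the standard library implements `swap`, and it is not covered by any
// CHECK lines in this test): two reads into temporaries followed by two writes,
// which the backend is expected to collapse into three memcpys.
pub fn swap_big_aligned_by_hand(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
    // SAFETY: exclusive references are valid for reads and writes, the two
    // referents cannot overlap, and both temporaries are written back out,
    // so no value is duplicated or leaked.
    unsafe {
        let a = read(x);
        let b = read(y);
        write(x, b);
        write(y, a);
    }
}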

// CHECK-LABEL: @swap_big_aligned
#[no_mangle]
pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
// CHECK-NOT: call void @llvm.memcpy
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
// CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
// CHECK-NOT: call void @llvm.memcpy
    swap(x, y)
}