// compile-flags: -O
// only-x86_64
// ignore-debug: the debug assertions get in the way

#![crate_type = "lib"]

use std::mem::swap;
use std::ptr::{read, copy_nonoverlapping, write};

// A 5×5 grid of u64 lanes (200 bytes total) — named for the Keccak/SHA-3
// state shape. Used here as a representative "large but u64-aligned" type.
type KeccakBuffer = [[u64; 5]; 5];

// A basic read+copy+write swap implementation ends up copying one of the values
// to stack for large types, which is completely unnecessary as the lack of
// overlap means we can just do whatever fits in registers at a time.

// CHECK-LABEL: @swap_basic
#[no_mangle]
pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
    // This naive pattern is *expected* to materialize a full stack temporary,
    // so the CHECK asserts the alloca IS present (contrast with `swap_std`).
    // CHECK: alloca [5 x [5 x i64]]

    // SAFETY: exclusive references are always valid to read/write,
    // are non-overlapping, and nothing here panics so it's drop-safe.
    unsafe {
        let z = read(x);
        copy_nonoverlapping(y, x, 1);
        write(y, z);
    }
}

// This test verifies that the library does something smarter, and thus
// doesn't need any scratch space on the stack.

// CHECK-LABEL: @swap_std
#[no_mangle]
pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
    // No stack temporary at all, and the data should move via vector-width
    // i64 loads/stores rather than a whole-value copy.
    // CHECK-NOT: alloca
    // CHECK: load <{{[0-9]+}} x i64>
    // CHECK: store <{{[0-9]+}} x i64>
    swap(x, y)
}

// Verify that types with usize alignment are swapped via vectored usizes,
// not falling back to byte-level code.

// CHECK-LABEL: @swap_slice
#[no_mangle]
pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
    // Same expectation as `swap_std`, but through the slice API:
    // no scratch alloca, and i64-element vector loads/stores.
    // CHECK-NOT: alloca
    // CHECK: load <{{[0-9]+}} x i64>
    // CHECK: store <{{[0-9]+}} x i64>
    if x.len() == y.len() {
        x.swap_with_slice(y);
    }
}

// But for a large align-1 type, vectorized byte copying is what we want.

// 1 KiB of u8: large, but with alignment 1, so u64-sized chunking is
// not available and the swap must happen at byte granularity.
type OneKilobyteBuffer = [u8; 1024];

// CHECK-LABEL: @swap_1kb_slices
#[no_mangle]
pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
    // Align-1 element type: still no stack temporary, but the vector ops
    // operate on i8 elements rather than i64.
    // CHECK-NOT: alloca
    // CHECK: load <{{[0-9]+}} x i8>
    // CHECK: store <{{[0-9]+}} x i8>
    if x.len() == y.len() {
        x.swap_with_slice(y);
    }
}

// This verifies that the 2×read + 2×write optimizes to just 3 memcpys
// for an unusual type like this. It's not clear whether we should do anything
// smarter in Rust for these, so for now it's fine to leave these up to the backend.
// That's not as bad as it might seem, as for example, LLVM will lower the
// memcpys below to VMOVAPS on YMMs if one enables the AVX target feature.
// Eventually we'll be able to pass `align_of::<T>` to a const generic and
// thus pick a smarter chunk size ourselves without huge code duplication.

// 192 bytes of payload, over-aligned to 64 bytes so the backend is free to
// use wide aligned memory operations when lowering the swap's memcpys.
#[repr(align(64))]
pub struct BigButHighlyAligned([u8; 64 * 3]);

// CHECK-LABEL: @swap_big_aligned
#[no_mangle]
pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
    // Exactly three memcpy calls — the bracketing CHECK-NOTs rule out any
    // extra ones — each covering the whole 192-byte value
    // (dereferenceable(192)) at the full 64-byte alignment.
    // CHECK-NOT: call void @llvm.memcpy
    // CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
    // CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
    // CHECK: call void @llvm.memcpy.{{.+}}({{i8\*|ptr}} noundef nonnull align 64 dereferenceable(192)
    // CHECK-NOT: call void @llvm.memcpy
    swap(x, y)
}