use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}

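// Illustrative sketch only (a hypothetical test module): the impls above defer to the
// backing array, so any `SliceIndex<[T]>` works, from single lanes to subranges. This
// assumes the public `Simd::from_array`/`Simd::to_array` constructors.
#[cfg(test)]
mod index_examples {
    use crate::simd::Simd;

    #[test]
    fn index_like_a_slice() {
        let mut v = Simd::<i32, 4>::from_array([10, 20, 30, 40]);
        // A `usize` index returns a single lane...
        assert_eq!(v[0], 10);
        // ...while a range returns a slice of lanes.
        assert_eq!(v[1..3], [20, 30]);
        // `IndexMut` allows writing a lane in place.
        v[3] = 44;
        assert_eq!(v.to_array(), [10, 20, 30, 44]);
    }
}
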
macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}

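// Expansion sketch (illustrative only, using the argument names passed further below):
// inside `Add::add`, `unsafe_base!(self, rhs, { simd_add }, i32)` expands to
// `unsafe { core::intrinsics::simd::simd_add(self, rhs) }`;
// the trailing scalar-type token is simply ignored by this macro.
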
/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if `rhs >= <Int>::BITS`.
/// At worst, this may add another instruction and cycle;
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}

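// Illustrative sketch of the wrapping behavior above (a hypothetical test module,
// assuming the vector Shl impl defined later in this file): the shift amount is
// masked to `BITS - 1`, so an out-of-range count wraps instead of becoming poison.
#[cfg(test)]
mod wrap_bitshift_examples {
    use crate::simd::Simd;

    #[test]
    fn shift_count_wraps() {
        let ones = Simd::<u8, 4>::splat(1);
        // 8 & (u8::BITS - 1) == 0, so shifting by 8 behaves like shifting by 0.
        assert_eq!(ones << Simd::splat(8), ones);
        // 9 & 7 == 1, so shifting by 9 behaves like shifting by 1.
        assert_eq!(ones << Simd::splat(9), Simd::splat(2));
    }
}
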
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a well-defined Rust value is returned instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident
      },
      $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };
            // Safety: $lhs and rhs are vectors
            unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
        }
    };
}

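// Illustrative sketch of the guarded behavior documented above (a hypothetical test
// module, assuming the integer Div/Rem impls generated later in this file):
#[cfg(test)]
mod int_divrem_guard_examples {
    use crate::simd::Simd;

    #[test]
    fn min_over_negative_one_is_defined() {
        let min = Simd::<i32, 4>::splat(i32::MIN);
        let neg_one = Simd::splat(-1);
        // LLVM would treat these as UB; the guard turns them into defined values.
        assert_eq!(min / neg_one, min);
        assert_eq!(min % neg_one, Simd::splat(0));
    }

    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn division_by_zero_panics() {
        let _ = Simd::<i32, 4>::splat(1) / Simd::splat(0);
    }
}
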
macro_rules! for_base_types {
    ( T = ($($scalar:ident),*);
      type Lhs = Simd<T, N>;
      type Rhs = Simd<T, N>;
      type Output = $out:ty;

      impl $op:ident::$call:ident {
        $macro_impl:ident $inner:tt
      }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                #[must_use = "operator returns a new vector without mutating the inputs"]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}

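// Expansion sketch (illustrative only): for `T = (i32, ...)` and
// `impl Add::add { unsafe_base { simd_add } }`, each iteration of the repetition
// above emits roughly
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         fn add(self, rhs: Self) -> Self::Output {
//             unsafe_base!(self, rhs, { simd_add }, i32)
//         }
//     }
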
// A "TokenTree muncher": takes a set of scalar types `T = {};`,
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr: impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}

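// Recursion sketch (illustrative only): given the invocation below, the first match arm
// peels off `impl Add::add { unsafe_base { simd_add } }`, hands it to `for_base_types!`,
// then re-invokes `for_base_ops!` with the remaining `impl` blocks until the catch-all
// arm terminates the recursion.
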
// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS.
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}

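// Illustrative sketch of the wrapping semantics noted above (a hypothetical test module,
// assuming the impls generated by the block above): simd_add/simd_mul/simd_sub wrap on
// overflow rather than panicking like the scalar operators do in debug builds.
#[cfg(test)]
mod wrapping_int_ops_examples {
    use crate::simd::Simd;

    #[test]
    fn add_and_mul_wrap_on_overflow() {
        let max = Simd::<u8, 4>::splat(u8::MAX);
        assert_eq!(max + Simd::splat(1), Simd::splat(0)); // 255 + 1 wraps to 0
        assert_eq!(max * Simd::splat(2), Simd::splat(254)); // 255 * 2 wraps to 254
    }
}
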
// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
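
// Illustrative sketch of the note above (a hypothetical test module, assuming the float
// impls generated by the block above): float division follows IEEE 754 semantics, so
// dividing by zero yields infinity or NaN instead of panicking like the integer impls do.
#[cfg(test)]
mod float_ops_examples {
    use crate::simd::Simd;

    #[test]
    fn float_division_never_panics() {
        let zero = Simd::<f32, 4>::splat(0.0);
        assert_eq!(Simd::splat(1.0) / zero, Simd::splat(f32::INFINITY));
        assert!((zero / zero).to_array().iter().all(|x| x.is_nan()));
    }
}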