use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod unary;

impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
where
    T: SimdElement,
    LaneCount<LANES>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
where
    T: SimdElement,
    LaneCount<LANES>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
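
// A minimal usage sketch for the Index/IndexMut impls above, assuming a std
// test harness (the crate's real tests live in a separate tests directory).
#[cfg(test)]
mod index_examples {
    use crate::simd::Simd;

    #[test]
    fn index_and_index_mut() {
        let mut v = Simd::<i32, 4>::from_array([10, 20, 30, 40]);
        // Indexing with a usize yields a single lane...
        assert_eq!(v[2], 30);
        // ...and indexing with a range yields a slice of lanes.
        assert_eq!(&v[1..3], &[20, 30]);
        // IndexMut permits writing through the same paths.
        v[0] = 99;
        assert_eq!(v[0], 99);
    }
}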

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
    };
}

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            $crate::simd::intrinsics::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
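
// A minimal sketch of the masking `wrap_bitshift` performs, assuming a std
// test harness: shift amounts of <Int>::BITS or more are reduced modulo
// BITS rather than producing a poison value.
#[cfg(test)]
mod bitshift_examples {
    use crate::simd::Simd;

    #[test]
    fn shift_amount_wraps_at_bits() {
        let one = Simd::<u8, 4>::splat(1);
        // 8 & (u8::BITS - 1) == 0, so this is `1 << 0`, not UB.
        assert_eq!(one << Simd::splat(8), Simd::splat(1));
        // 9 & 7 == 1, so this is `1 << 1`.
        assert_eq!(one << Simd::splat(9), Simd::splat(2));
    }
}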

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident
      },
      $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };
            // Safety: $lhs and rhs are vectors
            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
        }
    };
}
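
// A minimal sketch of the table above, assuming a std test harness: the
// guard turns both LLVM UB cases into defined Rust results, and a zero
// divisor panics exactly like scalar division.
#[cfg(test)]
mod divrem_examples {
    use crate::simd::Simd;

    #[test]
    fn min_div_neg_one_is_defined() {
        let lhs = Simd::<i32, 4>::splat(i32::MIN);
        let rhs = Simd::splat(-1);
        assert_eq!(lhs / rhs, Simd::splat(i32::MIN));
        assert_eq!(lhs % rhs, Simd::splat(0));
    }

    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn any_zero_divisor_panics() {
        let _ = Simd::<i32, 4>::splat(1) / Simd::from_array([1, 1, 0, 1]);
    }
}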

macro_rules! for_base_types {
    ( T = ($($scalar:ident),*);
      type Lhs = Simd<T, N>;
      type Rhs = Simd<T, N>;
      type Output = $out:ty;

      impl $op:ident::$call:ident {
          $macro_impl:ident $inner:tt
      }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                #[must_use = "operator returns a new vector without mutating the inputs"]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}

// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}
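
// For illustration only: one step of the muncher above, given
// `impl Add::add { unsafe_base { simd_add } }` over `T = (i32, ...)`,
// hands for_base_types roughly this impl per scalar:
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         fn add(self, rhs: Self) -> Self::Output {
//             // Safety: self and rhs are vectors
//             unsafe { crate::simd::intrinsics::simd_add(self, rhs) }
//         }
//     }
//
// and then recurses on the remaining `impl Op::fn { ... }` blocks.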

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
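
// A minimal sketch of the wrapping note above, assuming a std test harness:
// unlike scalar `+` in a debug build, lane arithmetic never panics on
// overflow; it wraps.
#[cfg(test)]
mod wrapping_examples {
    use crate::simd::Simd;

    #[test]
    fn add_wraps_instead_of_panicking() {
        let max = Simd::<u8, 4>::splat(u8::MAX);
        assert_eq!(max + Simd::splat(1), Simd::splat(0));
    }
}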

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
260 | } |