]> git.proxmox.com Git - rustc.git/blame - library/portable-simd/crates/core_simd/src/ops.rs
bump version to 1.79.0+dfsg1-1~bpo12+pve2
[rustc.git] / library / portable-simd / crates / core_simd / src / ops.rs
CommitLineData
4b012472 1use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
a2a8927a
XL
2use core::ops::{Add, Mul};
3use core::ops::{BitAnd, BitOr, BitXor};
4use core::ops::{Div, Rem, Sub};
5use core::ops::{Shl, Shr};
6
7mod assign;
8mod deref;
4b012472 9mod shift_scalar;
a2a8927a 10mod unary;
3c0e092e 11
4b012472 12impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
3c0e092e
XL
13where
14 T: SimdElement,
4b012472 15 LaneCount<N>: SupportedLaneCount,
3c0e092e
XL
16 I: core::slice::SliceIndex<[T]>,
17{
18 type Output = I::Output;
fe692bf9 19 #[inline]
3c0e092e
XL
20 fn index(&self, index: I) -> &Self::Output {
21 &self.as_array()[index]
22 }
23}
24
4b012472 25impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
3c0e092e
XL
26where
27 T: SimdElement,
4b012472 28 LaneCount<N>: SupportedLaneCount,
3c0e092e
XL
29 I: core::slice::SliceIndex<[T]>,
30{
fe692bf9 31 #[inline]
3c0e092e
XL
32 fn index_mut(&mut self, index: I) -> &mut Self::Output {
33 &mut self.as_mut_array()[index]
34 }
35}
36
/// Expands to a raw binary SIMD intrinsic call with no additional guarding.
/// Trailing tokens (the scalar type passed by `for_base_types!`) are ignored.
macro_rules! unsafe_base {
    ($a:ident, $b:ident, {$intrinsic:ident}, $($_unused:tt)*) => {
        // Safety: both operands are `Simd` vectors, which is what the
        // `core::intrinsics::simd` binary intrinsics require.
        unsafe { core::intrinsics::simd::$intrinsic($a, $b) }
    };
}
43
/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if `rhs >= <Int>::BITS`
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        // `<$int>::BITS` is a power of two, so `BITS - 1` is exactly the mask
        // that wraps the shift amount into the well-defined `0..BITS` range.
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
65
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// | | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0 | UB | panic!()
/// | <$int>::MIN / -1 | UB | <$int>::MIN
/// | <$int>::MIN % -1 | UB | 0
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident
      },
      $int:ident ) => {
        // Any zero lane in the divisor would be UB for the intrinsic: panic first,
        // matching the scalar operators' behavior.
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            // `<$int>::MIN != 0` holds only for signed types, so unsigned
            // types const-fold straight into the `else` arm below.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                // Replace the poisonous -1 divisor with 1 in the affected lanes;
                // MIN / 1 == MIN and MIN % 1 == 0, the values promised above.
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };
            // Safety: $lhs and rhs are vectors, and rhs has been cleansed of
            // every lane value that is UB for the intrinsic.
            unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
        }
    };
}
3c0e092e 104
// Expands one operator trait impl per scalar type in the `T = (...)` list,
// delegating the function body to the `$macro_impl` expression macro
// (one of `unsafe_base!`, `wrap_bitshift!`, or `int_divrem_guard!`).
macro_rules! for_base_types {
    ( T = ($($scalar:ident),*);
      type Lhs = Simd<T, N>;
      type Rhs = Simd<T, N>;
      type Output = $out:ty;

      impl $op:ident::$call:ident {
          $macro_impl:ident $inner:tt
      }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                #[must_use = "operator returns a new vector without mutating the inputs"]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    // The scalar type is forwarded so the impl macro can
                    // reference `<$int>::MIN` / `<$int>::BITS` when it needs to.
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}
134
// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
        $inner:tt
        $($rest:tt)*
    ) => {
        // Emit the impls for the first `impl Op::call { ... }` entry...
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
            $inner
        }
        // ...then recurse on whatever entries remain, re-supplying the header.
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    // Base case: no `impl` entries left to munch.
    ($($done:tt)*) => {
        // Done.
    }
}
172
// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    // Div and Rem must go through the guard macro: the raw intrinsics are UB
    // for zero divisors and for the signed MIN / -1 overflow case.
    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
233
// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
// (Float div/rem by zero is well-defined IEEE 754 behavior, so no guard macro.)
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}