use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod shift_scalar;
mod unary;

impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
    T: SimdElement,
    LaneCount<N>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}

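// Illustrative sketch only (a hypothetical test module): the impls above defer to the
// backing array, so any `SliceIndex<[T]>` works, from single lanes to subranges. This
// assumes the public `Simd::from_array`/`Simd::to_array` constructors.
#[cfg(test)]
mod index_examples {
    use crate::simd::Simd;

    #[test]
    fn index_like_a_slice() {
        let mut v = Simd::<i32, 4>::from_array([10, 20, 30, 40]);
        // A `usize` index returns a single lane...
        assert_eq!(v[0], 10);
        // ...while a range returns a slice of lanes.
        assert_eq!(v[1..3], [20, 30]);
        // `IndexMut` allows writing a lane in place.
        v[3] = 44;
        assert_eq!(v.to_array(), [10, 20, 30, 44]);
    }
}
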
macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { core::intrinsics::simd::$simd_call($lhs, $rhs) }
    };
}

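// Expansion sketch (illustrative only, using the argument names passed further below):
// inside `Add::add`, `unsafe_base!(self, rhs, { simd_add }, i32)` expands to
// `unsafe { core::intrinsics::simd::simd_add(self, rhs) }`;
// the trailing scalar-type token is simply ignored by this macro.
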
/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if `rhs >= <Int>::BITS`.
/// At worst, this may add another instruction and cycle;
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            core::intrinsics::simd::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}

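// Illustrative sketch of the wrapping behavior above (a hypothetical test module,
// assuming the vector Shl impl defined later in this file): the shift amount is
// masked to `BITS - 1`, so an out-of-range count wraps instead of becoming poison.
#[cfg(test)]
mod wrap_bitshift_examples {
    use crate::simd::Simd;

    #[test]
    fn shift_count_wraps() {
        let ones = Simd::<u8, 4>::splat(1);
        // 8 & (u8::BITS - 1) == 0, so shifting by 8 behaves like shifting by 0.
        assert_eq!(ones << Simd::splat(8), ones);
        // 9 & 7 == 1, so shifting by 9 behaves like shifting by 1.
        assert_eq!(ones << Simd::splat(9), Simd::splat(2));
    }
}
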
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a well-defined Rust value is returned instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident
      },
      $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };
            // Safety: $lhs and rhs are vectors
            unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
        }
    };
}

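// Illustrative sketch of the guarded behavior documented above (a hypothetical test
// module, assuming the integer Div/Rem impls generated later in this file):
#[cfg(test)]
mod int_divrem_guard_examples {
    use crate::simd::Simd;

    #[test]
    fn min_over_negative_one_is_defined() {
        let min = Simd::<i32, 4>::splat(i32::MIN);
        let neg_one = Simd::splat(-1);
        // LLVM would treat these as UB; the guard turns them into defined values.
        assert_eq!(min / neg_one, min);
        assert_eq!(min % neg_one, Simd::splat(0));
    }

    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn division_by_zero_panics() {
        let _ = Simd::<i32, 4>::splat(1) / Simd::splat(0);
    }
}
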
macro_rules! for_base_types {
    ( T = ($($scalar:ident),*);
      type Lhs = Simd<T, N>;
      type Rhs = Simd<T, N>;
      type Output = $out:ty;

      impl $op:ident::$call:ident {
        $macro_impl:ident $inner:tt
      }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                #[must_use = "operator returns a new vector without mutating the inputs"]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}

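// Expansion sketch (illustrative only): for `T = (i32, ...)` and
// `impl Add::add { unsafe_base { simd_add } }`, each iteration of the repetition
// above emits roughly
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         fn add(self, rhs: Self) -> Self::Output {
//             unsafe_base!(self, rhs, { simd_add }, i32)
//         }
//     }
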
// A "TokenTree muncher": takes a set of scalar types `T = {};`,
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr: impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}

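// Recursion sketch (illustrative only): given the invocation below, the first match arm
// peels off `impl Add::add { unsafe_base { simd_add } }`, hands it to `for_base_types!`,
// then re-invokes `for_base_ops!` with the remaining `impl` blocks until the catch-all
// arm terminates the recursion.
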
// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS.
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}

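// Illustrative sketch of the wrapping semantics noted above (a hypothetical test module,
// assuming the impls generated by the block above): simd_add/simd_mul/simd_sub wrap on
// overflow rather than panicking like the scalar operators do in debug builds.
#[cfg(test)]
mod wrapping_int_ops_examples {
    use crate::simd::Simd;

    #[test]
    fn add_and_mul_wrap_on_overflow() {
        let max = Simd::<u8, 4>::splat(u8::MAX);
        assert_eq!(max + Simd::splat(1), Simd::splat(0)); // 255 + 1 wraps to 0
        assert_eq!(max * Simd::splat(2), Simd::splat(254)); // 255 * 2 wraps to 254
    }
}
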
// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
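
// Illustrative sketch of the note above (a hypothetical test module, assuming the float
// impls generated by the block above): float division follows IEEE 754 semantics, so
// dividing by zero yields infinity or NaN instead of panicking like the integer impls do.
#[cfg(test)]
mod float_ops_examples {
    use crate::simd::Simd;

    #[test]
    fn float_division_never_panics() {
        let zero = Simd::<f32, 4>::splat(0.0);
        assert_eq!(Simd::splat(1.0) / zero, Simd::splat(f32::INFINITY));
        assert!((zero / zero).to_array().iter().all(|x| x.is_nan()));
    }
}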