use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
use core::ops::{Shl, Shr};

mod assign;
mod deref;
mod unary;

impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
where
    T: SimdElement,
    LaneCount<LANES>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    type Output = I::Output;
    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        &self.as_array()[index]
    }
}

impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
where
    T: SimdElement,
    LaneCount<LANES>: SupportedLaneCount,
    I: core::slice::SliceIndex<[T]>,
{
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        &mut self.as_mut_array()[index]
    }
}
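
// A minimal usage sketch for the Index/IndexMut impls above, assuming a std
// test harness (the crate's real tests live in a separate tests directory).
#[cfg(test)]
mod index_examples {
    use crate::simd::Simd;

    #[test]
    fn index_and_index_mut() {
        let mut v = Simd::<i32, 4>::from_array([10, 20, 30, 40]);
        // Indexing with a usize yields a single lane...
        assert_eq!(v[2], 30);
        // ...and indexing with a range yields a slice of lanes.
        assert_eq!(&v[1..3], &[20, 30]);
        // IndexMut permits writing through the same paths.
        v[0] = 99;
        assert_eq!(v[0], 99);
    }
}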

macro_rules! unsafe_base {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
        // Safety: $lhs and $rhs are vectors
        unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
    };
}

/// SAFETY: This macro should not be used for anything except Shl or Shr, and passed the appropriate shift intrinsic.
/// It handles performing a bitand in addition to calling the shift operator, so that the result
/// is well-defined: LLVM can return a poison value if you shl, lshr, or ashr if `rhs >= <Int>::BITS`.
/// At worst, this will maybe add another instruction and cycle,
/// at best, it may open up more optimization opportunities,
/// or simply be elided entirely, especially for SIMD ISAs which default to this.
///
// FIXME: Consider implementing this in cg_llvm instead?
// cg_clif defaults to this, and scalar MIR shifts also default to wrapping
macro_rules! wrap_bitshift {
    ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
        #[allow(clippy::suspicious_arithmetic_impl)]
        // Safety: $lhs and the bitand result are vectors
        unsafe {
            $crate::simd::intrinsics::$simd_call(
                $lhs,
                $rhs.bitand(Simd::splat(<$int>::BITS as $int - 1)),
            )
        }
    };
}
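
// A minimal sketch of the masking `wrap_bitshift` performs, assuming a std
// test harness: shift amounts of <Int>::BITS or more are reduced modulo
// BITS rather than producing a poison value.
#[cfg(test)]
mod bitshift_examples {
    use crate::simd::Simd;

    #[test]
    fn shift_amount_wraps_at_bits() {
        let one = Simd::<u8, 4>::splat(1);
        // 8 & (u8::BITS - 1) == 0, so this is `1 << 0`, not UB.
        assert_eq!(one << Simd::splat(8), Simd::splat(1));
        // 9 & 7 == 1, so this is `1 << 1`.
        assert_eq!(one << Simd::splat(9), Simd::splat(2));
    }
}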

/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// |                  | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0        | UB   | panic!()
/// | <$int>::MIN / -1 | UB   | <$int>::MIN
/// | <$int>::MIN % -1 | UB   | 0
///
macro_rules! int_divrem_guard {
    ( $lhs:ident,
      $rhs:ident,
      { const PANIC_ZERO: &'static str = $zero:literal;
        $simd_call:ident
      },
      $int:ident ) => {
        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
            panic!($zero);
        } else {
            // Prevent otherwise-UB overflow on the MIN / -1 case.
            let rhs = if <$int>::MIN != 0 {
                // This should, at worst, optimize to a few branchless logical ops
                // Ideally, this entire conditional should evaporate
                // Fire LLVM and implement those manually if it doesn't get the hint
                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                // type inference can break here, so cut an SInt to size
                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
                .select(Simd::splat(1 as _), $rhs)
            } else {
                // Nice base case to make it easy to const-fold away the other branch.
                $rhs
            };
            // Safety: $lhs and rhs are vectors
            unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
        }
    };
}
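
// A minimal sketch of the table above, assuming a std test harness: the
// guard turns both LLVM UB cases into defined Rust results, and a zero
// divisor panics exactly like scalar division.
#[cfg(test)]
mod divrem_examples {
    use crate::simd::Simd;

    #[test]
    fn min_div_neg_one_is_defined() {
        let lhs = Simd::<i32, 4>::splat(i32::MIN);
        let rhs = Simd::splat(-1);
        assert_eq!(lhs / rhs, Simd::splat(i32::MIN));
        assert_eq!(lhs % rhs, Simd::splat(0));
    }

    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn any_zero_divisor_panics() {
        let _ = Simd::<i32, 4>::splat(1) / Simd::from_array([1, 1, 0, 1]);
    }
}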

macro_rules! for_base_types {
    ( T = ($($scalar:ident),*);
      type Lhs = Simd<T, N>;
      type Rhs = Simd<T, N>;
      type Output = $out:ty;

      impl $op:ident::$call:ident {
          $macro_impl:ident $inner:tt
      }) => {
        $(
            impl<const N: usize> $op<Self> for Simd<$scalar, N>
            where
                $scalar: SimdElement,
                LaneCount<N>: SupportedLaneCount,
            {
                type Output = $out;

                #[inline]
                #[must_use = "operator returns a new vector without mutating the inputs"]
                // TODO: only useful for int Div::div, but we hope that this
                // will essentially always get inlined anyway.
                #[track_caller]
                fn $call(self, rhs: Self) -> Self::Output {
                    $macro_impl!(self, rhs, $inner, $scalar)
                }
            }
        )*
    }
}

// A "TokenTree muncher": takes a set of scalar types `T = {};`
// type parameters for the ops it implements, `Op::fn` names,
// and a macro that expands into an expr, substituting in an intrinsic.
// It passes that to for_base_types, which expands an impl for the types,
// using the expanded expr in the function, and recurses with itself.
//
// tl;dr impls a set of ops::{Traits} for a set of types
macro_rules! for_base_ops {
    (
        T = $types:tt;
        type Lhs = Simd<T, N>;
        type Rhs = Simd<T, N>;
        type Output = $out:ident;
        impl $op:ident::$call:ident
            $inner:tt
        $($rest:tt)*
    ) => {
        for_base_types! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            impl $op::$call
                $inner
        }
        for_base_ops! {
            T = $types;
            type Lhs = Simd<T, N>;
            type Rhs = Simd<T, N>;
            type Output = $out;
            $($rest)*
        }
    };
    ($($done:tt)*) => {
        // Done.
    }
}
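
// For illustration only: one step of the muncher above, given
// `impl Add::add { unsafe_base { simd_add } }` over `T = (i32, ...)`,
// hands for_base_types roughly this impl per scalar:
//
//     impl<const N: usize> Add<Self> for Simd<i32, N>
//     where
//         i32: SimdElement,
//         LaneCount<N>: SupportedLaneCount,
//     {
//         type Output = Self;
//         #[inline]
//         fn add(self, rhs: Self) -> Self::Output {
//             // Safety: self and rhs are vectors
//             unsafe { crate::simd::intrinsics::simd_add(self, rhs) }
//         }
//     }
//
// and then recurses on the remaining `impl Op::fn { ... }` blocks.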

// Integers can always accept add, mul, sub, bitand, bitor, and bitxor.
// For all of these operations, simd_* intrinsics apply wrapping logic.
for_base_ops! {
    T = (i8, i16, i32, i64, isize, u8, u16, u32, u64, usize);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl BitAnd::bitand {
        unsafe_base { simd_and }
    }

    impl BitOr::bitor {
        unsafe_base { simd_or }
    }

    impl BitXor::bitxor {
        unsafe_base { simd_xor }
    }

    impl Div::div {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to divide by zero";
            simd_div
        }
    }

    impl Rem::rem {
        int_divrem_guard {
            const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
            simd_rem
        }
    }

    // The only question is how to handle shifts >= <Int>::BITS?
    // Our current solution uses wrapping logic.
    impl Shl::shl {
        wrap_bitshift { simd_shl }
    }

    impl Shr::shr {
        wrap_bitshift {
            // This automatically monomorphizes to lshr or ashr, depending,
            // so it's fine to use it for both UInts and SInts.
            simd_shr
        }
    }
}
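
// A minimal sketch of the wrapping note above, assuming a std test harness:
// unlike scalar `+` in a debug build, lane arithmetic never panics on
// overflow; it wraps.
#[cfg(test)]
mod wrapping_examples {
    use crate::simd::Simd;

    #[test]
    fn add_wraps_instead_of_panicking() {
        let max = Simd::<u8, 4>::splat(u8::MAX);
        assert_eq!(max + Simd::splat(1), Simd::splat(0));
    }
}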

// We don't need any special precautions here:
// Floats always accept arithmetic ops, but may become NaN.
for_base_ops! {
    T = (f32, f64);
    type Lhs = Simd<T, N>;
    type Rhs = Simd<T, N>;
    type Output = Self;

    impl Add::add {
        unsafe_base { simd_add }
    }

    impl Mul::mul {
        unsafe_base { simd_mul }
    }

    impl Sub::sub {
        unsafe_base { simd_sub }
    }

    impl Div::div {
        unsafe_base { simd_div }
    }

    impl Rem::rem {
        unsafe_base { simd_rem }
    }
}
260 | } |