]>
Commit | Line | Data |
---|---|---|
f20569fa XL |
1 | //! Implements portable horizontal float vector arithmetic reductions. |
2 | ||
/// Implements horizontal arithmetic reductions for a float vector type.
///
/// For the vector type `$id` with element type `$elem_ty` this generates:
///
/// * inherent `sum` and `product` methods reducing all lanes to a scalar,
/// * `Sum` and `Product` implementations over iterators of `$id` and
///   `&$id`, which combine whole vectors lane-wise,
/// * a test module, emitted through `test_if!` under control of `$test_tt`.
///
/// `$elem_count` is part of the matcher but is not referenced by the
/// expansion below.
macro_rules! impl_reduction_float_arithmetic {
    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
        impl $id {
            /// Horizontal sum of the vector elements.
            ///
            /// The reduction is specified as a tree-reduction of the vector
            /// elements. That is, for an 8 element vector:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If any of the vector elements is `NaN`, the reduction returns
            /// `NaN`. The resulting `NaN` is not required to be equal to any
            /// of the `NaN`s in the vector.
            #[inline]
            pub fn sum(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_add_ordered;
                    // SAFETY: presumably sound because `self.0` is the
                    // wrapped SIMD vector and the accumulator has its element
                    // type -- TODO confirm against the intrinsic's contract.
                    unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) }
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // NOTE(review): this fallback accumulates the lanes left
                    // to right, not pairwise as the documentation above
                    // describes -- confirm that is acceptable.
                    (1..$id::lanes()).fold(
                        self.extract(0) as $elem_ty,
                        |acc, lane| acc + self.extract(lane) as $elem_ty,
                    )
                }
            }

            /// Horizontal product of the vector elements.
            ///
            /// The reduction is specified as a tree-reduction of the vector
            /// elements. That is, for an 8 element vector:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If any of the vector elements is `NaN`, the reduction returns
            /// `NaN`. The resulting `NaN` is not required to be equal to any
            /// of the `NaN`s in the vector.
            #[inline]
            pub fn product(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_mul_ordered;
                    // SAFETY: presumably sound because `self.0` is the
                    // wrapped SIMD vector and the accumulator has its element
                    // type -- TODO confirm against the intrinsic's contract.
                    unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) }
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // NOTE(review): this fallback accumulates the lanes left
                    // to right, not pairwise as the documentation above
                    // describes -- confirm that is acceptable.
                    (1..$id::lanes()).fold(
                        self.extract(0) as $elem_ty,
                        |acc, lane| acc * self.extract(lane) as $elem_ty,
                    )
                }
            }
        }

        /// Lane-wise sum of an iterator of vectors, starting from all zeros.
        impl crate::iter::Sum for $id {
            #[inline]
            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(0.), |acc, v| crate::ops::Add::add(acc, v))
            }
        }

        /// Lane-wise product of an iterator of vectors, starting from all
        /// ones.
        impl crate::iter::Product for $id {
            #[inline]
            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(1.), |acc, v| crate::ops::Mul::mul(acc, v))
            }
        }

        /// Lane-wise sum of an iterator of vector references, starting from
        /// all zeros.
        impl<'a> crate::iter::Sum<&'a $id> for $id {
            #[inline]
            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(0.), |acc, v| crate::ops::Add::add(acc, *v))
            }
        }

        /// Lane-wise product of an iterator of vector references, starting
        /// from all ones.
        impl<'a> crate::iter::Product<&'a $id> for $id {
            #[inline]
            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(1.), |acc, v| crate::ops::Mul::mul(acc, *v))
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                pub mod [<$id _reduction_float_arith>] {
                    use super::*;

                    /// Builds a vector of ones in which every lane whose
                    /// index is a multiple of `x` is set to two.
                    fn alternating(x: usize) -> $id {
                        (0..$id::lanes())
                            .filter(|lane| lane % x == 0)
                            .fold($id::splat(1 as $elem_ty), |v, lane| {
                                v.replace(lane, 2 as $elem_ty)
                            })
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn sum() {
                        let zeros = $id::splat(0 as $elem_ty);
                        assert_eq!(zeros.sum(), 0 as $elem_ty);

                        let ones = $id::splat(1 as $elem_ty);
                        assert_eq!(ones.sum(), $id::lanes() as $elem_ty);

                        // Every second lane holds 2, the others hold 1.
                        let mixed = alternating(2);
                        let expected =
                            ($id::lanes() / 2 + $id::lanes()) as $elem_ty;
                        assert_eq!(mixed.sum(), expected);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn product() {
                        let zeros = $id::splat(0 as $elem_ty);
                        assert_eq!(zeros.product(), 0 as $elem_ty);

                        let ones = $id::splat(1 as $elem_ty);
                        assert_eq!(ones.product(), 1 as $elem_ty);

                        // For 16, 32 and 64 lanes the stride is chosen so
                        // that exactly four lanes hold 2.
                        let stride = match $id::lanes() {
                            64 => 16,
                            32 => 8,
                            16 => 4,
                            _ => 2,
                        };
                        let expected = 2_usize
                            .pow(($id::lanes() / stride) as u32)
                            as $elem_ty;
                        assert_eq!(alternating(stride).product(), expected);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    #[allow(unreachable_code)]
                    #[allow(unused_mut)]
                    // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
                    fn sum_nan() {
                        // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
                        // https://github.com/rust-lang-nursery/packed_simd/issues/6
                        //
                        // The early return disables every check below.
                        return;

                        let nan = crate::$elem_ty::NAN;
                        let base = $id::splat(-3.0);
                        for i in 0..$id::lanes() {
                            let mut v = base.replace(i, nan);
                            // A single NaN lane must make the sum NaN.
                            assert!(
                                v.sum().is_nan(),
                                "nan at {} => {} | {:?}",
                                i,
                                v.sum(),
                                v
                            );
                            // Progressively turn the lower lanes into NaN.
                            for j in 0..i {
                                v = v.replace(j, nan);
                                assert!(v.sum().is_nan());
                            }
                        }
                        let all_nan = $id::splat(nan);
                        assert!(
                            all_nan.sum().is_nan(),
                            "all nans | {:?}",
                            all_nan
                        );
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    #[allow(unreachable_code)]
                    #[allow(unused_mut)]
                    // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344
                    fn product_nan() {
                        // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
                        // https://github.com/rust-lang-nursery/packed_simd/issues/6
                        //
                        // The early return disables every check below.
                        return;

                        let nan = crate::$elem_ty::NAN;
                        let base = $id::splat(-3.0);
                        for i in 0..$id::lanes() {
                            let mut v = base.replace(i, nan);
                            // A single NaN lane must make the product NaN.
                            assert!(
                                v.product().is_nan(),
                                "nan at {} => {} | {:?}",
                                i,
                                v.product(),
                                v
                            );
                            // Progressively turn the lower lanes into NaN.
                            for j in 0..i {
                                v = v.replace(j, nan);
                                assert!(v.product().is_nan());
                            }
                        }
                        let all_nan = $id::splat(nan);
                        assert!(
                            all_nan.product().is_nan(),
                            "all nans | {:?}",
                            all_nan
                        );
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    #[allow(unused, dead_code)]
                    fn sum_roundoff() {
                        // Reference pairwise (tree) summation of a slice.
                        fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty {
                            assert!(!a.is_empty());
                            match a.len() {
                                1 => a[0],
                                2 => a[0] + a[1],
                                len => {
                                    let (left, right) = a.split_at(len / 2);
                                    tree_reduce_sum(left)
                                        + tree_reduce_sum(right)
                                }
                            }
                        }

                        // Fill the lanes with values of alternating sign and
                        // growing magnitude, tracking a left-to-right scalar
                        // sum that is only used in the failure message.
                        let mut value = crate::$elem_ty::EPSILON;
                        let mut scalar_reduction = 0. as $elem_ty;
                        let mut v = $id::splat(0. as $elem_ty);
                        for lane in 0..$id::lanes() {
                            let scale = if lane % 2 == 0 { 1e3 } else { -1. };
                            value *= 3.14 * scale;
                            scalar_reduction += value;
                            v = v.replace(lane, value);
                        }
                        let simd_reduction = v.sum();

                        let mut lanes = [0. as $elem_ty; $id::lanes()];
                        v.write_to_slice_unaligned(&mut lanes);
                        let tree_reduction = tree_reduce_sum(&lanes);

                        // tolerate 1 ULP difference:
                        let red_bits = simd_reduction.to_bits();
                        let tree_bits = tree_reduction.to_bits();
                        let ulp_gap = red_bits.max(tree_bits)
                            - red_bits.min(tree_bits);
                        assert!(
                            ulp_gap < 2,
                            "vector: {:?} | simd_reduction: {:?} | \
                             tree_reduction: {} | scalar_reduction: {}",
                            v,
                            simd_reduction,
                            tree_reduction,
                            scalar_reduction
                        );
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    #[allow(unused, dead_code)]
                    fn product_roundoff() {
                        // Reference pairwise (tree) product of a slice.
                        fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty {
                            assert!(!a.is_empty());
                            match a.len() {
                                1 => a[0],
                                2 => a[0] * a[1],
                                len => {
                                    let (left, right) = a.split_at(len / 2);
                                    tree_reduce_product(left)
                                        * tree_reduce_product(right)
                                }
                            }
                        }

                        // Fill the lanes with values of alternating sign and
                        // growing magnitude, tracking a left-to-right scalar
                        // product that is only used in the failure message.
                        let mut value = crate::$elem_ty::EPSILON;
                        let mut scalar_reduction = 1. as $elem_ty;
                        let mut v = $id::splat(0. as $elem_ty);
                        for lane in 0..$id::lanes() {
                            let scale = if lane % 2 == 0 { 1e3 } else { -1. };
                            value *= 3.14 * scale;
                            scalar_reduction *= value;
                            v = v.replace(lane, value);
                        }
                        let simd_reduction = v.product();

                        let mut lanes = [0. as $elem_ty; $id::lanes()];
                        v.write_to_slice_unaligned(&mut lanes);
                        let tree_reduction = tree_reduce_product(&lanes);

                        // tolerate 1 ULP difference:
                        let red_bits = simd_reduction.to_bits();
                        let tree_bits = tree_reduction.to_bits();
                        let ulp_gap = red_bits.max(tree_bits)
                            - red_bits.min(tree_bits);
                        assert!(
                            ulp_gap < 2,
                            "vector: {:?} | simd_reduction: {:?} | \
                             tree_reduction: {} | scalar_reduction: {}",
                            v,
                            simd_reduction,
                            tree_reduction,
                            scalar_reduction
                        );
                    }
                }
            }
        }
    };
}