//! Implements portable horizontal integer vector arithmetic reductions.

/// Generates the horizontal integer arithmetic reductions for the vector
/// type `$id`.
///
/// Macro arguments:
///
/// * `[$elem_ty; $elem_count]` - public element type and lane count of the
///   vector (`$elem_count` is accepted but not used by this macro).
/// * `$id` - name of the vector type the items are generated for.
/// * `$ielem_ty` - element type passed to and returned by the reduction
///   intrinsics; results are cast back to `$elem_ty`.
/// * `$test_tt` - token forwarded to `test_if!` to gate the generated tests.
///
/// The expansion contains:
///
/// * inherent `wrapping_sum` and `wrapping_product` methods,
/// * `Sum` / `Product` implementations over iterators of `$id` and `&$id`,
/// * a `<$id>_reduction_int_arith` test module.
macro_rules! impl_reduction_integer_arithmetic {
    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
     | $test_tt:tt) => {
        impl $id {
            /// Horizontal wrapping sum of the vector elements.
            ///
            /// Wrapping addition is associative, so the order in which the
            /// lanes are combined does not affect the result. For an 8
            /// element vector the result equals, for example:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If the sum overflows, the result wraps around: it is the
            /// mathematical result modulo `2^n`, where `n` is the width in
            /// bits of the element type.
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_add_ordered;
                    // SAFETY: the intrinsic is called with this vector's
                    // inner value and an accumulator of type `$ielem_ty`.
                    // NOTE(review): presumably `$ielem_ty` is the lane type
                    // of `self.0`, as the intrinsic requires - confirm at
                    // the macro's call sites.
                    let v: $ielem_ty = unsafe {
                        simd_reduce_add_ordered(self.0, 0 as $ielem_ty)
                    };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // Scalar fallback: fold the lanes one by one.
                    let mut x = self.extract(0) as $elem_ty;
                    for i in 1..$id::lanes() {
                        x = x.wrapping_add(self.extract(i) as $elem_ty);
                    }
                    x
                }
            }

            /// Horizontal wrapping product of the vector elements.
            ///
            /// Wrapping multiplication is associative, so the order in which
            /// the lanes are combined does not affect the result. For an 8
            /// element vector the result equals, for example:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If the product overflows, the result wraps around: it is the
            /// mathematical result modulo `2^n`, where `n` is the width in
            /// bits of the element type.
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
                #[cfg(not(target_arch = "aarch64"))]
                {
                    use crate::llvm::simd_reduce_mul_ordered;
                    // SAFETY: the intrinsic is called with this vector's
                    // inner value and an accumulator of type `$ielem_ty`.
                    // NOTE(review): presumably `$ielem_ty` is the lane type
                    // of `self.0`, as the intrinsic requires - confirm at
                    // the macro's call sites.
                    let v: $ielem_ty = unsafe {
                        simd_reduce_mul_ordered(self.0, 1 as $ielem_ty)
                    };
                    v as $elem_ty
                }
                #[cfg(target_arch = "aarch64")]
                {
                    // FIXME: broken on AArch64
                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
                    //
                    // Scalar fallback: fold the lanes one by one.
                    let mut x = self.extract(0) as $elem_ty;
                    for i in 1..$id::lanes() {
                        x = x.wrapping_mul(self.extract(i) as $elem_ty);
                    }
                    x
                }
            }
        }

        /// Sums an iterator of vectors with the vector's `Add`
        /// implementation, starting from `splat(0)`.
        impl crate::iter::Sum for $id {
            #[inline]
            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), crate::ops::Add::add)
            }
        }

        /// Multiplies an iterator of vectors with the vector's `Mul`
        /// implementation, starting from `splat(1)`.
        impl crate::iter::Product for $id {
            #[inline]
            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), crate::ops::Mul::mul)
            }
        }

        /// Same as the by-value `Sum` implementation, for iterators that
        /// yield references; each item is copied out before being added.
        impl<'a> crate::iter::Sum<&'a $id> for $id {
            #[inline]
            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
            }
        }

        /// Same as the by-value `Product` implementation, for iterators that
        /// yield references; each item is copied out before being multiplied.
        impl<'a> crate::iter::Product<&'a $id> for $id {
            #[inline]
            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
                iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
            }
        }

        test_if! {
            $test_tt:
            paste::item! {
                pub mod [<$id _reduction_int_arith>] {
                    use super::*;

                    /// Returns a vector of ones in which every lane whose
                    /// index is a multiple of `x` is replaced by 2.
                    fn alternating(x: usize) -> $id {
                        let mut v = $id::splat(1 as $elem_ty);
                        for i in 0..$id::lanes() {
                            if i % x == 0 {
                                v = v.replace(i, 2 as $elem_ty);
                            }
                        }
                        v
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
                        // Every second lane is 2, the others are 1.
                        let v = alternating(2);
                        if $id::lanes() > 1 {
                            assert_eq!(
                                v.wrapping_sum(),
                                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
                            );
                        } else {
                            // Single lane: lane 0 was replaced by 2.
                            assert_eq!(
                                v.wrapping_sum(),
                                2 as $elem_ty
                            );
                        }
                    }
                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_sum_overflow() {
                        let start = $elem_ty::max_value()
                            - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start as $elem_ty);
                        let vwrapping_sum = v.wrapping_sum();

                        // Scalar reference: add `start` once per lane with
                        // wrapping arithmetic.
                        let mut wrapping_sum = start;
                        for _ in 1..$id::lanes() {
                            wrapping_sum = wrapping_sum.wrapping_add(start);
                        }
                        assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product() {
                        let v = $id::splat(0 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 0 as $elem_ty);
                        let v = $id::splat(1 as $elem_ty);
                        assert_eq!(v.wrapping_product(), 1 as $elem_ty);
                        // Stride between the lanes set to 2; wider vectors
                        // use a larger stride so that at most four lanes
                        // hold a 2 (presumably to keep the expected product
                        // small enough for every element type).
                        let f = match $id::lanes() {
                            64 => 16,
                            32 => 8,
                            16 => 4,
                            _ => 2,
                        };
                        let v = alternating(f);
                        if $id::lanes() > 1 {
                            assert_eq!(
                                v.wrapping_product(),
                                (2_usize.pow(($id::lanes() / f) as u32)
                                 as $elem_ty)
                            );
                        } else {
                            // Single lane: lane 0 was replaced by 2.
                            assert_eq!(
                                v.wrapping_product(),
                                2 as $elem_ty
                            );
                        }
                    }

                    #[cfg_attr(not(target_arch = "wasm32"), test)]
                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
                    fn wrapping_product_overflow() {
                        let start = $elem_ty::max_value()
                            - ($id::lanes() as $elem_ty / 2);

                        let v = $id::splat(start as $elem_ty);
                        let vmul = v.wrapping_product();

                        // Scalar reference: multiply by `start` once per
                        // lane with wrapping arithmetic.
                        let mut mul = start;
                        for _ in 1..$id::lanes() {
                            mul = mul.wrapping_mul(start);
                        }
                        assert_eq!(mul, vmul, "v = {:?}", v);
                    }
                }
            }
        }
    };
}