1 //! Implements portable arithmetic vector reductions.
// Implements the horizontal arithmetic reductions `wrapping_sum` and
// `wrapping_product` as inherent methods on the vector type `$id`, whose
// lanes have type `$elem_ty`.
//
// Each method exists in two `cfg`-selected variants: one that forwards to the
// reduction intrinsic, and an AArch64 fallback that combines the lanes one by
// one because the intrinsic is broken on that target (see the `FIXME`s).
macro_rules! impl_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        impl $id {
            /// Horizontal sum of the vector elements.
            ///
            /// The intrinsic performs a tree-reduction of the vector elements.
            /// That is, for an 8 element vector:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// # Integer vectors
            ///
            /// If an operation overflows, the result wraps around: it is the
            /// mathematical result modulo `2^n`, where `n` is the width in
            /// bits of the element type.
            ///
            /// # Floating-point vectors
            ///
            /// If one of the vector elements is `NaN` the reduction returns
            /// `NaN`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
                use coresimd::simd_llvm::simd_reduce_add_ordered;
                // `0` is the additive identity, so seeding the reduction with
                // it does not change the sum.
                //
                // NOTE(review): this is the `_ordered` flavour of the
                // intrinsic; confirm that its evaluation order matches the
                // tree shape documented above. The grouping only matters for
                // floating-point rounding, wrapping integer addition being
                // associative.
                //
                // SAFETY (assumed, confirm at the macro's call sites): `$id`
                // is a SIMD vector type whose lanes are `$elem_ty`, so the
                // accumulator type matches what the intrinsic expects.
                unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
            }

            /// Horizontal sum of the vector elements.
            ///
            /// On AArch64 the reduction intrinsic is not used (see the `FIXME`
            /// in the body). The lanes are instead accumulated one at a time,
            /// from the first lane to the last. That is, for an 8 element
            /// vector:
            ///
            /// > ((((((x0 + x1) + x2) + x3) + x4) + x5) + x6) + x7
            ///
            /// # Integer vectors
            ///
            /// If an operation overflows, the result wraps around: it is the
            /// mathematical result modulo `2^n`, where `n` is the width in
            /// bits of the element type. Wrapping addition is associative, so
            /// the grouping of the operations does not affect the result.
            ///
            /// # Floating-point vectors
            ///
            /// If one of the vector elements is `NaN` the reduction returns
            /// `NaN`.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            pub fn wrapping_sum(self) -> $elem_ty {
                // FIXME: broken on AArch64
                // https://bugs.llvm.org/show_bug.cgi?id=36796
                //
                // `Wrapping` comes from `codegen::wrapping`, which is not
                // visible here; presumably `Wrapping::add` is the wrapping
                // addition for `$elem_ty`.
                use super::codegen::wrapping::Wrapping;
                // Seed with lane 0, then fold in lanes `1..lanes()`.
                let mut acc = self.extract(0) as $elem_ty;
                for lane in 1..$id::lanes() {
                    acc = Wrapping::add(acc, self.extract(lane) as $elem_ty);
                }
                // Yield the accumulated value as the function result.
                acc
            }

            /// Horizontal product of the vector elements.
            ///
            /// The intrinsic performs a tree-reduction of the vector elements.
            /// That is, for an 8 element vector:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// # Integer vectors
            ///
            /// If an operation overflows, the result wraps around: it is the
            /// mathematical result modulo `2^n`, where `n` is the width in
            /// bits of the element type.
            ///
            /// # Floating-point vectors
            ///
            /// If one of the vector elements is `NaN` the reduction returns
            /// `NaN`.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
                use coresimd::simd_llvm::simd_reduce_mul_ordered;
                // `1` is the multiplicative identity, so seeding the reduction
                // with it does not change the product.
                //
                // NOTE(review): this is the `_ordered` flavour of the
                // intrinsic; confirm that its evaluation order matches the
                // tree shape documented above. The grouping only matters for
                // floating-point rounding, wrapping integer multiplication
                // being associative.
                //
                // SAFETY (assumed, confirm at the macro's call sites): `$id`
                // is a SIMD vector type whose lanes are `$elem_ty`, so the
                // accumulator type matches what the intrinsic expects.
                unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
            }

            /// Horizontal product of the vector elements.
            ///
            /// On AArch64 the reduction intrinsic is not used (see the `FIXME`
            /// in the body). The lanes are instead multiplied together one at
            /// a time, from the first lane to the last. That is, for an 8
            /// element vector:
            ///
            /// > ((((((x0 * x1) * x2) * x3) * x4) * x5) * x6) * x7
            ///
            /// # Integer vectors
            ///
            /// If an operation overflows, the result wraps around: it is the
            /// mathematical result modulo `2^n`, where `n` is the width in
            /// bits of the element type. Wrapping multiplication is
            /// associative, so the grouping of the operations does not affect
            /// the result.
            ///
            /// # Floating-point vectors
            ///
            /// If one of the vector elements is `NaN` the reduction returns
            /// `NaN`.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            pub fn wrapping_product(self) -> $elem_ty {
                // FIXME: broken on AArch64
                // https://bugs.llvm.org/show_bug.cgi?id=36796
                //
                // `Wrapping` comes from `codegen::wrapping`, which is not
                // visible here; presumably `Wrapping::mul` is the wrapping
                // multiplication for `$elem_ty`.
                use super::codegen::wrapping::Wrapping;
                // Seed with lane 0, then fold in lanes `1..lanes()`.
                let mut acc = self.extract(0) as $elem_ty;
                for lane in 1..$id::lanes() {
                    acc = Wrapping::mul(acc, self.extract(lane) as $elem_ty);
                }
                // Yield the accumulated value as the function result.
                acc
            }
        }
    };
}
// Generates test code for the `wrapping_sum` and `wrapping_product`
// reductions of the vector type `$id`, whose lanes have type `$elem_ty`.
//
// NOTE(review): this view of the macro is incomplete. Closing delimiters,
// the condition inside `alternating`'s loop, the header of the sum test, the
// arms of the `match` on the lane count and the tail of the last assertion
// are not visible here. Confirm every comment below against the full file.
113 macro_rules
! test_arithmetic_reductions
{
114 ($id
:ident
, $elem_ty
:ident
) => {
// Helper that builds a test vector: starts with every lane set to 1 and,
// while looping over all lanes, overwrites lanes with 2.
// NOTE(review): how `x` selects the overwritten lanes is not visible in this
// view. The expected values used further down (`lanes / 2 + lanes` for
// `alternating(2)`, `2^(lanes / f)` for `alternating(f)`) are consistent with
// `lanes / x` lanes holding 2 -- verify against the full source.
115 fn alternating(x
: usize) -> ::coresimd
::simd
::$id
{
116 use coresimd
::simd
::$id
;
117 let mut v
= $id
::splat(1 as $elem_ty
);
118 for i
in 0..$id
::lanes() {
120 v
= v
.replace(i
, 2 as $elem_ty
);
// NOTE(review): the header of the function owning the statements below is not
// visible here; presumably it is the test for `wrapping_sum`.
128 use coresimd
::simd
::$id
;
// An all-zero vector must sum to zero.
129 let v
= $id
::splat(0 as $elem_ty
);
130 assert_eq
!(v
.wrapping_sum(), 0 as $elem_ty
);
// An all-one vector must sum to the number of lanes.
131 let v
= $id
::splat(1 as $elem_ty
);
132 assert_eq
!(v
.wrapping_sum(), $id
::lanes() as $elem_ty
);
133 let v
= alternating(2);
// Presumably the expected sum of `alternating(2)`; the assertion that uses
// this expression is not visible in this view.
136 ($id
::lanes() / 2 + $id
::lanes()) as $elem_ty
// Checks `wrapping_product` on an all-zero vector, an all-one vector and a
// vector produced by `alternating`.
140 fn wrapping_product() {
141 use coresimd
::simd
::$id
;
// An all-zero vector must have product zero.
142 let v
= $id
::splat(0 as $elem_ty
);
143 assert_eq
!(v
.wrapping_product(), 0 as $elem_ty
);
// An all-one vector must have product one.
144 let v
= $id
::splat(1 as $elem_ty
);
145 assert_eq
!(v
.wrapping_product(), 1 as $elem_ty
);
// `f` is chosen from the lane count and passed to `alternating` below.
// NOTE(review): the match arms are not visible in this view.
146 let f
= match $id
::lanes() {
152 let v
= alternating(f
);
// The value compared against the product is 2 raised to `lanes / f`, computed
// in `usize` and then cast to the lane type.
154 v
.wrapping_product(),
155 (2_usize
.pow(($id
::lanes() / f
) as u32) as $elem_ty
)