vendor/packed_simd_2/src/api/reductions/integer_arithmetic.rs

   1 //! Implements portable horizontal integer vector arithmetic reductions.
   2
   3 macro_rules! impl_reduction_integer_arithmetic {
   4     ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
   5      | $test_tt:tt) => {
   6         impl $id {
   7             /// Horizontal wrapping sum of the vector elements.
   8             ///
   9             /// The intrinsic performs a tree-reduction of the vector elements.
  10             /// That is, for an 8 element vector:
  11             ///
  12             /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
  13             ///
  14             /// If an operation overflows it returns the mathematical result
  15             /// modulo `2^n` where `n` is the number of times it overflows.
  16             #[inline]
  17             pub fn wrapping_sum(self) -> $elem_ty {
  18                 #[cfg(not(target_arch = "aarch64"))]
  19                 {
  20                     use crate::llvm::simd_reduce_add_ordered;
  21                     let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) };
  22                     v as $elem_ty
  23                 }
  24                 #[cfg(target_arch = "aarch64")]
  25                 {
  26                     // FIXME: broken on AArch64
  27                     // https://github.com/rust-lang-nursery/packed_simd/issues/15
  28                     let mut x = self.extract(0) as $elem_ty;
  29                     for i in 1..$id::lanes() {
  30                         x = x.wrapping_add(self.extract(i) as $elem_ty);
  31                     }
  32                     x
  33                 }
  34             }
  35
  36             /// Horizontal wrapping product of the vector elements.
  37             ///
  38             /// The intrinsic performs a tree-reduction of the vector elements.
  39             /// That is, for an 8 element vector:
  40             ///
  41             /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
  42             ///
  43             /// If an operation overflows it returns the mathematical result
  44             /// modulo `2^n` where `n` is the number of times it overflows.
  45             #[inline]
  46             pub fn wrapping_product(self) -> $elem_ty {
  47                 #[cfg(not(target_arch = "aarch64"))]
  48                 {
  49                     use crate::llvm::simd_reduce_mul_ordered;
  50                     let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) };
  51                     v as $elem_ty
  52                 }
  53                 #[cfg(target_arch = "aarch64")]
  54                 {
  55                     // FIXME: broken on AArch64
  56                     // https://github.com/rust-lang-nursery/packed_simd/issues/15
  57                     let mut x = self.extract(0) as $elem_ty;
  58                     for i in 1..$id::lanes() {
  59                         x = x.wrapping_mul(self.extract(i) as $elem_ty);
  60                     }
  61                     x
  62                 }
  63             }
  64         }
  65
  66         impl crate::iter::Sum for $id {
  67             #[inline]
  68             fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
  69                 iter.fold($id::splat(0), crate::ops::Add::add)
  70             }
  71         }
  72
  73         impl crate::iter::Product for $id {
  74             #[inline]
  75             fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
  76                 iter.fold($id::splat(1), crate::ops::Mul::mul)
  77             }
  78         }
  79
  80         impl<'a> crate::iter::Sum<&'a $id> for $id {
  81             #[inline]
  82             fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
  83                 iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
  84             }
  85         }
  86
  87         impl<'a> crate::iter::Product<&'a $id> for $id {
  88             #[inline]
  89             fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
  90                 iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
  91             }
  92         }
  93
  94         test_if! {
  95             $test_tt:
  96             paste::item! {
  97                 pub mod [<$id _reduction_int_arith>] {
  98                     use super::*;
  99
 100                     fn alternating(x: usize) -> $id {
 101                         let mut v = $id::splat(1 as $elem_ty);
 102                         for i in 0..$id::lanes() {
 103                             if i % x == 0 {
 104                                 v = v.replace(i, 2 as $elem_ty);
 105                             }
 106                         }
 107                         v
 108                     }
 109
 110                     #[cfg_attr(not(target_arch = "wasm32"), test)]
 111                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 112                     fn wrapping_sum() {
 113                         let v = $id::splat(0 as $elem_ty);
 114                         assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
 115                         let v = $id::splat(1 as $elem_ty);
 116                         assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
 117                         let v = alternating(2);
 118                         if $id::lanes() > 1 {
 119                             assert_eq!(
 120                                 v.wrapping_sum(),
 121                                 ($id::lanes() / 2 + $id::lanes()) as $elem_ty
 122                             );
 123                         } else {
 124                             assert_eq!(
 125                                 v.wrapping_sum(),
 126                                 2 as $elem_ty
 127                             );
 128                         }
 129                     }
 130                     #[cfg_attr(not(target_arch = "wasm32"), test)]
 131                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 132                     fn wrapping_sum_overflow() {
 133                         let start = $elem_ty::max_value()
 134                             - ($id::lanes() as $elem_ty / 2);
 135
 136                         let v = $id::splat(start as $elem_ty);
 137                         let vwrapping_sum = v.wrapping_sum();
 138
 139                         let mut wrapping_sum = start;
 140                         for _ in 1..$id::lanes() {
 141                             wrapping_sum = wrapping_sum.wrapping_add(start);
 142                         }
 143                         assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
 144                     }
 145
 146                     #[cfg_attr(not(target_arch = "wasm32"), test)]
 147                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 148                     fn wrapping_product() {
 149                         let v = $id::splat(0 as $elem_ty);
 150                         assert_eq!(v.wrapping_product(), 0 as $elem_ty);
 151                         let v = $id::splat(1 as $elem_ty);
 152                         assert_eq!(v.wrapping_product(), 1 as $elem_ty);
 153                         let f = match $id::lanes() {
 154                             64 => 16,
 155                             32 => 8,
 156                             16 => 4,
 157                             _ => 2,
 158                         };
 159                         let v = alternating(f);
 160                         if $id::lanes() > 1 {
 161                             assert_eq!(
 162                                 v.wrapping_product(),
 163                                 (2_usize.pow(($id::lanes() / f) as u32)
 164                                  as $elem_ty)
 165                             );
 166                         } else {
 167                             assert_eq!(
 168                                 v.wrapping_product(),
 169                                 2 as $elem_ty
 170                             );
 171                         }
 172                     }
 173
 174                     #[cfg_attr(not(target_arch = "wasm32"), test)]
 175                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
 176                     fn wrapping_product_overflow() {
 177                         let start = $elem_ty::max_value()
 178                             - ($id::lanes() as $elem_ty / 2);
 179
 180                         let v = $id::splat(start as $elem_ty);
 181                         let vmul = v.wrapping_product();
 182
 183                         let mut mul = start;
 184                         for _ in 1..$id::lanes() {
 185                             mul = mul.wrapping_mul(start);
 186                         }
 187                         assert_eq!(mul, vmul, "v = {:?}", v);
 188                     }
 189                 }
 190             }
 191         }
 192     };
 193 }