src/stdsimd/coresimd/ppsv/api/arithmetic_reductions.rs

   1 //! Implements portable arithmetic vector reductions.
   2 #![allow(unused)]
   3
   4 macro_rules! impl_arithmetic_reductions {
   5     ($id:ident, $elem_ty:ident) => {
   6         impl $id {
   7             /// Horizontal sum of the vector elements.
   8             ///
   9             /// The intrinsic performs a tree-reduction of the vector elements.
  10             /// That is, for an 8 element vector:
  11             ///
  12             /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
  13             ///
  14             /// # Integer vectors
  15             ///
  16             /// If an operation overflows it returns the mathematical result
  17             /// modulo `2^n` where `n` is the number of times it overflows.
  18             ///
  19             /// # Floating-point vectors
  20             ///
  21             /// If one of the vector element is `NaN` the reduction returns
  22             /// `NaN`.
  23             #[cfg(not(target_arch = "aarch64"))]
  24             #[inline]
  25             pub fn wrapping_sum(self) -> $elem_ty {
  26                 use coresimd::simd_llvm::simd_reduce_add_ordered;
  27                 unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
  28             }
  29             /// Horizontal sum of the vector elements.
  30             ///
  31             /// The intrinsic performs a tree-reduction of the vector elements.
  32             /// That is, for an 8 element vector:
  33             ///
  34             /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
  35             ///
  36             /// # Integer vectors
  37             ///
  38             /// If an operation overflows it returns the mathematical result
  39             /// modulo `2^n` where `n` is the number of times it overflows.
  40             ///
  41             /// # Floating-point vectors
  42             ///
  43             /// If one of the vector element is `NaN` the reduction returns
  44             /// `NaN`.
  45             #[cfg(target_arch = "aarch64")]
  46             #[inline]
  47             pub fn wrapping_sum(self) -> $elem_ty {
  48                 // FIXME: broken on AArch64
  49                 // https://bugs.llvm.org/show_bug.cgi?id=36796
  50                 use super::codegen::wrapping::Wrapping;
  51                 let mut x = self.extract(0) as $elem_ty;
  52                 for i in 1..$id::lanes() {
  53                     x = Wrapping::add(x, self.extract(i) as $elem_ty);
  54                 }
  55                 x
  56             }
  57
  58             /// Horizontal product of the vector elements.
  59             ///
  60             /// The intrinsic performs a tree-reduction of the vector elements.
  61             /// That is, for an 8 element vector:
  62             ///
  63             /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
  64             ///
  65             /// # Integer vectors
  66             ///
  67             /// If an operation overflows it returns the mathematical result
  68             /// modulo `2^n` where `n` is the number of times it overflows.
  69             ///
  70             /// # Floating-point vectors
  71             ///
  72             /// If one of the vector element is `NaN` the reduction returns
  73             /// `NaN`.
  74             #[cfg(not(target_arch = "aarch64"))]
  75             #[inline]
  76             pub fn wrapping_product(self) -> $elem_ty {
  77                 use coresimd::simd_llvm::simd_reduce_mul_ordered;
  78                 unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
  79             }
  80             /// Horizontal product of the vector elements.
  81             ///
  82             /// The intrinsic performs a tree-reduction of the vector elements.
  83             /// That is, for an 8 element vector:
  84             ///
  85             /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
  86             ///
  87             /// # Integer vectors
  88             ///
  89             /// If an operation overflows it returns the mathematical result
  90             /// modulo `2^n` where `n` is the number of times it overflows.
  91             ///
  92             /// # Floating-point vectors
  93             ///
  94             /// If one of the vector element is `NaN` the reduction returns
  95             /// `NaN`.
  96             #[cfg(target_arch = "aarch64")]
  97             #[inline]
  98             pub fn wrapping_product(self) -> $elem_ty {
  99                 // FIXME: broken on AArch64
 100                 // https://bugs.llvm.org/show_bug.cgi?id=36796
 101                 use super::codegen::wrapping::Wrapping;
 102                 let mut x = self.extract(0) as $elem_ty;
 103                 for i in 1..$id::lanes() {
 104                     x = Wrapping::mul(x, self.extract(i) as $elem_ty);
 105                 }
 106                 x
 107             }
 108         }
 109     };
 110 }
 111
 112 #[cfg(test)]
 113 macro_rules! test_arithmetic_reductions {
 114     ($id:ident, $elem_ty:ident) => {
 115         fn alternating(x: usize) -> ::coresimd::simd::$id {
 116             use coresimd::simd::$id;
 117             let mut v = $id::splat(1 as $elem_ty);
 118             for i in 0..$id::lanes() {
 119                 if i % x == 0 {
 120                     v = v.replace(i, 2 as $elem_ty);
 121                 }
 122             }
 123             v
 124         }
 125
 126         #[test]
 127         fn wrapping_sum() {
 128             use coresimd::simd::$id;
 129             let v = $id::splat(0 as $elem_ty);
 130             assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
 131             let v = $id::splat(1 as $elem_ty);
 132             assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
 133             let v = alternating(2);
 134             assert_eq!(
 135                 v.wrapping_sum(),
 136                 ($id::lanes() / 2 + $id::lanes()) as $elem_ty
 137             );
 138         }
 139         #[test]
 140         fn wrapping_product() {
 141             use coresimd::simd::$id;
 142             let v = $id::splat(0 as $elem_ty);
 143             assert_eq!(v.wrapping_product(), 0 as $elem_ty);
 144             let v = $id::splat(1 as $elem_ty);
 145             assert_eq!(v.wrapping_product(), 1 as $elem_ty);
 146             let f = match $id::lanes() {
 147                 64 => 16,
 148                 32 => 8,
 149                 16 => 4,
 150                 _ => 2,
 151             };
 152             let v = alternating(f);
 153             assert_eq!(
 154                 v.wrapping_product(),
 155                 (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
 156             );
 157         }
 158     };
 159 }