src/stdsimd/crates/coresimd/tests/reductions.rs

   1 #![feature(stdsimd, sse4a_target_feature, avx512_target_feature)]
   2 #![feature(arm_target_feature)]
   3 #![feature(aarch64_target_feature)]
   4 #![feature(powerpc_target_feature)]
   5 #![allow(unused_attributes)]
   6
   7 #[macro_use]
   8 extern crate stdsimd;
   9
  10 use stdsimd::simd::*;
  11
  12 #[cfg(target_arch = "powerpc")]
  13 macro_rules! is_powerpc_feature_detected {
  14     ($t:tt) => {
  15         false
  16     };
  17 }
  18
  19 macro_rules! invoke_arch {
  20     ($macro:ident, $feature_macro:ident, $id:ident, $elem_ty:ident,
  21      [$($feature:tt),*]) => {
  22         $($macro!($feature, $feature_macro, $id, $elem_ty);)*
  23     }
  24 }
  25
  26 macro_rules! invoke_vectors {
  27     ($macro:ident, [$(($id:ident, $elem_ty:ident)),*]) => {
  28         $(
  29             #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
  30             invoke_arch!($macro, is_x86_feature_detected, $id, $elem_ty,
  31                         ["sse", "sse2", "sse3", "ssse3", "sse4.1",
  32                          "sse4.2", "sse4a", "avx2", "avx2", "avx512f"]);
  33             #[cfg(target_arch = "aarch64")]
  34             invoke_arch!($macro, is_aarch64_feature_detected, $id, $elem_ty,
  35                         ["neon"]);
  36             #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
  37             invoke_arch!($macro, is_arm_feature_detected, $id, $elem_ty,
  38                          ["neon"]);
  39             #[cfg(target_arch = "powerpc")]
  40             invoke_arch!($macro, is_powerpc_feature_detected, $id, $elem_ty, ["altivec"]);
  41             #[cfg(target_arch = "powerpc64")]
  42             invoke_arch!($macro, is_powerpc64_feature_detected, $id, $elem_ty, ["altivec"]);
  43         )*
  44     }
  45 }
  46
  47 macro_rules! finvoke {
  48     ($macro:ident) => {
  49         invoke_vectors!(
  50             $macro,
  51             [
  52                 (f32x2, f32),
  53                 (f32x4, f32),
  54                 (f32x8, f32),
  55                 (f32x16, f32),
  56                 (f64x2, f64),
  57                 (f64x4, f64),
  58                 (f64x8, f64)
  59             ]
  60         );
  61     };
  62 }
  63
  64 macro_rules! iinvoke {
  65     ($macro:ident) => {
  66         invoke_vectors!(
  67             $macro,
  68             [
  69                 (i8x2, i8),
  70                 (i8x4, i8),
  71                 (i8x8, i8),
  72                 (i8x16, i8),
  73                 (i8x32, i8),
  74                 (i8x64, i8),
  75                 (i16x2, i16),
  76                 (i16x4, i16),
  77                 (i16x8, i16),
  78                 (i16x16, i16),
  79                 (i16x32, i16),
  80                 (i32x2, i32),
  81                 (i32x4, i32),
  82                 (i32x8, i32),
  83                 (i32x16, i32),
  84                 (i64x2, i64),
  85                 (i64x4, i64),
  86                 (i64x8, i64),
  87                 (u8x2, u8),
  88                 (u8x4, u8),
  89                 (u8x8, u8),
  90                 (u8x16, u8),
  91                 (u8x32, u8),
  92                 (u8x64, u8),
  93                 (u16x2, u16),
  94                 (u16x4, u16),
  95                 (u16x8, u16),
  96                 (u16x16, u16),
  97                 (u16x32, u16),
  98                 (u32x2, u32),
  99                 (u32x4, u32),
 100                 (u32x8, u32),
 101                 (u32x16, u32),
 102                 (u64x2, u64),
 103                 (u64x4, u64),
 104                 (u64x8, u64)
 105             ]
 106         );
 107     };
 108 }
 109
 110 macro_rules! min_nan_test {
 111     ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 112         if $feature_macro!($feature) {
 113             #[target_feature(enable = $feature)]
 114             unsafe fn test_fn() {
 115                 let n0 = ::std::$elem_ty::NAN;
 116
 117                 assert_eq!(n0.min(-3.0), -3.0);
 118                 assert_eq!((-3.0 as $elem_ty).min(n0), -3.0);
 119
 120                 let v0 = $id::splat(-3.0);
 121
 122                 // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
 123                 // When the last element is NaN the current implementation produces incorrect results.
 124                 let bugbug = 1;
 125                 for i in 0..$id::lanes() - bugbug {
 126                     let mut v = v0.replace(i, n0);
 127                     // If there is a NaN, the result is always the smallest element:
 128                     assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int());
 129                     for j in 0..i {
 130                         v = v.replace(j, n0);
 131                         assert_eq!(v.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.min_element(), v, v.as_int());
 132                     }
 133                 }
 134                 // If the vector contains all NaNs the result is NaN:
 135                 let vn = $id::splat(n0);
 136                 assert!(vn.min_element().is_nan(), "all nans | v={:?} | min={} | is_nan: {}",
 137                         vn, vn.min_element(), vn.min_element().is_nan());
 138             }
 139             unsafe { test_fn() };
 140         }
 141     }
 142 }
 143
 144 #[test]
 145 fn min_nan() {
 146     finvoke!(min_nan_test);
 147 }
 148
 149 macro_rules! max_nan_test {
 150     ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 151         if $feature_macro!($feature) {
 152             #[target_feature(enable = $feature)]
 153             unsafe fn test_fn() {
 154                 let n0 = ::std::$elem_ty::NAN;
 155
 156                 assert_eq!(n0.max(-3.0), -3.0);
 157                 assert_eq!((-3.0 as $elem_ty).max(n0), -3.0);
 158
 159                 let v0 = $id::splat(-3.0);
 160
 161                 // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
 162                 // When the last element is NaN the current implementation produces incorrect results.
 163                 let bugbug = 1;
 164                 for i in 0..$id::lanes() - bugbug {
 165                     let mut v = v0.replace(i, n0);
 166                     // If there is a NaN the result is always the largest element:
 167                     assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int());
 168                     for j in 0..i {
 169                         v = v.replace(j, n0);
 170                         assert_eq!(v.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i, v.max_element(), v, v.as_int());
 171                     }
 172                 }
 173
 174                 // If the vector contains all NaNs the result is NaN:
 175                 let vn = $id::splat(n0);
 176                 assert!(vn.max_element().is_nan(), "all nans | v={:?} | max={} | is_nan: {}",
 177                         vn, vn.max_element(), vn.max_element().is_nan());
 178             }
 179             unsafe { test_fn() };
 180         }
 181     }
 182 }
 183
 184 #[test]
 185 fn max_nan() {
 186     finvoke!(max_nan_test);
 187 }
 188
 189 macro_rules! wrapping_sum_nan_test {
 190     ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 191         if $feature_macro!($feature) {
 192             #[target_feature(enable = $feature)]
 193             #[allow(unreachable_code)]
 194             unsafe fn test_fn() {
 195                 // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
 196                 // https://github.com/rust-lang-nursery/stdsimd/issues/409
 197                 return;
 198
 199                 let n0 = ::std::$elem_ty::NAN;
 200                 let v0 = $id::splat(-3.0);
 201                 for i in 0..$id::lanes() {
 202                     let mut v = v0.replace(i, n0);
 203                     // If the vector contains a NaN the result is NaN:
 204                     assert!(
 205                         v.wrapping_sum().is_nan(),
 206                         "nan at {} => {} | {:?}",
 207                         i,
 208                         v.wrapping_sum(),
 209                         v
 210                     );
 211                     for j in 0..i {
 212                         v = v.replace(j, n0);
 213                         assert!(v.wrapping_sum().is_nan());
 214                     }
 215                 }
 216                 let v = $id::splat(n0);
 217                 assert!(v.wrapping_sum().is_nan(), "all nans | {:?}", v);
 218             }
 219             unsafe { test_fn() };
 220         }
 221     };
 222 }
 223
 224 #[test]
 225 fn wrapping_sum_nan() {
 226     finvoke!(wrapping_sum_nan_test);
 227 }
 228
 229 macro_rules! wrapping_product_nan_test {
 230     ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 231         if $feature_macro!($feature) {
 232             #[target_feature(enable = $feature)]
 233             #[allow(unreachable_code)]
 234             unsafe fn test_fn() {
 235                 // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
 236                 // https://github.com/rust-lang-nursery/stdsimd/issues/409
 237                 return;
 238
 239                 let n0 = ::std::$elem_ty::NAN;
 240                 let v0 = $id::splat(-3.0);
 241                 for i in 0..$id::lanes() {
 242                     let mut v = v0.replace(i, n0);
 243                     // If the vector contains a NaN the result is NaN:
 244                     assert!(
 245                         v.wrapping_product().is_nan(),
 246                         "nan at {} | {:?}",
 247                         i,
 248                         v
 249                     );
 250                     for j in 0..i {
 251                         v = v.replace(j, n0);
 252                         assert!(v.wrapping_sum().is_nan());
 253                     }
 254                 }
 255                 let v = $id::splat(n0);
 256                 assert!(
 257                     v.wrapping_product().is_nan(),
 258                     "all nans | {:?}",
 259                     v
 260                 );
 261             }
 262             unsafe { test_fn() };
 263         }
 264     };
 265 }
 266
 267 #[test]
 268 fn wrapping_product_nan() {
 269     finvoke!(wrapping_product_nan_test);
 270 }
 271
 272 trait AsInt {
 273     type Int;
 274     fn as_int(self) -> Self::Int;
 275     fn from_int(Self::Int) -> Self;
 276 }
 277
 278 macro_rules! as_int {
 279     ($float:ident, $int:ident) => {
 280         impl AsInt for $float {
 281             type Int = $int;
 282             fn as_int(self) -> $int {
 283                 unsafe { ::std::mem::transmute(self) }
 284             }
 285             fn from_int(x: $int) -> $float {
 286                 unsafe { ::std::mem::transmute(x) }
 287             }
 288         }
 289     };
 290 }
 291
 292 as_int!(f32, u32);
 293 as_int!(f64, u64);
 294 as_int!(f32x2, i32x2);
 295 as_int!(f32x4, i32x4);
 296 as_int!(f32x8, i32x8);
 297 as_int!(f32x16, i32x16);
 298 as_int!(f64x2, i64x2);
 299 as_int!(f64x4, i64x4);
 300 as_int!(f64x8, i64x8);
 301
 302 // FIXME: these fail on i586 for some reason
 303 #[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))]
 304 mod offset {
 305     use super::*;
 306
 307     trait TreeReduceAdd {
 308         type R;
 309         fn tree_reduce_add(self) -> Self::R;
 310     }
 311
 312     macro_rules! tree_reduce_add_f {
 313     ($elem_ty:ident) => {
 314         impl<'a> TreeReduceAdd for &'a [$elem_ty] {
 315             type R = $elem_ty;
 316             fn tree_reduce_add(self) -> $elem_ty {
 317                 if self.len() == 2 {
 318                     println!("  lv: {}, rv: {} => {}", self[0], self[1], self[0] + self[1]);
 319                     self[0] + self[1]
 320                 } else {
 321                     let mid = self.len() / 2;
 322                     let (left, right) = self.split_at(mid);
 323                     println!("  splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
 324                     Self::tree_reduce_add(left) + Self::tree_reduce_add(right)
 325                 }
 326             }
 327         }
 328     };
 329 }
 330     tree_reduce_add_f!(f32);
 331     tree_reduce_add_f!(f64);
 332
 333     macro_rules! wrapping_sum_roundoff_test {
 334     ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 335         if $feature_macro!($feature) {
 336             #[target_feature(enable = $feature)]
 337             unsafe fn test_fn() {
 338                 let mut start = std::$elem_ty::EPSILON;
 339                 let mut wrapping_sum = 0. as $elem_ty;
 340
 341                 let mut v = $id::splat(0. as $elem_ty);
 342                 for i in 0..$id::lanes() {
 343                     let c = if i % 2 == 0 { 1e3 } else { -1. };
 344                     start *= 3.14 * c;
 345                     wrapping_sum += start;
 346                     // println!("{} | start: {}", stringify!($id), start);
 347                     v = v.replace(i, start);
 348                 }
 349                 let vwrapping_sum = v.wrapping_sum();
 350                 println!(
 351                     "{} | lwrapping_sum: {}",
 352                     stringify!($id),
 353                     wrapping_sum
 354                 );
 355                 println!(
 356                     "{} | vwrapping_sum: {}",
 357                     stringify!($id),
 358                     vwrapping_sum
 359                 );
 360                 let r = vwrapping_sum.as_int() == wrapping_sum.as_int();
 361                 // This is false in general; the intrinsic performs a
 362                 // tree-reduce:
 363                 println!("{} | equal: {}", stringify!($id), r);
 364
 365                 let mut a = [0. as $elem_ty; $id::lanes()];
 366                 v.store_unaligned(&mut a);
 367
 368                 let twrapping_sum = a.tree_reduce_add();
 369                 println!(
 370                     "{} | twrapping_sum: {}",
 371                     stringify!($id),
 372                     twrapping_sum
 373                 );
 374
 375                 // tolerate 1 ULP difference:
 376                 if vwrapping_sum.as_int() > twrapping_sum.as_int() {
 377                     assert!(
 378                         vwrapping_sum.as_int() - twrapping_sum.as_int()
 379                             < 2,
 380                         "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
 381                         v,
 382                         vwrapping_sum,
 383                         twrapping_sum
 384                     );
 385                 } else {
 386                     assert!(
 387                         twrapping_sum.as_int() - vwrapping_sum.as_int()
 388                             < 2,
 389                         "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
 390                         v,
 391                         vwrapping_sum,
 392                         twrapping_sum
 393                     );
 394                 }
 395             }
 396             unsafe { test_fn() };
 397         }
 398     };
 399 }
 400
 401     #[test]
 402     fn wrapping_sum_roundoff_test() {
 403         finvoke!(wrapping_sum_roundoff_test);
 404     }
 405
 406     trait TreeReduceMul {
 407         type R;
 408         fn tree_reduce_mul(self) -> Self::R;
 409     }
 410
 411     macro_rules! tree_reduce_mul_f {
 412     ($elem_ty:ident) => {
 413         impl<'a> TreeReduceMul for &'a [$elem_ty] {
 414             type R = $elem_ty;
 415             fn tree_reduce_mul(self) -> $elem_ty {
 416                 if self.len() == 2 {
 417                     println!("  lv: {}, rv: {} => {}", self[0], self[1], self[0] * self[1]);
 418                     self[0] * self[1]
 419                 } else {
 420                     let mid = self.len() / 2;
 421                     let (left, right) = self.split_at(mid);
 422                     println!("  splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
 423                     Self::tree_reduce_mul(left) * Self::tree_reduce_mul(right)
 424                 }
 425             }
 426         }
 427     };
 428 }
 429
 430     tree_reduce_mul_f!(f32);
 431     tree_reduce_mul_f!(f64);
 432
 433     macro_rules! wrapping_product_roundoff_test {
 434         ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 435             if $feature_macro!($feature) {
 436                 #[target_feature(enable = $feature)]
 437                 unsafe fn test_fn() {
 438                     let mut start = std::$elem_ty::EPSILON;
 439                     let mut mul = 1. as $elem_ty;
 440
 441                     let mut v = $id::splat(1. as $elem_ty);
 442                     for i in 0..$id::lanes() {
 443                         let c = if i % 2 == 0 { 1e3 } else { -1. };
 444                         start *= 3.14 * c;
 445                         mul *= start;
 446                         println!("{} | start: {}", stringify!($id), start);
 447                         v = v.replace(i, start);
 448                     }
 449                     let vmul = v.wrapping_product();
 450                     println!("{} | lmul: {}", stringify!($id), mul);
 451                     println!("{} | vmul: {}", stringify!($id), vmul);
 452                     let r = vmul.as_int() == mul.as_int();
 453                     // This is false in general; the intrinsic performs a
 454                     // tree-reduce:
 455                     println!("{} | equal: {}", stringify!($id), r);
 456
 457                     let mut a = [0. as $elem_ty; $id::lanes()];
 458                     v.store_unaligned(&mut a);
 459
 460                     let tmul = a.tree_reduce_mul();
 461                     println!("{} | tmul: {}", stringify!($id), tmul);
 462
 463                     // tolerate 1 ULP difference:
 464                     if vmul.as_int() > tmul.as_int() {
 465                         assert!(
 466                             vmul.as_int() - tmul.as_int() < 2,
 467                             "v: {:?} | vmul: {} | tmul: {}",
 468                             v,
 469                             vmul,
 470                             tmul
 471                         );
 472                     } else {
 473                         assert!(
 474                             tmul.as_int() - vmul.as_int() < 2,
 475                             "v: {:?} | vmul: {} | tmul: {}",
 476                             v,
 477                             vmul,
 478                             tmul
 479                         );
 480                     }
 481                 }
 482                 unsafe { test_fn() };
 483             }
 484         };
 485     }
 486
 487     #[test]
 488     fn wrapping_product_roundoff_test() {
 489         finvoke!(wrapping_product_roundoff_test);
 490     }
 491
 492     macro_rules! wrapping_sum_overflow_test {
 493         ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 494             if $feature_macro!($feature) {
 495                 #[target_feature(enable = $feature)]
 496                 unsafe fn test_fn() {
 497                     let start = $elem_ty::max_value()
 498                         - ($id::lanes() as $elem_ty / 2);
 499
 500                     let v = $id::splat(start as $elem_ty);
 501                     let vwrapping_sum = v.wrapping_sum();
 502
 503                     let mut wrapping_sum = start;
 504                     for _ in 1..$id::lanes() {
 505                         wrapping_sum = wrapping_sum.wrapping_add(start);
 506                     }
 507                     assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
 508                 }
 509                 unsafe { test_fn() };
 510             }
 511         };
 512     }
 513
 514     #[test]
 515     fn wrapping_sum_overflow_test() {
 516         iinvoke!(wrapping_sum_overflow_test);
 517     }
 518
 519     macro_rules! mul_overflow_test {
 520         ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
 521             if $feature_macro!($feature) {
 522                 #[target_feature(enable = $feature)]
 523                 unsafe fn test_fn() {
 524                     let start = $elem_ty::max_value()
 525                         - ($id::lanes() as $elem_ty / 2);
 526
 527                     let v = $id::splat(start as $elem_ty);
 528                     let vmul = v.wrapping_product();
 529
 530                     let mut mul = start;
 531                     for _ in 1..$id::lanes() {
 532                         mul = mul.wrapping_mul(start);
 533                     }
 534                     assert_eq!(mul, vmul, "v = {:?}", v);
 535                 }
 536                 unsafe { test_fn() };
 537             }
 538         };
 539     }
 540
 541     #[test]
 542     fn mul_overflow_test() {
 543         iinvoke!(mul_overflow_test);
 544     }
 545
 546 }