]> git.proxmox.com Git - rustc.git/blob - src/stdsimd/crates/coresimd/tests/reductions.rs
New upstream version 1.27.1+dfsg1
[rustc.git] / src / stdsimd / crates / coresimd / tests / reductions.rs
1 #![feature(stdsimd, sse4a_target_feature, avx512_target_feature)]
2 #![feature(arm_target_feature)]
3 #![feature(aarch64_target_feature)]
4 #![feature(powerpc_target_feature)]
5 #![allow(unused_attributes)]
6
7 #[macro_use]
8 extern crate stdsimd;
9
10 use stdsimd::simd::*;
11
// Stand-in feature-detection macro used only when compiling for 32-bit
// PowerPC. It accepts any single token and always expands to `false`, so
// every `if $feature_macro!(..) { .. }` guard built on it is skipped.
#[cfg(target_arch = "powerpc")]
macro_rules! is_powerpc_feature_detected {
    ($feature:tt) => {
        false
    };
}
18
// Expands `$test!(feature, $detect, $vector, $scalar);` once for every
// feature token in the bracketed list, in list order.
//
// * `$test`   - name of the macro that generates one test body.
// * `$detect` - name of the feature-detection macro handed to `$test`.
// * `$vector` - vector type identifier handed to `$test`.
// * `$scalar` - element type identifier handed to `$test`.
macro_rules! invoke_arch {
    (
        $test:ident,
        $detect:ident,
        $vector:ident,
        $scalar:ident,
        [$($cpu_feature:tt),*]
    ) => {
        $(
            $test!($cpu_feature, $detect, $vector, $scalar);
        )*
    }
}
25
// For every `(vector type, element type)` pair in the list, expands `$macro`
// through `invoke_arch!` once per CPU feature that applies to the target
// architecture being compiled for. Each architecture branch is selected with
// `#[cfg]`, so only the branch matching the compilation target is kept.
//
// * `$macro` - name of the macro that generates one test body; it receives
//   `(feature, detection macro, vector type, element type)`.
macro_rules! invoke_vectors {
    ($macro:ident, [$(($id:ident, $elem_ty:ident)),*]) => {
        $(
            // Each x86 feature is listed exactly once; "avx" and "avx2" are
            // distinct features and both need to be exercised.
            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
            invoke_arch!($macro, is_x86_feature_detected, $id, $elem_ty,
                         ["sse", "sse2", "sse3", "ssse3", "sse4.1",
                          "sse4.2", "sse4a", "avx", "avx2", "avx512f"]);
            #[cfg(target_arch = "aarch64")]
            invoke_arch!($macro, is_aarch64_feature_detected, $id, $elem_ty,
                         ["neon"]);
            #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
            invoke_arch!($macro, is_arm_feature_detected, $id, $elem_ty,
                         ["neon"]);
            #[cfg(target_arch = "powerpc")]
            invoke_arch!($macro, is_powerpc_feature_detected, $id, $elem_ty, ["altivec"]);
            #[cfg(target_arch = "powerpc64")]
            invoke_arch!($macro, is_powerpc64_feature_detected, $id, $elem_ty, ["altivec"]);
        )*
    }
}
46
// Runs the test-generating macro `$test` over every floating-point vector
// type covered by this file, pairing each vector type with its element type.
macro_rules! finvoke {
    ($test:ident) => {
        invoke_vectors!(
            $test,
            [
                // 32-bit float lanes
                (f32x2, f32), (f32x4, f32), (f32x8, f32), (f32x16, f32),
                // 64-bit float lanes
                (f64x2, f64), (f64x4, f64), (f64x8, f64)
            ]
        );
    };
}
63
// Runs the test-generating macro `$test` over every integer vector type
// covered by this file (signed first, then unsigned), pairing each vector
// type with its element type.
macro_rules! iinvoke {
    ($test:ident) => {
        invoke_vectors!(
            $test,
            [
                // signed lanes
                (i8x2, i8), (i8x4, i8), (i8x8, i8),
                (i8x16, i8), (i8x32, i8), (i8x64, i8),
                (i16x2, i16), (i16x4, i16), (i16x8, i16),
                (i16x16, i16), (i16x32, i16),
                (i32x2, i32), (i32x4, i32), (i32x8, i32), (i32x16, i32),
                (i64x2, i64), (i64x4, i64), (i64x8, i64),
                // unsigned lanes
                (u8x2, u8), (u8x4, u8), (u8x8, u8),
                (u8x16, u8), (u8x32, u8), (u8x64, u8),
                (u16x2, u16), (u16x4, u16), (u16x8, u16),
                (u16x16, u16), (u16x32, u16),
                (u32x2, u32), (u32x4, u32), (u32x8, u32), (u32x16, u32),
                (u64x2, u64), (u64x4, u64), (u64x8, u64)
            ]
        );
    };
}
109
// Generates a NaN-handling check for `min_element` on the float vector type
// `$id` (element type `$elem_ty`). The check only runs when
// `$feature_macro!($feature)` evaluates to true, and the generated function
// is compiled with that target feature enabled.
macro_rules! min_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let nan = ::std::$elem_ty::NAN;

                // Scalar reference: `min` of a NaN and a number is the number,
                // regardless of operand order.
                assert_eq!(nan.min(-3.0), -3.0);
                assert_eq!((-3.0 as $elem_ty).min(nan), -3.0);

                let all_minus_three = $id::splat(-3.0);

                // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
                // When the last element is NaN the current implementation
                // produces incorrect results, so the last lane is left out.
                let skipped_tail_lanes = 1;
                let checked_lanes = $id::lanes() - skipped_tail_lanes;
                for lane in 0..checked_lanes {
                    // One NaN at `lane`: the result is still the smallest
                    // non-NaN element.
                    let mut v = all_minus_three.replace(lane, nan);
                    assert_eq!(
                        v.min_element(),
                        -3.0,
                        "nan at {} => {} | {:?} | {:X}",
                        lane,
                        v.min_element(),
                        v,
                        v.as_int()
                    );
                    // Progressively turn every earlier lane into NaN as well.
                    for earlier in 0..lane {
                        v = v.replace(earlier, nan);
                        assert_eq!(
                            v.min_element(),
                            -3.0,
                            "nan at {} => {} | {:?} | {:X}",
                            lane,
                            v.min_element(),
                            v,
                            v.as_int()
                        );
                    }
                }

                // A vector made only of NaNs reduces to NaN.
                let all_nan = $id::splat(nan);
                assert!(
                    all_nan.min_element().is_nan(),
                    "all nans | v={:?} | min={} | is_nan: {}",
                    all_nan,
                    all_nan.min_element(),
                    all_nan.min_element().is_nan()
                );
            }
            unsafe { test_fn() };
        }
    }
}

#[test]
fn min_nan() {
    finvoke!(min_nan_test);
}
148
// Generates a NaN-handling check for `max_element` on the float vector type
// `$id` (element type `$elem_ty`). The check only runs when
// `$feature_macro!($feature)` evaluates to true, and the generated function
// is compiled with that target feature enabled.
macro_rules! max_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let nan = ::std::$elem_ty::NAN;

                // Scalar reference: `max` of a NaN and a number is the number,
                // regardless of operand order.
                assert_eq!(nan.max(-3.0), -3.0);
                assert_eq!((-3.0 as $elem_ty).max(nan), -3.0);

                let all_minus_three = $id::splat(-3.0);

                // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
                // When the last element is NaN the current implementation
                // produces incorrect results, so the last lane is left out.
                let skipped_tail_lanes = 1;
                let checked_lanes = $id::lanes() - skipped_tail_lanes;
                for lane in 0..checked_lanes {
                    // One NaN at `lane`: the result is still the largest
                    // non-NaN element.
                    let mut v = all_minus_three.replace(lane, nan);
                    assert_eq!(
                        v.max_element(),
                        -3.0,
                        "nan at {} => {} | {:?} | {:X}",
                        lane,
                        v.max_element(),
                        v,
                        v.as_int()
                    );
                    // Progressively turn every earlier lane into NaN as well.
                    for earlier in 0..lane {
                        v = v.replace(earlier, nan);
                        assert_eq!(
                            v.max_element(),
                            -3.0,
                            "nan at {} => {} | {:?} | {:X}",
                            lane,
                            v.max_element(),
                            v,
                            v.as_int()
                        );
                    }
                }

                // A vector made only of NaNs reduces to NaN.
                let all_nan = $id::splat(nan);
                assert!(
                    all_nan.max_element().is_nan(),
                    "all nans | v={:?} | max={} | is_nan: {}",
                    all_nan,
                    all_nan.max_element(),
                    all_nan.max_element().is_nan()
                );
            }
            unsafe { test_fn() };
        }
    }
}

#[test]
fn max_nan() {
    finvoke!(max_nan_test);
}
188
// Generates a NaN-propagation check for `wrapping_sum` on the float vector
// type `$id`. The generated function currently returns immediately (see the
// FIXME inside); the remaining body is kept so it still type-checks.
macro_rules! wrapping_sum_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            #[allow(unreachable_code)]
            unsafe fn test_fn() {
                // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
                // https://github.com/rust-lang-nursery/stdsimd/issues/409
                return;

                let nan = ::std::$elem_ty::NAN;
                let all_minus_three = $id::splat(-3.0);
                for lane in 0..$id::lanes() {
                    // A single NaN lane must make the sum NaN.
                    let mut v = all_minus_three.replace(lane, nan);
                    assert!(
                        v.wrapping_sum().is_nan(),
                        "nan at {} => {} | {:?}",
                        lane,
                        v.wrapping_sum(),
                        v
                    );
                    // So must any additional NaNs in the earlier lanes.
                    for earlier in 0..lane {
                        v = v.replace(earlier, nan);
                        assert!(v.wrapping_sum().is_nan());
                    }
                }

                let all_nan = $id::splat(nan);
                assert!(all_nan.wrapping_sum().is_nan(), "all nans | {:?}", all_nan);
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_sum_nan() {
    finvoke!(wrapping_sum_nan_test);
}
228
// Generates a NaN-propagation check for `wrapping_product` on the float
// vector type `$id`. The generated function currently returns immediately
// (see the FIXME inside); the remaining body is kept so it still type-checks
// and is ready to be enabled.
macro_rules! wrapping_product_nan_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            #[allow(unreachable_code)]
            unsafe fn test_fn() {
                // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
                // https://github.com/rust-lang-nursery/stdsimd/issues/409
                return;

                let n0 = ::std::$elem_ty::NAN;
                let v0 = $id::splat(-3.0);
                for i in 0..$id::lanes() {
                    let mut v = v0.replace(i, n0);
                    // If the vector contains a NaN the result is NaN:
                    assert!(
                        v.wrapping_product().is_nan(),
                        "nan at {} | {:?}",
                        i,
                        v
                    );
                    for j in 0..i {
                        v = v.replace(j, n0);
                        // This test is about the product reduction, so the
                        // product (not the sum) is what must be checked here.
                        assert!(v.wrapping_product().is_nan());
                    }
                }
                let v = $id::splat(n0);
                assert!(
                    v.wrapping_product().is_nan(),
                    "all nans | {:?}",
                    v
                );
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_product_nan() {
    finvoke!(wrapping_product_nan_test);
}
271
// Bit-for-bit reinterpretation between a floating-point type and an integer
// type of the same size. Used by the tests to print and compare raw bit
// patterns.
trait AsInt {
    // Integer type that holds the raw bits of `Self`.
    type Int;
    // Reinterprets the bits of `self` as `Self::Int`.
    fn as_int(self) -> Self::Int;
    // Reinterprets the bits of `x` as `Self`. The parameter is named because
    // anonymous trait-method parameters are deprecated.
    fn from_int(x: Self::Int) -> Self;
}

// Implements `AsInt` for `$float`, using `$int` as the raw-bit type.
macro_rules! as_int {
    ($float:ident, $int:ident) => {
        impl AsInt for $float {
            type Int = $int;
            fn as_int(self) -> $int {
                // `transmute` only compiles when both types have the same
                // size, so a mismatched pair is rejected at compile time.
                unsafe { ::std::mem::transmute(self) }
            }
            fn from_int(x: $int) -> $float {
                // Same size requirement as `as_int`, in the other direction.
                unsafe { ::std::mem::transmute(x) }
            }
        }
    };
}

// Scalar float types.
as_int!(f32, u32);
as_int!(f64, u64);
294 as_int!(f32x2, i32x2);
295 as_int!(f32x4, i32x4);
296 as_int!(f32x8, i32x8);
297 as_int!(f32x16, i32x16);
298 as_int!(f64x2, i64x2);
299 as_int!(f64x4, i64x4);
300 as_int!(f64x8, i64x8);
301
302 // FIXME: these fail on i586 for some reason
303 #[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))]
304 mod offset {
305 use super::*;
306
// Pairwise ("tree") summation: the input is split in half recursively and
// the two partial sums are added, instead of accumulating left to right.
trait TreeReduceAdd {
    // Type of the reduced value.
    type R;
    // Returns the tree-ordered sum of `self`.
    fn tree_reduce_add(self) -> Self::R;
}

// Implements `TreeReduceAdd` for slices of the float type `$elem_ty`.
//
// An empty slice sums to zero and a one-element slice to that element, so
// slices of any length (not only powers of two) reduce without panicking.
// Each step is traced with `println!` to help debug failing tests.
macro_rules! tree_reduce_add_f {
    ($elem_ty:ident) => {
        impl<'a> TreeReduceAdd for &'a [$elem_ty] {
            type R = $elem_ty;
            fn tree_reduce_add(self) -> $elem_ty {
                match self.len() {
                    // Additive identity.
                    0 => 0.0,
                    1 => self[0],
                    2 => {
                        println!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] + self[1]);
                        self[0] + self[1]
                    }
                    len => {
                        let mid = len / 2;
                        let (left, right) = self.split_at(mid);
                        // Trace the halves that are actually recursed into.
                        println!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, left, right);
                        left.tree_reduce_add() + right.tree_reduce_add()
                    }
                }
            }
        }
    };
}
tree_reduce_add_f!(f32);
tree_reduce_add_f!(f64);
332
// Generates a round-off check for `wrapping_sum` on the float vector type
// `$id`: the vector reduction must agree, to within one unit in the last
// place of the raw bit pattern, with a scalar tree reduction over the same
// lanes. The sequential (left-to-right) sum is only printed for comparison.
macro_rules! wrapping_sum_roundoff_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let mut term = std::$elem_ty::EPSILON;
                let mut linear_sum = 0. as $elem_ty;

                // Fill the lanes with terms of alternating sign and growing
                // magnitude, accumulating the sequential sum on the side.
                let mut v = $id::splat(0. as $elem_ty);
                for lane in 0..$id::lanes() {
                    let scale = if lane % 2 == 0 { 1e3 } else { -1. };
                    term *= 3.14 * scale;
                    linear_sum += term;
                    v = v.replace(lane, term);
                }

                let vector_sum = v.wrapping_sum();
                println!("{} | lwrapping_sum: {}", stringify!($id), linear_sum);
                println!("{} | vwrapping_sum: {}", stringify!($id), vector_sum);

                // This is false in general; the intrinsic performs a
                // tree-reduce:
                let bitwise_equal = vector_sum.as_int() == linear_sum.as_int();
                println!("{} | equal: {}", stringify!($id), bitwise_equal);

                let mut lanes = [0. as $elem_ty; $id::lanes()];
                v.store_unaligned(&mut lanes);

                let tree_sum = lanes.tree_reduce_add();
                println!("{} | twrapping_sum: {}", stringify!($id), tree_sum);

                // tolerate 1 ULP difference:
                let vector_bits = vector_sum.as_int();
                let tree_bits = tree_sum.as_int();
                let (hi, lo) = if vector_bits > tree_bits {
                    (vector_bits, tree_bits)
                } else {
                    (tree_bits, vector_bits)
                };
                assert!(
                    hi - lo < 2,
                    "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
                    v,
                    vector_sum,
                    tree_sum
                );
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_sum_roundoff_test() {
    finvoke!(wrapping_sum_roundoff_test);
}
405
// Pairwise ("tree") product: the input is split in half recursively and the
// two partial products are multiplied, instead of accumulating left to right.
trait TreeReduceMul {
    // Type of the reduced value.
    type R;
    // Returns the tree-ordered product of `self`.
    fn tree_reduce_mul(self) -> Self::R;
}

// Implements `TreeReduceMul` for slices of the float type `$elem_ty`.
//
// An empty slice multiplies to one and a one-element slice to that element,
// so slices of any length (not only powers of two) reduce without panicking.
// Each step is traced with `println!` to help debug failing tests.
macro_rules! tree_reduce_mul_f {
    ($elem_ty:ident) => {
        impl<'a> TreeReduceMul for &'a [$elem_ty] {
            type R = $elem_ty;
            fn tree_reduce_mul(self) -> $elem_ty {
                match self.len() {
                    // Multiplicative identity.
                    0 => 1.0,
                    1 => self[0],
                    2 => {
                        println!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] * self[1]);
                        self[0] * self[1]
                    }
                    len => {
                        let mid = len / 2;
                        let (left, right) = self.split_at(mid);
                        // Trace the halves that are actually recursed into.
                        println!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, left, right);
                        left.tree_reduce_mul() * right.tree_reduce_mul()
                    }
                }
            }
        }
    };
}

tree_reduce_mul_f!(f32);
tree_reduce_mul_f!(f64);
432
// Generates a round-off check for `wrapping_product` on the float vector
// type `$id`: the vector reduction must agree, to within one unit in the
// last place of the raw bit pattern, with a scalar tree reduction over the
// same lanes. The sequential (left-to-right) product is only printed for
// comparison.
macro_rules! wrapping_product_roundoff_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                let mut factor = std::$elem_ty::EPSILON;
                let mut linear_product = 1. as $elem_ty;

                // Fill the lanes with factors of alternating sign and growing
                // magnitude, accumulating the sequential product on the side.
                let mut v = $id::splat(1. as $elem_ty);
                for lane in 0..$id::lanes() {
                    let scale = if lane % 2 == 0 { 1e3 } else { -1. };
                    factor *= 3.14 * scale;
                    linear_product *= factor;
                    println!("{} | start: {}", stringify!($id), factor);
                    v = v.replace(lane, factor);
                }

                let vector_product = v.wrapping_product();
                println!("{} | lmul: {}", stringify!($id), linear_product);
                println!("{} | vmul: {}", stringify!($id), vector_product);

                // This is false in general; the intrinsic performs a
                // tree-reduce:
                let bitwise_equal = vector_product.as_int() == linear_product.as_int();
                println!("{} | equal: {}", stringify!($id), bitwise_equal);

                let mut lanes = [0. as $elem_ty; $id::lanes()];
                v.store_unaligned(&mut lanes);

                let tree_product = lanes.tree_reduce_mul();
                println!("{} | tmul: {}", stringify!($id), tree_product);

                // tolerate 1 ULP difference:
                let vector_bits = vector_product.as_int();
                let tree_bits = tree_product.as_int();
                let (hi, lo) = if vector_bits > tree_bits {
                    (vector_bits, tree_bits)
                } else {
                    (tree_bits, vector_bits)
                };
                assert!(
                    hi - lo < 2,
                    "v: {:?} | vmul: {} | tmul: {}",
                    v,
                    vector_product,
                    tree_product
                );
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_product_roundoff_test() {
    finvoke!(wrapping_product_roundoff_test);
}
491
// Generates an overflow check for `wrapping_sum` on the integer vector type
// `$id`: every lane holds a value close to the element type's maximum, and
// the vector reduction must equal the scalar `wrapping_add` of all lanes.
macro_rules! wrapping_sum_overflow_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                // Lane value: the maximum minus half the lane count.
                let half_lanes = $id::lanes() as $elem_ty / 2;
                let lane_value = $elem_ty::max_value() - half_lanes;

                let v = $id::splat(lane_value as $elem_ty);
                let vector_sum = v.wrapping_sum();

                // Scalar reference: the first lane seeds the accumulator and
                // the remaining `lanes - 1` copies are added with wrap-around.
                let scalar_sum = (1..$id::lanes())
                    .fold(lane_value, |acc, _| acc.wrapping_add(lane_value));

                assert_eq!(scalar_sum, vector_sum, "v = {:?}", v);
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn wrapping_sum_overflow_test() {
    iinvoke!(wrapping_sum_overflow_test);
}
518
// Generates an overflow check for `wrapping_product` on the integer vector
// type `$id`: every lane holds a value close to the element type's maximum,
// and the vector reduction must equal the scalar `wrapping_mul` of all lanes.
macro_rules! mul_overflow_test {
    ($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
        if $feature_macro!($feature) {
            #[target_feature(enable = $feature)]
            unsafe fn test_fn() {
                // Lane value: the maximum minus half the lane count.
                let half_lanes = $id::lanes() as $elem_ty / 2;
                let lane_value = $elem_ty::max_value() - half_lanes;

                let v = $id::splat(lane_value as $elem_ty);
                let vector_product = v.wrapping_product();

                // Scalar reference: the first lane seeds the accumulator and
                // the remaining `lanes - 1` copies are multiplied in with
                // wrap-around.
                let scalar_product = (1..$id::lanes())
                    .fold(lane_value, |acc, _| acc.wrapping_mul(lane_value));

                assert_eq!(scalar_product, vector_product, "v = {:?}", v);
            }
            unsafe { test_fn() };
        }
    };
}

#[test]
fn mul_overflow_test() {
    iinvoke!(mul_overflow_test);
}
545
546 }