1 #![feature(stdsimd, sse4a_target_feature, avx512_target_feature)]
2 #![feature(arm_target_feature)]
3 #![feature(aarch64_target_feature)]
4 #![feature(powerpc_target_feature)]
5 #![allow(unused_attributes)]
/// `is_powerpc_feature_detected!` - declared only for `target_arch = "powerpc"`
/// builds (see the `cfg` gate). It is the feature-detection macro that
/// `invoke_vectors!` hands to `invoke_arch!` on that architecture.
// NOTE(review): the macro's rules are not visible in this extract.
12 #[cfg(target_arch = "powerpc")]
13 macro_rules
! is_powerpc_feature_detected
{
/// Invokes `$macro!($feature, $feature_macro, $id, $elem_ty);` once for every
/// entry of the bracketed, comma-separated `$feature` list.
///
/// * `$macro` - name of the macro to expand for each feature.
/// * `$feature_macro` - identifier forwarded unchanged; the test macros in this
///   file call it as `$feature_macro!($feature)` to guard each test.
/// * `$id`, `$elem_ty` - identifiers forwarded unchanged to `$macro`.
19 macro_rules
! invoke_arch
{
20 ($
macro:ident
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
,
21 [$
($feature
:tt
),*]) => {
// One `$macro!` invocation is emitted per `$feature` (the `$( ... )*` repetition).
22 $
($
macro!($feature
, $feature_macro
, $id
, $elem_ty
);)*
/// Accepts a test macro name plus a list of `($id, $elem_ty)` pairs and forwards
/// them to `invoke_arch!` together with the feature-detection macro and the
/// feature list of the architecture being compiled for. Every forwarding is
/// gated by a `#[cfg(target_arch = ...)]` attribute, so only the invocation for
/// the current target is compiled.
26 macro_rules
! invoke_vectors
{
27 ($
macro:ident
, [$
(($id
:ident
, $elem_ty
:ident
)),*]) => {
// x86 / x86_64: every listed feature gets its own instantiation of `$macro`.
// The list must name each feature once: "avx" followed by "avx2" (a duplicated
// "avx2" would run the AVX2 tests twice and never exercise plain AVX).
29 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
30 invoke_arch
!($
macro, is_x86_feature_detected
, $id
, $elem_ty
,
31 ["sse", "sse2", "sse3", "ssse3", "sse4.1",
32 "sse4.2", "sse4a", "avx", "avx2", "avx512f"]);
33 #[cfg(target_arch = "aarch64")]
34 invoke_arch
!($
macro, is_aarch64_feature_detected
, $id
, $elem_ty
,
// NOTE(review): the feature list of the aarch64 invocation is not visible in this extract.
// 32-bit ARM is only covered when the build already targets v7 with NEON.
36 #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon"))]
37 invoke_arch
!($
macro, is_arm_feature_detected
, $id
, $elem_ty
,
// NOTE(review): the feature list of the arm invocation is not visible in this extract.
39 #[cfg(target_arch = "powerpc")]
40 invoke_arch
!($
macro, is_powerpc_feature_detected
, $id
, $elem_ty
, ["altivec"]);
41 #[cfg(target_arch = "powerpc64")]
42 invoke_arch
!($
macro, is_powerpc64_feature_detected
, $id
, $elem_ty
, ["altivec"]);
/// `finvoke!` - driver macro used by the floating-point tests in this file,
/// e.g. `finvoke!(min_nan_test)`.
// NOTE(review): its rules are not visible in this extract.
47 macro_rules
! finvoke
{
/// `iinvoke!` - driver macro used by the integer overflow tests in this file,
/// e.g. `iinvoke!(wrapping_sum_overflow_test)`.
// NOTE(review): its rules are not visible in this extract.
64 macro_rules
! iinvoke
{
/// Test-body generator for `min_element` in the presence of NaN lanes.
///
/// Arguments are the ones supplied by `invoke_arch!`: the feature token, the
/// feature-detection macro, the vector type `$id` and its element type
/// `$elem_ty`. When `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
110 macro_rules
! min_nan_test
{
111 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
112 if $feature_macro
!($feature
) {
113 #[target_feature(enable = $feature)]
114 unsafe fn test_fn() {
115 let n0
= ::std
::$elem_ty
::NAN
;
// Scalar reference behaviour: `min` of NaN and -3.0 is -3.0 in either argument order.
117 assert_eq
!(n0
.min(-3.0), -3.0);
118 assert_eq
!((-3.0 as $elem_ty
).min(n0
), -3.0);
120 let v0
= $id
::splat(-3.0);
122 // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
123 // When the last element is NaN the current implementation produces incorrect results.
// NOTE(review): the bindings of `bugbug` and `j` are not visible in this extract.
125 for i
in 0..$id
::lanes() - bugbug
{
// Put a single NaN into lane `i` of an otherwise all -3.0 vector.
126 let mut v
= v0
.replace(i
, n0
);
127 // If there is a NaN, the result is always the smallest element:
128 assert_eq
!(v
.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i
, v
.min_element(), v
, v
.as_int());
// Add a further NaN at lane `j` and repeat the same check.
130 v
= v
.replace(j
, n0
);
131 assert_eq
!(v
.min_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i
, v
.min_element(), v
, v
.as_int());
134 // If the vector contains all NaNs the result is NaN:
135 let vn
= $id
::splat(n0
);
136 assert
!(vn
.min_element().is_nan(), "all nans | v={:?} | min={} | is_nan: {}",
137 vn
, vn
.min_element(), vn
.min_element().is_nan());
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
139 unsafe { test_fn() }
;
// Instantiate `min_nan_test` through `finvoke!`.
// NOTE(review): the enclosing function is not visible in this extract.
146 finvoke
!(min_nan_test
);
/// Test-body generator for `max_element` in the presence of NaN lanes.
///
/// Arguments are the ones supplied by `invoke_arch!`: the feature token, the
/// feature-detection macro, the vector type `$id` and its element type
/// `$elem_ty`. When `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
149 macro_rules
! max_nan_test
{
150 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
151 if $feature_macro
!($feature
) {
152 #[target_feature(enable = $feature)]
153 unsafe fn test_fn() {
154 let n0
= ::std
::$elem_ty
::NAN
;
// Scalar reference behaviour: `max` of NaN and -3.0 is -3.0 in either argument order.
156 assert_eq
!(n0
.max(-3.0), -3.0);
157 assert_eq
!((-3.0 as $elem_ty
).max(n0
), -3.0);
159 let v0
= $id
::splat(-3.0);
161 // FIXME (https://github.com/rust-lang-nursery/stdsimd/issues/408):
162 // When the last element is NaN the current implementation produces incorrect results.
// NOTE(review): the bindings of `bugbug` and `j` are not visible in this extract.
164 for i
in 0..$id
::lanes() - bugbug
{
// Put a single NaN into lane `i` of an otherwise all -3.0 vector.
165 let mut v
= v0
.replace(i
, n0
);
166 // If there is a NaN the result is always the largest element:
167 assert_eq
!(v
.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i
, v
.max_element(), v
, v
.as_int());
// Add a further NaN at lane `j` and repeat the same check.
169 v
= v
.replace(j
, n0
);
170 assert_eq
!(v
.max_element(), -3.0, "nan at {} => {} | {:?} | {:X}", i
, v
.max_element(), v
, v
.as_int());
174 // If the vector contains all NaNs the result is NaN:
175 let vn
= $id
::splat(n0
);
176 assert
!(vn
.max_element().is_nan(), "all nans | v={:?} | max={} | is_nan: {}",
177 vn
, vn
.max_element(), vn
.max_element().is_nan());
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
179 unsafe { test_fn() }
;
// Instantiate `max_nan_test` through `finvoke!`.
// NOTE(review): the enclosing function is not visible in this extract.
186 finvoke
!(max_nan_test
);
/// Test-body generator checking that `wrapping_sum` of a float vector is NaN
/// as soon as at least one lane is NaN.
///
/// Arguments are the ones supplied by `invoke_arch!`. When
/// `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
189 macro_rules
! wrapping_sum_nan_test
{
190 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
191 if $feature_macro
!($feature
) {
192 #[target_feature(enable = $feature)]
// NOTE(review): `allow(unreachable_code)` together with the FIXME below suggests the
// body is currently short-circuited; the statement doing so is not visible here - confirm.
193 #[allow(unreachable_code)]
194 unsafe fn test_fn() {
195 // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
196 // https://github.com/rust-lang-nursery/stdsimd/issues/409
199 let n0
= ::std
::$elem_ty
::NAN
;
200 let v0
= $id
::splat(-3.0);
201 for i
in 0..$id
::lanes() {
// Put a single NaN into lane `i` of an otherwise all -3.0 vector.
202 let mut v
= v0
.replace(i
, n0
);
203 // If the vector contains a NaN the result is NaN:
// NOTE(review): the opening of this assertion and its trailing arguments are not visible.
205 v
.wrapping_sum().is_nan(),
206 "nan at {} => {} | {:?}",
// Add a further NaN at lane `j` (binding not visible in this extract) and re-check.
212 v
= v
.replace(j
, n0
);
213 assert
!(v
.wrapping_sum().is_nan());
// A vector made only of NaNs must also sum to NaN.
216 let v
= $id
::splat(n0
);
217 assert
!(v
.wrapping_sum().is_nan(), "all nans | {:?}", v
);
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
219 unsafe { test_fn() }
;
/// Runs `wrapping_sum_nan_test` through the `finvoke!` driver macro.
225 fn wrapping_sum_nan() {
226 finvoke
!(wrapping_sum_nan_test
);
/// Test-body generator checking that `wrapping_product` of a float vector is
/// NaN as soon as at least one lane is NaN.
///
/// Arguments are the ones supplied by `invoke_arch!`. When
/// `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
229 macro_rules
! wrapping_product_nan_test
{
230 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
231 if $feature_macro
!($feature
) {
232 #[target_feature(enable = $feature)]
// NOTE(review): `allow(unreachable_code)` together with the FIXME below suggests the
// body is currently short-circuited; the statement doing so is not visible here - confirm.
233 #[allow(unreachable_code)]
234 unsafe fn test_fn() {
235 // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
236 // https://github.com/rust-lang-nursery/stdsimd/issues/409
239 let n0
= ::std
::$elem_ty
::NAN
;
240 let v0
= $id
::splat(-3.0);
241 for i
in 0..$id
::lanes() {
// Put a single NaN into lane `i` of an otherwise all -3.0 vector.
242 let mut v
= v0
.replace(i
, n0
);
243 // If the vector contains a NaN the result is NaN:
// NOTE(review): the opening of this assertion and its trailing arguments are not visible.
245 v
.wrapping_product().is_nan(),
// Add a further NaN at lane `j` (binding not visible in this extract) and re-check.
251 v
= v
.replace(j
, n0
);
// This is the product test, so the reduction under test is `wrapping_product`
// (checking `wrapping_sum` here would leave the multi-NaN product case untested).
252 assert
!(v
.wrapping_product().is_nan());
// A vector made only of NaNs must also multiply to NaN.
255 let v
= $id
::splat(n0
);
257 v
.wrapping_product().is_nan(),
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
262 unsafe { test_fn() }
;
/// Runs `wrapping_product_nan_test` through the `finvoke!` driver macro.
268 fn wrapping_product_nan() {
269 finvoke
!(wrapping_product_nan_test
);
// NOTE(review): the enclosing trait header is not visible in this extract; the
// `as_int!` macro in this file implements these two methods for `AsInt`.
/// Converts `self` into the associated integer type `Self::Int`.
274 fn as_int(self) -> Self::Int
;
/// Builds a value of the implementing type from a `Self::Int`.
275 fn from_int(Self::Int
) -> Self;
/// Implements `AsInt` for the type `$float`, using `$int` as the integer type.
/// Both directions are plain `mem::transmute` calls, i.e. the bits are
/// reinterpreted and no numeric conversion takes place.
278 macro_rules
! as_int
{
279 ($float
:ident
, $int
:ident
) => {
280 impl AsInt
for $float
{
282 fn as_int(self) -> $int
{
// `transmute` only compiles when `$float` and `$int` have the same size.
283 unsafe { ::std::mem::transmute(self) }
285 fn from_int(x
: $int
) -> $float
{
// Inverse reinterpretation: `$int` bits back to `$float`.
286 unsafe { ::std::mem::transmute(x) }
// `AsInt` implementations: each float vector type is paired with the signed
// integer vector type that has the same lane width and lane count.
294 as_int
!(f32x2
, i32x2
);
295 as_int
!(f32x4
, i32x4
);
296 as_int
!(f32x8
, i32x8
);
297 as_int
!(f32x16
, i32x16
);
298 as_int
!(f64x2
, i64x2
);
299 as_int
!(f64x4
, i64x4
);
300 as_int
!(f64x8
, i64x8
);
302 // FIXME: these fail on i586 for some reason
303 #[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))]
/// Reduction by addition; implemented below for `&[f32]` and `&[f64]` by
/// recursively splitting the slice in half and adding the two partial results.
// NOTE(review): the declaration of the associated type `R` is not visible in this extract.
307 trait TreeReduceAdd
{
/// Consumes `self` and returns the reduced value of type `Self::R`.
309 fn tree_reduce_add(self) -> Self::R
;
/// Implements `TreeReduceAdd` for `&[$elem_ty]`: the slice is split at its
/// midpoint and the recursively reduced halves are added, printing a trace of
/// every step.
312 macro_rules
! tree_reduce_add_f
{
313 ($elem_ty
:ident
) => {
314 impl<'a
> TreeReduceAdd
for &'a
[$elem_ty
] {
316 fn tree_reduce_add(self) -> $elem_ty
{
// Trace for the two-element step.
// NOTE(review): the condition selecting this branch and its result expression
// are not visible in this extract.
318 println
!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] + self[1]);
321 let mid
= self.len() / 2;
322 let (left
, right
) = self.split_at(mid
);
// Trace the split: print the two halves themselves, matching the
// "left"/"right" labels of the message.
323 println
!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid
, left, right);
324 Self::tree_reduce_add(left
) + Self::tree_reduce_add(right
)
// Provide `TreeReduceAdd` for `&[f32]` and `&[f64]`.
330 tree_reduce_add_f
!(f32);
331 tree_reduce_add_f
!(f64);
/// Test-body generator comparing the vector `wrapping_sum` against a scalar
/// running sum and against the `tree_reduce_add` of the stored lanes.
///
/// Arguments are the ones supplied by `invoke_arch!`. When
/// `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
333 macro_rules
! wrapping_sum_roundoff_test
{
334 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
335 if $feature_macro
!($feature
) {
336 #[target_feature(enable = $feature)]
337 unsafe fn test_fn() {
// Lane values start at the element type's EPSILON.
338 let mut start
= std
::$elem_ty
::EPSILON
;
// Scalar, left-to-right reference sum.
339 let mut wrapping_sum
= 0. as $elem_ty
;
341 let mut v
= $id
::splat(0. as $elem_ty
);
342 for i
in 0..$id
::lanes() {
// Alternating factor: 1e3 on even lanes, -1 on odd lanes.
// NOTE(review): the statement that applies `c` is not visible in this extract.
343 let c
= if i
% 2 == 0 { 1e3 }
else { -1. }
;
345 wrapping_sum
+= start
;
346 // println!("{} | start: {}", stringify!($id), start);
347 v
= v
.replace(i
, start
);
349 let vwrapping_sum
= v
.wrapping_sum();
// NOTE(review): the macro calls that own the next two format strings are not visible.
351 "{} | lwrapping_sum: {}",
356 "{} | vwrapping_sum: {}",
// Bitwise comparison of the vector sum with the scalar sum; only printed, not asserted.
360 let r
= vwrapping_sum
.as_int() == wrapping_sum
.as_int();
361 // This is false in general; the intrinsic performs a
363 println
!("{} | equal: {}", stringify
!($id
), r
);
// Copy the lanes into an array so that they can be tree-reduced.
365 let mut a
= [0. as $elem_ty
; $id
::lanes()];
366 v
.store_unaligned(&mut a
);
368 let twrapping_sum
= a
.tree_reduce_add();
370 "{} | twrapping_sum: {}",
375 // tolerate 1 ULP difference:
// The integer representations are subtracted larger-minus-smaller in each branch.
// NOTE(review): the assertions wrapping these differences are not visible in this extract.
376 if vwrapping_sum
.as_int() > twrapping_sum
.as_int() {
378 vwrapping_sum
.as_int() - twrapping_sum
.as_int()
380 "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
387 twrapping_sum
.as_int() - vwrapping_sum
.as_int()
389 "v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
396 unsafe { test_fn() }
;
/// Runs the `wrapping_sum_roundoff_test` macro through the `finvoke!` driver macro.
402 fn wrapping_sum_roundoff_test() {
403 finvoke
!(wrapping_sum_roundoff_test
);
/// Reduction by multiplication; implemented below for `&[f32]` and `&[f64]` by
/// recursively splitting the slice in half and multiplying the two partial results.
// NOTE(review): the declaration of the associated type `R` is not visible in this extract.
406 trait TreeReduceMul
{
/// Consumes `self` and returns the reduced value of type `Self::R`.
408 fn tree_reduce_mul(self) -> Self::R
;
/// Implements `TreeReduceMul` for `&[$elem_ty]`: the slice is split at its
/// midpoint and the recursively reduced halves are multiplied, printing a
/// trace of every step.
411 macro_rules
! tree_reduce_mul_f
{
412 ($elem_ty
:ident
) => {
413 impl<'a
> TreeReduceMul
for &'a
[$elem_ty
] {
415 fn tree_reduce_mul(self) -> $elem_ty
{
// Trace for the two-element step.
// NOTE(review): the condition selecting this branch and its result expression
// are not visible in this extract.
417 println
!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] * self[1]);
420 let mid
= self.len() / 2;
421 let (left
, right
) = self.split_at(mid
);
// Trace the split: print the two halves themselves, matching the
// "left"/"right" labels of the message.
422 println
!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid
, left, right);
423 Self::tree_reduce_mul(left
) * Self::tree_reduce_mul(right
)
// Provide `TreeReduceMul` for `&[f32]` and `&[f64]`.
430 tree_reduce_mul_f
!(f32);
431 tree_reduce_mul_f
!(f64);
/// Test-body generator comparing the vector `wrapping_product` against a scalar
/// running product and against the `tree_reduce_mul` of the stored lanes.
///
/// Arguments are the ones supplied by `invoke_arch!`. When
/// `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
433 macro_rules
! wrapping_product_roundoff_test
{
434 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
435 if $feature_macro
!($feature
) {
436 #[target_feature(enable = $feature)]
437 unsafe fn test_fn() {
// Lane values start at the element type's EPSILON.
438 let mut start
= std
::$elem_ty
::EPSILON
;
// Scalar reference product.
// NOTE(review): the statement updating `mul` is not visible in this extract.
439 let mut mul
= 1. as $elem_ty
;
441 let mut v
= $id
::splat(1. as $elem_ty
);
442 for i
in 0..$id
::lanes() {
// Alternating factor: 1e3 on even lanes, -1 on odd lanes.
// NOTE(review): the statement that applies `c` is not visible in this extract.
443 let c
= if i
% 2 == 0 { 1e3 }
else { -1. }
;
446 println
!("{} | start: {}", stringify
!($id
), start
);
447 v
= v
.replace(i
, start
);
449 let vmul
= v
.wrapping_product();
450 println
!("{} | lmul: {}", stringify
!($id
), mul
);
451 println
!("{} | vmul: {}", stringify
!($id
), vmul
);
// Bitwise comparison of the vector product with the scalar product; only printed, not asserted.
452 let r
= vmul
.as_int() == mul
.as_int();
453 // This is false in general; the intrinsic performs a
455 println
!("{} | equal: {}", stringify
!($id
), r
);
// Copy the lanes into an array so that they can be tree-reduced.
457 let mut a
= [0. as $elem_ty
; $id
::lanes()];
458 v
.store_unaligned(&mut a
);
460 let tmul
= a
.tree_reduce_mul();
461 println
!("{} | tmul: {}", stringify
!($id
), tmul
);
463 // tolerate 1 ULP difference:
// The integer representations are subtracted larger-minus-smaller and must differ by less than 2.
// NOTE(review): the assertion openers for the two branches are not visible in this extract.
464 if vmul
.as_int() > tmul
.as_int() {
466 vmul
.as_int() - tmul
.as_int() < 2,
467 "v: {:?} | vmul: {} | tmul: {}",
474 tmul
.as_int() - vmul
.as_int() < 2,
475 "v: {:?} | vmul: {} | tmul: {}",
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
482 unsafe { test_fn() }
;
/// Runs the `wrapping_product_roundoff_test` macro through the `finvoke!` driver macro.
488 fn wrapping_product_roundoff_test() {
489 finvoke
!(wrapping_product_roundoff_test
);
/// Test-body generator checking that the vector `wrapping_sum` equals a scalar
/// chain of `wrapping_add` calls when the lanes hold values close to the
/// element type's maximum.
///
/// Arguments are the ones supplied by `invoke_arch!`. When
/// `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
492 macro_rules
! wrapping_sum_overflow_test
{
493 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
494 if $feature_macro
!($feature
) {
495 #[target_feature(enable = $feature)]
496 unsafe fn test_fn() {
// Lane value: the maximum of `$elem_ty` minus half the lane count.
497 let start
= $elem_ty
::max_value()
498 - ($id
::lanes() as $elem_ty
/ 2);
500 let v
= $id
::splat(start
as $elem_ty
);
501 let vwrapping_sum
= v
.wrapping_sum();
// Scalar reference: `start` added to itself `lanes() - 1` times with wrapping arithmetic.
503 let mut wrapping_sum
= start
;
504 for _
in 1..$id
::lanes() {
505 wrapping_sum
= wrapping_sum
.wrapping_add(start
);
507 assert_eq
!(wrapping_sum
, vwrapping_sum
, "v = {:?}", v
);
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
509 unsafe { test_fn() }
;
/// Runs the `wrapping_sum_overflow_test` macro through the `iinvoke!` driver macro.
515 fn wrapping_sum_overflow_test() {
516 iinvoke
!(wrapping_sum_overflow_test
);
/// Test-body generator checking that the vector `wrapping_product` equals a
/// scalar chain of `wrapping_mul` calls when the lanes hold values close to the
/// element type's maximum.
///
/// Arguments are the ones supplied by `invoke_arch!`. When
/// `$feature_macro!($feature)` holds, a `test_fn` carrying
/// `#[target_feature(enable = $feature)]` is defined and called.
519 macro_rules
! mul_overflow_test
{
520 ($feature
:tt
, $feature_macro
:ident
, $id
:ident
, $elem_ty
:ident
) => {
521 if $feature_macro
!($feature
) {
522 #[target_feature(enable = $feature)]
523 unsafe fn test_fn() {
// Lane value: the maximum of `$elem_ty` minus half the lane count.
524 let start
= $elem_ty
::max_value()
525 - ($id
::lanes() as $elem_ty
/ 2);
527 let v
= $id
::splat(start
as $elem_ty
);
528 let vmul
= v
.wrapping_product();
// Scalar reference: repeated `wrapping_mul` by `start`, `lanes() - 1` times.
// NOTE(review): the initial binding of `mul` is not visible in this extract.
531 for _
in 1..$id
::lanes() {
532 mul
= mul
.wrapping_mul(start
);
534 assert_eq
!(mul
, vmul
, "v = {:?}", v
);
// SAFETY (presumed): reached only under the `$feature_macro!($feature)` guard above,
// so the enabled target feature is available - confirm against the missing braces.
536 unsafe { test_fn() }
;
/// Runs the `mul_overflow_test` macro through the `iinvoke!` driver macro.
542 fn mul_overflow_test() {
543 iinvoke
!(mul_overflow_test
);