//! Vertical floating-point `mul_add`
#![allow(unused)]
use crate::*;

// FIXME: 64-bit 1 element mul_add

pub(crate) trait MulAdd {
    fn mul_add(self, y: Self, z: Self) -> Self;
}
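
// Added commentary: a fused multiply-add computes `x * y + z` with a single
// rounding step, which is both faster and more accurate than a separate
// multiply followed by an add when hardware FMA is available.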

// Bindings to LLVM's `llvm.fma.*` intrinsics; LLVM lowers these to native
// FMA instructions when the target supports them, and to a libm `fma` call
// otherwise.
#[cfg(not(target_arch = "s390x"))]
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.fma.v2f32"]
    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
    #[link_name = "llvm.fma.v4f32"]
    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
    #[link_name = "llvm.fma.v8f32"]
    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
    #[link_name = "llvm.fma.v16f32"]
    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
    /* FIXME 64-bit single elem vectors
    #[link_name = "llvm.fma.v1f64"]
    fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
    */
    #[link_name = "llvm.fma.v2f64"]
    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
    #[link_name = "llvm.fma.v4f64"]
    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
    #[link_name = "llvm.fma.v8f64"]
    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
}

gen_tertiary_impl_table!(MulAdd, mul_add);
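
// Added note: `gen_tertiary_impl_table!` defines the `impl_tertiary!` helper
// used below, which implements `MulAdd` for a vector type by forwarding to
// the named backend function. The bracketed forms appear to adapt vector
// widths to the backend: `[t => T]` widens, `[h => T]` splits into halves,
// and `[q => T]` into quarters.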

cfg_if! {
    if #[cfg(target_arch = "s390x")] {
        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
        // The LLVM fma intrinsics are broken on s390x (see the issue above),
        // so fall back to an unfused multiply-add, which may round twice.
        macro_rules! impl_broken {
            ($id:ident) => {
                impl MulAdd for $id {
                    #[inline]
                    fn mul_add(self, y: Self, z: Self) -> Self {
                        self * y + z
                    }
                }
            };
        }

        impl_broken!(f32x2);
        impl_broken!(f32x4);
        impl_broken!(f32x8);
        impl_broken!(f32x16);

        impl_broken!(f64x2);
        impl_broken!(f64x4);
        impl_broken!(f64x8);
    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
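        // Added note: with the `sleef-sys` feature on x86_64, forward to
        // SLEEF's vectorized fma kernels, selecting the widest ISA the
        // target advertises (AVX2, then AVX, then SSE4.2), and fall back to
        // the LLVM intrinsics otherwise.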
        use sleef_sys::*;
        cfg_if! {
            if #[cfg(target_feature = "avx2")] {
                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);

                impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
                impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
                impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
                impl_tertiary!(f64x4: Sleef_fmad4_avx2);
            } else if #[cfg(target_feature = "avx")] {
                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
                impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);

                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x8: Sleef_fmaf8_avx);
                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
                impl_tertiary!(f64x4: Sleef_fmad4_avx);
            } else if #[cfg(target_feature = "sse4.2")] {
                impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);

                impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
                impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
                impl_tertiary!(f64x2: Sleef_fmad2_sse4);
                impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
            } else {
                impl_tertiary!(f32x2: fma_v2f32);
                impl_tertiary!(f32x16: fma_v16f32);
                impl_tertiary!(f64x8: fma_v8f64);

                impl_tertiary!(f32x4: fma_v4f32);
                impl_tertiary!(f32x8: fma_v8f32);
                impl_tertiary!(f64x2: fma_v2f64);
                impl_tertiary!(f64x4: fma_v4f64);
            }
        }
    } else {
        impl_tertiary!(f32x2: fma_v2f32);
        impl_tertiary!(f32x4: fma_v4f32);
        impl_tertiary!(f32x8: fma_v8f32);
        impl_tertiary!(f32x16: fma_v16f32);
        // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit single elem vectors
        impl_tertiary!(f64x2: fma_v2f64);
        impl_tertiary!(f64x4: fma_v4f64);
        impl_tertiary!(f64x8: fma_v8f64);
    }
}
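
// Usage sketch (added, hypothetical; written in the crate's test style):
//
//     #[test]
//     fn mul_add_f32x4() {
//         let x = f32x4::splat(2.);
//         assert_eq!(x.mul_add(x, x), f32x4::splat(6.)); // 2. * 2. + 2.
//     }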