]>
Commit | Line | Data |
---|---|---|
1 | //! Vertical floating-point `exp` | |
2 | #![allow(unused)] | |
3 | ||
4 | // FIXME 64-bit single elem vectors missing | |
5 | ||
6 | use crate::*; | |
7 | ||
/// Vertical floating-point `exp`.
///
/// Implemented further down in this file for the packed float vector types
/// by binding each of them to an `exp` routine (LLVM intrinsic or SLEEF).
pub(crate) trait Exp {
    /// Consumes `self` and returns the `exp` of it, in the same type.
    fn exp(self) -> Self;
}
11 | ||
12 | #[allow(improper_ctypes)] | |
13 | extern "C" { | |
14 | #[link_name = "llvm.exp.v2f32"] | |
15 | fn exp_v2f32(x: f32x2) -> f32x2; | |
16 | #[link_name = "llvm.exp.v4f32"] | |
17 | fn exp_v4f32(x: f32x4) -> f32x4; | |
18 | #[link_name = "llvm.exp.v8f32"] | |
19 | fn exp_v8f32(x: f32x8) -> f32x8; | |
20 | #[link_name = "llvm.exp.v16f32"] | |
21 | fn exp_v16f32(x: f32x16) -> f32x16; | |
22 | /* FIXME 64-bit expgle elem vectors | |
23 | #[link_name = "llvm.exp.v1f64"] | |
24 | fn exp_v1f64(x: f64x1) -> f64x1; | |
25 | */ | |
26 | #[link_name = "llvm.exp.v2f64"] | |
27 | fn exp_v2f64(x: f64x2) -> f64x2; | |
28 | #[link_name = "llvm.exp.v4f64"] | |
29 | fn exp_v4f64(x: f64x4) -> f64x4; | |
30 | #[link_name = "llvm.exp.v8f64"] | |
31 | fn exp_v8f64(x: f64x8) -> f64x8; | |
32 | ||
33 | #[link_name = "llvm.exp.f32"] | |
34 | fn exp_f32(x: f32) -> f32; | |
35 | #[link_name = "llvm.exp.f64"] | |
36 | fn exp_f64(x: f64) -> f64; | |
37 | } | |
38 | ||
39 | gen_unary_impl_table!(Exp, exp); | |
40 | ||
41 | cfg_if! { | |
42 | if #[cfg(target_arch = "s390x")] { | |
43 | // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 | |
44 | impl_unary!(f32x2[f32; 2]: exp_f32); | |
45 | impl_unary!(f32x4[f32; 4]: exp_f32); | |
46 | impl_unary!(f32x8[f32; 8]: exp_f32); | |
47 | impl_unary!(f32x16[f32; 16]: exp_f32); | |
48 | ||
49 | impl_unary!(f64x2[f64; 2]: exp_f64); | |
50 | impl_unary!(f64x4[f64; 4]: exp_f64); | |
51 | impl_unary!(f64x8[f64; 8]: exp_f64); | |
52 | } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { | |
53 | use sleef_sys::*; | |
54 | cfg_if! { | |
55 | if #[cfg(target_feature = "avx2")] { | |
56 | impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); | |
57 | impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); | |
58 | impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); | |
59 | ||
60 | impl_unary!(f32x4: Sleef_expf4_u10avx2128); | |
61 | impl_unary!(f32x8: Sleef_expf8_u10avx2); | |
62 | impl_unary!(f64x2: Sleef_expd2_u10avx2128); | |
63 | impl_unary!(f64x4: Sleef_expd4_u10avx2); | |
64 | } else if #[cfg(target_feature = "avx")] { | |
65 | impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); | |
66 | impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); | |
67 | impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); | |
68 | ||
69 | impl_unary!(f32x4: Sleef_expf4_u10sse4); | |
70 | impl_unary!(f32x8: Sleef_expf8_u10avx); | |
71 | impl_unary!(f64x2: Sleef_expd2_u10sse4); | |
72 | impl_unary!(f64x4: Sleef_expd4_u10avx); | |
73 | } else if #[cfg(target_feature = "sse4.2")] { | |
74 | impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); | |
75 | impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); | |
76 | impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); | |
77 | ||
78 | impl_unary!(f32x4: Sleef_expf4_u10sse4); | |
79 | impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); | |
80 | impl_unary!(f64x2: Sleef_expd2_u10sse4); | |
81 | impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); | |
82 | } else if #[cfg(target_feature = "sse2")] { | |
83 | impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); | |
84 | impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); | |
85 | impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); | |
86 | ||
87 | impl_unary!(f32x4: Sleef_expf4_u10sse2); | |
88 | impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); | |
89 | impl_unary!(f64x2: Sleef_expd2_u10sse2); | |
90 | impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); | |
91 | } else { | |
92 | impl_unary!(f32x2[f32; 2]: exp_f32); | |
93 | impl_unary!(f32x16: exp_v16f32); | |
94 | impl_unary!(f64x8: exp_v8f64); | |
95 | ||
96 | impl_unary!(f32x4: exp_v4f32); | |
97 | impl_unary!(f32x8: exp_v8f32); | |
98 | impl_unary!(f64x2: exp_v2f64); | |
99 | impl_unary!(f64x4: exp_v4f64); | |
100 | } | |
101 | } | |
102 | } else { | |
103 | impl_unary!(f32x2[f32; 2]: exp_f32); | |
104 | impl_unary!(f32x4: exp_v4f32); | |
105 | impl_unary!(f32x8: exp_v8f32); | |
106 | impl_unary!(f32x16: exp_v16f32); | |
107 | ||
108 | impl_unary!(f64x2: exp_v2f64); | |
109 | impl_unary!(f64x4: exp_v4f64); | |
110 | impl_unary!(f64x8: exp_v8f64); | |
111 | } | |
112 | } |