]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! `x86_64` Streaming SIMD Extensions (SSE) |
2 | ||
532ac7d7 | 3 | use crate::core_arch::x86::*; |
0531ce1d XL |
4 | |
5 | #[cfg(test)] | |
416331ca | 6 | use stdarch_test::assert_instr; |
0531ce1d XL |
7 | |
8 | #[allow(improper_ctypes)] | |
9 | extern "C" { | |
10 | #[link_name = "llvm.x86.sse.cvtss2si64"] | |
11 | fn cvtss2si64(a: __m128) -> i64; | |
12 | #[link_name = "llvm.x86.sse.cvttss2si64"] | |
13 | fn cvttss2si64(a: __m128) -> i64; | |
14 | #[link_name = "llvm.x86.sse.cvtsi642ss"] | |
15 | fn cvtsi642ss(a: __m128, b: i64) -> __m128; | |
16 | } | |
17 | ||
532ac7d7 | 18 | /// Converts the lowest 32 bit float in the input vector to a 64 bit integer. |
0531ce1d XL |
19 | /// |
20 | /// The result is rounded according to the current rounding mode. If the result | |
21 | /// cannot be represented as a 64 bit integer the result will be | |
ba9703b0 | 22 | /// `0x8000_0000_0000_0000` (`i64::MIN`) or trigger an invalid operation |
0531ce1d XL |
23 | /// floating point exception if unmasked (see |
24 | /// [`_mm_setcsr`](fn._mm_setcsr.html)). | |
25 | /// | |
26 | /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). | |
83c7162d XL |
27 | /// |
28 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64) | |
0531ce1d XL |
29 | #[inline] |
30 | #[target_feature(enable = "sse")] | |
31 | #[cfg_attr(test, assert_instr(cvtss2si))] | |
83c7162d | 32 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
33 | pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { |
34 | cvtss2si64(a) | |
35 | } | |
36 | ||
532ac7d7 | 37 | /// Converts the lowest 32 bit float in the input vector to a 64 bit integer |
0531ce1d XL |
38 | /// with truncation. |
39 | /// | |
40 | /// The result is rounded always using truncation (round towards zero). If the | |
41 | /// result cannot be represented as a 64 bit integer the result will be | |
ba9703b0 | 42 | /// `0x8000_0000_0000_0000` (`i64::MIN`) or an invalid operation floating |
0531ce1d XL |
43 | /// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). |
44 | /// | |
45 | /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). | |
83c7162d XL |
46 | /// |
47 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64) | |
0531ce1d XL |
48 | #[inline] |
49 | #[target_feature(enable = "sse")] | |
50 | #[cfg_attr(test, assert_instr(cvttss2si))] | |
83c7162d | 51 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
52 | pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { |
53 | cvttss2si64(a) | |
54 | } | |
55 | ||
532ac7d7 | 56 | /// Converts a 64 bit integer to a 32 bit float. The result vector is the input |
0531ce1d XL |
57 | /// vector `a` with the lowest 32 bit float replaced by the converted integer. |
58 | /// | |
59 | /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit | |
60 | /// input). | |
83c7162d XL |
61 | /// |
62 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss) | |
0531ce1d XL |
63 | #[inline] |
64 | #[target_feature(enable = "sse")] | |
65 | #[cfg_attr(test, assert_instr(cvtsi2ss))] | |
83c7162d | 66 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
67 | pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { |
68 | cvtsi642ss(a, b) | |
69 | } | |
70 | ||
#[cfg(test)]
mod tests {
    use crate::core_arch::arch::x86_64::*;
    use stdarch_test::simd_test;

    /// Feeds a table of `(input, expected)` pairs through `_mm_cvtss_si64`,
    /// placing the input in the lowest lane of the vector.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvtss_si64() {
        let cases = [
            (42.0f32, 42i64),
            (-31.4, -31),
            // Both halfway cases are expected to land on the even neighbour.
            (-33.5, -34),
            (-34.5, -34),
            (4.0e10, 40_000_000_000),
            (4.0e-10, 0),
            // NaN is expected to produce the `i64::MIN` sentinel.
            (f32::NAN, i64::MIN),
            (2147483500.1, 2147483520),
            (9.223371e18, 9223370937343148032),
        ];
        for (case_no, &(input, expected)) in cases.iter().enumerate() {
            // Only the lowest lane matters; the other lanes are filler.
            let x = _mm_setr_ps(input, 1.0, 3.0, 4.0);
            let r = _mm_cvtss_si64(x);
            assert_eq!(
                expected, r,
                "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
                case_no, x, r, expected
            );
        }
    }

    /// Feeds a table of `(input, expected)` pairs through `_mm_cvttss_si64`,
    /// placing the input in the lowest lane of the vector.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvttss_si64() {
        let cases = [
            (42.0f32, 42i64),
            (-31.4, -31),
            // Fractional parts are expected to be dropped (towards zero).
            (-33.5, -33),
            (-34.5, -34),
            (10.999, 10),
            (-5.99, -5),
            (4.0e10, 40_000_000_000),
            (4.0e-10, 0),
            // NaN is expected to produce the `i64::MIN` sentinel.
            (f32::NAN, i64::MIN),
            (2147483500.1, 2147483520),
            (9.223371e18, 9223370937343148032),
            // This input is expected to yield the `i64::MIN` sentinel as well.
            (9.223372e18, i64::MIN),
        ];
        for (case_no, &(input, expected)) in cases.iter().enumerate() {
            // Only the lowest lane matters; the other lanes are filler.
            let x = _mm_setr_ps(input, 1.0, 3.0, 4.0);
            let r = _mm_cvttss_si64(x);
            assert_eq!(
                expected, r,
                "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
                case_no, x, r, expected
            );
        }
    }

    /// Checks that `_mm_cvtsi64_ss` writes the converted integer into the
    /// lowest lane and leaves the upper three lanes of `a` untouched.
    #[simd_test(enable = "sse")]
    unsafe fn test_mm_cvtsi64_ss() {
        let cases = [
            (4555i64, 4555.0f32),
            (322223333, 322223330.0),
            (-432, -432.0),
            (-322223333, -322223330.0),
            (9223372036854775807, 9.223372e18),
            (-9223372036854775808, -9.223372e18),
        ];

        for &(int_in, float_out) in cases.iter() {
            let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
            let r = _mm_cvtsi64_ss(a, int_in);
            // Expected vector: converted value in lane 0, lanes 1..3 from `a`.
            let e = _mm_setr_ps(float_out, 6.0, 7.0, 8.0);
            assert_eq_m128(e, r);
        }
    }
}