]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Streaming SIMD Extensions 4.1 (SSE4.1) |
2 | ||
532ac7d7 XL |
3 | use crate::{ |
4 | core_arch::{simd::*, simd_llvm::*, x86::*}, | |
5 | mem::transmute, | |
6 | }; | |
0531ce1d XL |
7 | |
8 | #[cfg(test)] | |
416331ca | 9 | use stdarch_test::assert_instr; |
0531ce1d XL |
10 | |
// SSE4 rounding constants.
//
// Bit layout of the `roundps`/`roundpd` immediate:
//   bits [1:0] — rounding mode (nearest / down / up / truncate)
//   bit  2     — when set, use the rounding mode from MXCSR.RC instead
//   bit  3     — when set, suppress precision exceptions
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
0531ce1d XL |
52 | |
/// Blend packed 8-bit integers from `a` and `b` using `mask`
///
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set the element of `a` is selected. The element
/// of `b` is selected otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
    // Reinterpret all operands as sixteen i8 lanes and defer to the
    // `pblendvb` intrinsic, which does the per-byte high-bit select.
    transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16()))
}
67 | ||
/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
///
/// The mask bits determine the selection. A clear bit selects the
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xF0))]
#[cfg_attr(test, assert_instr(blendps, IMM8 = 0xF0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    // The immediate carries one select bit per 16-bit lane, so it must
    // fit in 8 bits.
    static_assert_imm8!(IMM8);
    transmute(pblendw(a.as_i16x8(), b.as_i16x8(), IMM8 as u8))
}
87 | ||
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
    // Direct call to the `blendvpd` intrinsic; no lane reinterpretation needed.
    blendvpd(a, b, mask)
}
99 | ||
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
    // Direct call to the `blendvps` intrinsic; no lane reinterpretation needed.
    blendvps(a, b, mask)
}
111 | ||
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `IMM2`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))]
#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
    // Two f64 lanes, so the select mask is exactly 2 bits wide.
    static_assert_imm2!(IMM2);
    blendpd(a, b, IMM2 as u8)
}
128 | ||
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `IMM4`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
    // Four f32 lanes, so the select mask is exactly 4 bits wide.
    static_assert_imm4!(IMM4);
    blendps(a, b, IMM4 as u8)
}
142 | ||
/// Extracts a single-precision (32-bit) floating-point element from `a`,
/// selected with `IMM8`. The returned `i32` stores the float's bit-pattern,
/// and may be converted back to a floating point number via casting.
///
/// # Example
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// #    if is_x86_feature_detected!("sse4.1") {
/// #       #[target_feature(enable = "sse4.1")]
/// #       unsafe fn worker() {
/// let mut float_store = vec![1.0, 1.0, 2.0, 3.0];
/// unsafe {
///     let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0);
///     let x: i32 = _mm_extract_ps::<2>(simd_floats);
///     float_store.push(f32::from_bits(x as u32));
/// }
/// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]);
/// #       }
/// #       unsafe { worker() }
/// #    }
/// # }
/// ```
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 0)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
    // Only 4 lanes to choose from, so the index is restricted to 2 bits.
    static_assert_imm2!(IMM8);
    // Extract the f32 lane, then transmute its bit pattern into an i32.
    transmute(simd_extract::<_, f32>(a, IMM8 as u32))
}
182 | ||
/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
    // 16 byte lanes, so the index is restricted to 4 bits.
    static_assert_imm4!(IMM8);
    // Extract as u8 so the widening `as i32` zero-extends (not sign-extends).
    simd_extract::<_, u8>(a.as_u8x16(), IMM8 as u32) as i32
}
198 | ||
/// Extracts a 32-bit integer from `a` selected with `IMM8`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
    // 4 lanes of i32, so the index is restricted to 2 bits.
    static_assert_imm2!(IMM8);
    simd_extract::<_, i32>(a.as_i32x4(), IMM8 as u32)
}
214 | ||
/// Select a single value in `a` to store at some position in `b`,
/// then zero elements according to `IMM8`.
///
/// `IMM8` specifies which bits from operand `a` will be copied, which bits in
/// the result they will be copied to, and which bits in the result will be
/// cleared. The following assignments are made:
///
/// * Bits `[7:6]` specify the bits to copy from operand `a`:
///     - `00`: Selects bits `[31:0]` from operand `a`.
///     - `01`: Selects bits `[63:32]` from operand `a`.
///     - `10`: Selects bits `[95:64]` from operand `a`.
///     - `11`: Selects bits `[127:96]` from operand `a`.
///
/// * Bits `[5:4]` specify the bits in the result to which the selected bits
/// from operand `a` are copied:
///     - `00`: Copies the selected bits from `a` to result bits `[31:0]`.
///     - `01`: Copies the selected bits from `a` to result bits `[63:32]`.
///     - `10`: Copies the selected bits from `a` to result bits `[95:64]`.
///     - `11`: Copies the selected bits from `a` to result bits `[127:96]`.
///
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
/// element is cleared.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    // All 8 immediate bits are meaningful here (source, destination, zero mask).
    static_assert_imm8!(IMM8);
    insertps(a, b, IMM8 as u8)
}
248 | ||
/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // 16 byte lanes, so the index is restricted to 4 bits; only the low
    // 8 bits of `i` are stored.
    static_assert_imm4!(IMM8);
    transmute(simd_insert(a.as_i8x16(), IMM8 as u32, i as i8))
}
262 | ||
/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // 4 lanes of i32, so the index is restricted to 2 bits.
    static_assert_imm2!(IMM8);
    transmute(simd_insert(a.as_i32x4(), IMM8 as u32, i))
}
276 | ||
/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
/// values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Signed byte lanes: reinterpret as i8x16 before the lane-wise max.
    transmute(pmaxsb(a.as_i8x16(), b.as_i8x16()))
}
288 | ||
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// maximum.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned 16-bit lanes: reinterpret as u16x8 before the lane-wise max.
    transmute(pmaxuw(a.as_u16x8(), b.as_u16x8()))
}
300 | ||
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
/// values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Signed 32-bit lanes: reinterpret as i32x4 before the lane-wise max.
    transmute(pmaxsd(a.as_i32x4(), b.as_i32x4()))
}
312 | ||
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned 32-bit lanes: reinterpret as u32x4 before the lane-wise max.
    transmute(pmaxud(a.as_u32x4(), b.as_u32x4()))
}
324 | ||
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
/// values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Signed byte lanes: reinterpret as i8x16 before the lane-wise min.
    transmute(pminsb(a.as_i8x16(), b.as_i8x16()))
}
336 | ||
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// minimum.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned 16-bit lanes: reinterpret as u16x8 before the lane-wise min.
    transmute(pminuw(a.as_u16x8(), b.as_u16x8()))
}
348 | ||
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
/// values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Signed 32-bit lanes: reinterpret as i32x4 before the lane-wise min.
    transmute(pminsd(a.as_i32x4(), b.as_i32x4()))
}
360 | ||
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned 32-bit lanes: reinterpret as u32x4 before the lane-wise min.
    transmute(pminud(a.as_u32x4(), b.as_u32x4()))
}
372 | ||
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    // `packusdw` narrows each signed i32 lane to u16 with unsigned saturation.
    transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
}
384 | ||
/// Compares packed 64-bit integers in `a` and `b` for equality
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise equality; `simd_eq` yields an all-ones/all-zeros i64 mask per lane.
    transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
395 | ||
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    // Keep the low 8 byte lanes, then widen; the i8 -> i16 cast sign-extends.
    let a: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
408 | ||
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    // Keep the low 4 byte lanes, then widen; the i8 -> i32 cast sign-extends.
    let a: i8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
421 | ||
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    // Keep the low 2 byte lanes, then widen; the i8 -> i64 cast sign-extends.
    let a: i8x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
435 | ||
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    // Keep the low 4 lanes, then widen; the i16 -> i32 cast sign-extends.
    let a: i16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
448 | ||
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    // Keep the low 2 lanes, then widen; the i16 -> i64 cast sign-extends.
    let a: i16x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
461 | ||
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    let a = a.as_i32x4();
    // Keep the low 2 lanes, then widen; the i32 -> i64 cast sign-extends.
    let a: i32x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
474 | ||
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    // Keep the low 8 byte lanes, then widen; the u8 -> i16 cast zero-extends.
    let a: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
487 | ||
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    // Keep the low 4 byte lanes, then widen; the u8 -> i32 cast zero-extends.
    let a: u8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
500 | ||
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    // Keep the low 2 byte lanes, then widen; the u8 -> i64 cast zero-extends.
    let a: u8x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
513 | ||
/// Zero-extends packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
    // Keep the low 4 lanes, then widen; the u16 -> i32 cast zero-extends.
    let a: u16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
527 | ||
/// Zero-extends packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
    // Keep the low 2 lanes, then widen; the u16 -> i64 cast zero-extends.
    let a: u16x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
541 | ||
/// Zero-extends packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    let a = a.as_u32x4();
    // Keep the low 2 lanes, then widen; the u32 -> i64 cast zero-extends.
    let a: u32x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
555 | ||
/// Returns the dot product of two __m128d vectors.
///
/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    // Full 8-bit immediate (broadcast + condition masks) passed through to `dppd`.
    static_assert_imm8!(IMM8);
    dppd(a, b, IMM8 as u8)
}
574 | ||
/// Returns the dot product of two __m128 vectors.
///
/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    // Full 8-bit immediate (broadcast + condition masks) passed through to `dpps`.
    static_assert_imm8!(IMM8);
    dpps(a, b, IMM8 as u8)
}
593 | ||
594 | /// Round the packed double-precision (64-bit) floating-point elements in `a` | |
532ac7d7 | 595 | /// down to an integer value, and stores the results as packed double-precision |
0531ce1d | 596 | /// floating-point elements. |
83c7162d XL |
597 | /// |
598 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd) | |
0531ce1d XL |
599 | #[inline] |
600 | #[target_feature(enable = "sse4.1")] | |
601 | #[cfg_attr(test, assert_instr(roundpd))] | |
83c7162d | 602 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 603 | pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { |
74b04a01 | 604 | simd_floor(a) |
0531ce1d XL |
605 | } |
606 | ||
607 | /// Round the packed single-precision (32-bit) floating-point elements in `a` | |
532ac7d7 | 608 | /// down to an integer value, and stores the results as packed single-precision |
0531ce1d | 609 | /// floating-point elements. |
83c7162d XL |
610 | /// |
611 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps) | |
0531ce1d XL |
612 | #[inline] |
613 | #[target_feature(enable = "sse4.1")] | |
614 | #[cfg_attr(test, assert_instr(roundps))] | |
83c7162d | 615 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 616 | pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { |
74b04a01 | 617 | simd_floor(a) |
0531ce1d XL |
618 | } |
619 | ||
620 | /// Round the lower double-precision (64-bit) floating-point element in `b` | |
621 | /// down to an integer value, store the result as a double-precision | |
622 | /// floating-point element in the lower element of the intrinsic result, | |
532ac7d7 | 623 | /// and copies the upper element from `a` to the upper element of the intrinsic |
0531ce1d | 624 | /// result. |
83c7162d XL |
625 | /// |
626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd) | |
0531ce1d XL |
627 | #[inline] |
628 | #[target_feature(enable = "sse4.1")] | |
629 | #[cfg_attr(test, assert_instr(roundsd))] | |
83c7162d | 630 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
631 | pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { |
632 | roundsd(a, b, _MM_FROUND_FLOOR) | |
633 | } | |
634 | ||
635 | /// Round the lower single-precision (32-bit) floating-point element in `b` | |
636 | /// down to an integer value, store the result as a single-precision | |
637 | /// floating-point element in the lower element of the intrinsic result, | |
532ac7d7 | 638 | /// and copies the upper 3 packed elements from `a` to the upper elements |
0531ce1d | 639 | /// of the intrinsic result. |
83c7162d XL |
640 | /// |
641 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss) | |
0531ce1d XL |
642 | #[inline] |
643 | #[target_feature(enable = "sse4.1")] | |
644 | #[cfg_attr(test, assert_instr(roundss))] | |
83c7162d | 645 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
646 | pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { |
647 | roundss(a, b, _MM_FROUND_FLOOR) | |
648 | } | |
649 | ||
650 | /// Round the packed double-precision (64-bit) floating-point elements in `a` | |
532ac7d7 | 651 | /// up to an integer value, and stores the results as packed double-precision |
0531ce1d | 652 | /// floating-point elements. |
83c7162d XL |
653 | /// |
654 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd) | |
0531ce1d XL |
655 | #[inline] |
656 | #[target_feature(enable = "sse4.1")] | |
657 | #[cfg_attr(test, assert_instr(roundpd))] | |
83c7162d | 658 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 659 | pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { |
74b04a01 | 660 | simd_ceil(a) |
0531ce1d XL |
661 | } |
662 | ||
663 | /// Round the packed single-precision (32-bit) floating-point elements in `a` | |
532ac7d7 | 664 | /// up to an integer value, and stores the results as packed single-precision |
0531ce1d | 665 | /// floating-point elements. |
83c7162d XL |
666 | /// |
667 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps) | |
0531ce1d XL |
668 | #[inline] |
669 | #[target_feature(enable = "sse4.1")] | |
670 | #[cfg_attr(test, assert_instr(roundps))] | |
83c7162d | 671 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 672 | pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { |
74b04a01 | 673 | simd_ceil(a) |
0531ce1d XL |
674 | } |
675 | ||
676 | /// Round the lower double-precision (64-bit) floating-point element in `b` | |
677 | /// up to an integer value, store the result as a double-precision | |
678 | /// floating-point element in the lower element of the intrisic result, | |
532ac7d7 | 679 | /// and copies the upper element from `a` to the upper element |
0531ce1d | 680 | /// of the intrinsic result. |
83c7162d XL |
681 | /// |
682 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd) | |
0531ce1d XL |
683 | #[inline] |
684 | #[target_feature(enable = "sse4.1")] | |
685 | #[cfg_attr(test, assert_instr(roundsd))] | |
83c7162d | 686 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
687 | pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { |
688 | roundsd(a, b, _MM_FROUND_CEIL) | |
689 | } | |
690 | ||
691 | /// Round the lower single-precision (32-bit) floating-point element in `b` | |
692 | /// up to an integer value, store the result as a single-precision | |
693 | /// floating-point element in the lower element of the intrinsic result, | |
532ac7d7 | 694 | /// and copies the upper 3 packed elements from `a` to the upper elements |
0531ce1d | 695 | /// of the intrinsic result. |
83c7162d XL |
696 | /// |
697 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss) | |
0531ce1d XL |
698 | #[inline] |
699 | #[target_feature(enable = "sse4.1")] | |
700 | #[cfg_attr(test, assert_instr(roundss))] | |
83c7162d | 701 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
702 | pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { |
703 | roundss(a, b, _MM_FROUND_CEIL) | |
704 | } | |
705 | ||
706 | /// Round the packed double-precision (64-bit) floating-point elements in `a` | |
17df50a5 | 707 | /// using the `ROUNDING` parameter, and stores the results as packed |
0531ce1d XL |
708 | /// double-precision floating-point elements. |
709 | /// Rounding is done according to the rounding parameter, which can be one of: | |
710 | /// | |
711 | /// ``` | |
0531ce1d XL |
712 | /// #[cfg(target_arch = "x86")] |
713 | /// use std::arch::x86::*; | |
714 | /// #[cfg(target_arch = "x86_64")] | |
715 | /// use std::arch::x86_64::*; | |
716 | /// | |
717 | /// # fn main() { | |
718 | /// // round to nearest, and suppress exceptions: | |
719 | /// # let _x = | |
720 | /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; | |
721 | /// // round down, and suppress exceptions: | |
722 | /// # let _x = | |
723 | /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; | |
724 | /// // round up, and suppress exceptions: | |
725 | /// # let _x = | |
726 | /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; | |
727 | /// // truncate, and suppress exceptions: | |
728 | /// # let _x = | |
729 | /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; | |
730 | /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: | |
731 | /// # let _x = | |
732 | /// _MM_FROUND_CUR_DIRECTION; | |
733 | /// # } | |
734 | /// ``` | |
83c7162d XL |
735 | /// |
736 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd) | |
0531ce1d XL |
737 | #[inline] |
738 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
739 | #[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))] |
740 | #[rustc_legacy_const_generics(1)] | |
741 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
742 | pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d { | |
743 | static_assert_imm4!(ROUNDING); | |
744 | roundpd(a, ROUNDING) | |
0531ce1d XL |
745 | } |
746 | ||
747 | /// Round the packed single-precision (32-bit) floating-point elements in `a` | |
17df50a5 | 748 | /// using the `ROUNDING` parameter, and stores the results as packed |
0531ce1d XL |
749 | /// single-precision floating-point elements. |
750 | /// Rounding is done according to the rounding parameter, which can be one of: | |
751 | /// | |
752 | /// ``` | |
0531ce1d XL |
753 | /// #[cfg(target_arch = "x86")] |
754 | /// use std::arch::x86::*; | |
755 | /// #[cfg(target_arch = "x86_64")] | |
756 | /// use std::arch::x86_64::*; | |
757 | /// | |
758 | /// # fn main() { | |
759 | /// // round to nearest, and suppress exceptions: | |
760 | /// # let _x = | |
761 | /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; | |
762 | /// // round down, and suppress exceptions: | |
763 | /// # let _x = | |
764 | /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; | |
765 | /// // round up, and suppress exceptions: | |
766 | /// # let _x = | |
767 | /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; | |
768 | /// // truncate, and suppress exceptions: | |
769 | /// # let _x = | |
770 | /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; | |
771 | /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: | |
772 | /// # let _x = | |
773 | /// _MM_FROUND_CUR_DIRECTION; | |
774 | /// # } | |
775 | /// ``` | |
83c7162d XL |
776 | /// |
777 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps) | |
0531ce1d XL |
778 | #[inline] |
779 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
780 | #[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))] |
781 | #[rustc_legacy_const_generics(1)] | |
782 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
783 | pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 { | |
784 | static_assert_imm4!(ROUNDING); | |
785 | roundps(a, ROUNDING) | |
0531ce1d XL |
786 | } |
787 | ||
788 | /// Round the lower double-precision (64-bit) floating-point element in `b` | |
17df50a5 | 789 | /// using the `ROUNDING` parameter, store the result as a double-precision |
0531ce1d | 790 | /// floating-point element in the lower element of the intrinsic result, |
532ac7d7 | 791 | /// and copies the upper element from `a` to the upper element of the intrinsic |
0531ce1d XL |
792 | /// result. |
793 | /// Rounding is done according to the rounding parameter, which can be one of: | |
794 | /// | |
795 | /// ``` | |
0531ce1d XL |
796 | /// #[cfg(target_arch = "x86")] |
797 | /// use std::arch::x86::*; | |
798 | /// #[cfg(target_arch = "x86_64")] | |
799 | /// use std::arch::x86_64::*; | |
800 | /// | |
801 | /// # fn main() { | |
802 | /// // round to nearest, and suppress exceptions: | |
803 | /// # let _x = | |
804 | /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; | |
805 | /// // round down, and suppress exceptions: | |
806 | /// # let _x = | |
807 | /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; | |
808 | /// // round up, and suppress exceptions: | |
809 | /// # let _x = | |
810 | /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; | |
811 | /// // truncate, and suppress exceptions: | |
812 | /// # let _x = | |
813 | /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; | |
814 | /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: | |
815 | /// # let _x = | |
816 | /// _MM_FROUND_CUR_DIRECTION; | |
817 | /// # } | |
818 | /// ``` | |
83c7162d XL |
819 | /// |
820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd) | |
0531ce1d XL |
821 | #[inline] |
822 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
823 | #[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))] |
824 | #[rustc_legacy_const_generics(2)] | |
825 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
826 | pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d { | |
827 | static_assert_imm4!(ROUNDING); | |
828 | roundsd(a, b, ROUNDING) | |
0531ce1d XL |
829 | } |
830 | ||
831 | /// Round the lower single-precision (32-bit) floating-point element in `b` | |
17df50a5 | 832 | /// using the `ROUNDING` parameter, store the result as a single-precision |
0531ce1d | 833 | /// floating-point element in the lower element of the intrinsic result, |
532ac7d7 | 834 | /// and copies the upper 3 packed elements from `a` to the upper elements |
0531ce1d XL |
835 | /// of the instrinsic result. |
836 | /// Rounding is done according to the rounding parameter, which can be one of: | |
837 | /// | |
838 | /// ``` | |
0531ce1d XL |
839 | /// #[cfg(target_arch = "x86")] |
840 | /// use std::arch::x86::*; | |
841 | /// #[cfg(target_arch = "x86_64")] | |
842 | /// use std::arch::x86_64::*; | |
843 | /// | |
844 | /// # fn main() { | |
845 | /// // round to nearest, and suppress exceptions: | |
846 | /// # let _x = | |
847 | /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; | |
848 | /// // round down, and suppress exceptions: | |
849 | /// # let _x = | |
850 | /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; | |
851 | /// // round up, and suppress exceptions: | |
852 | /// # let _x = | |
853 | /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; | |
854 | /// // truncate, and suppress exceptions: | |
855 | /// # let _x = | |
856 | /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; | |
857 | /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: | |
858 | /// # let _x = | |
859 | /// _MM_FROUND_CUR_DIRECTION; | |
860 | /// # } | |
861 | /// ``` | |
83c7162d XL |
862 | /// |
863 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss) | |
0531ce1d XL |
864 | #[inline] |
865 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
866 | #[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))] |
867 | #[rustc_legacy_const_generics(2)] | |
868 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
869 | pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 { | |
870 | static_assert_imm4!(ROUNDING); | |
871 | roundss(a, b, ROUNDING) | |
0531ce1d XL |
872 | } |
873 | ||
874 | /// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector, | |
875 | /// returning a vector containing its value in its first position, and its | |
876 | /// index | |
877 | /// in its second position; all other elements are set to zero. | |
878 | /// | |
fc512014 | 879 | /// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW` |
0531ce1d XL |
880 | /// instruction. |
881 | /// | |
882 | /// Arguments: | |
883 | /// | |
884 | /// * `a` - A 128-bit vector of type `__m128i`. | |
885 | /// | |
886 | /// Returns: | |
887 | /// | |
888 | /// A 128-bit value where: | |
889 | /// | |
890 | /// * bits `[15:0]` - contain the minimum value found in parameter `a`, | |
891 | /// * bits `[18:16]` - contain the index of the minimum value | |
892 | /// * remaining bits are set to `0`. | |
83c7162d XL |
893 | /// |
894 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16) | |
0531ce1d XL |
895 | #[inline] |
896 | #[target_feature(enable = "sse4.1")] | |
897 | #[cfg_attr(test, assert_instr(phminposuw))] | |
83c7162d | 898 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 899 | pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { |
532ac7d7 | 900 | transmute(phminposuw(a.as_u16x8())) |
0531ce1d XL |
901 | } |
902 | ||
532ac7d7 XL |
903 | /// Multiplies the low 32-bit integers from each packed 64-bit |
904 | /// element in `a` and `b`, and returns the signed 64-bit result. | |
83c7162d XL |
905 | /// |
906 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32) | |
0531ce1d XL |
907 | #[inline] |
908 | #[target_feature(enable = "sse4.1")] | |
909 | #[cfg_attr(test, assert_instr(pmuldq))] | |
83c7162d | 910 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 911 | pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { |
532ac7d7 | 912 | transmute(pmuldq(a.as_i32x4(), b.as_i32x4())) |
0531ce1d XL |
913 | } |
914 | ||
532ac7d7 | 915 | /// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate |
0531ce1d XL |
916 | /// 64-bit integers, and returns the lowest 32-bit, whatever they might be, |
917 | /// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), | |
918 | /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping | |
919 | /// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would | |
920 | /// return a negative number. | |
83c7162d XL |
921 | /// |
922 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32) | |
0531ce1d XL |
923 | #[inline] |
924 | #[target_feature(enable = "sse4.1")] | |
925 | #[cfg_attr(test, assert_instr(pmulld))] | |
83c7162d | 926 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 927 | pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { |
532ac7d7 | 928 | transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) |
0531ce1d XL |
929 | } |
930 | ||
931 | /// Subtracts 8-bit unsigned integer values and computes the absolute | |
932 | /// values of the differences to the corresponding bits in the destination. | |
933 | /// Then sums of the absolute differences are returned according to the bit | |
934 | /// fields in the immediate operand. | |
935 | /// | |
936 | /// The following algorithm is performed: | |
937 | /// | |
938 | /// ```ignore | |
17df50a5 XL |
939 | /// i = IMM8[2] * 4 |
940 | /// j = IMM8[1:0] * 4 | |
0531ce1d XL |
941 | /// for k := 0 to 7 |
942 | /// d0 = abs(a[i + k + 0] - b[j + 0]) | |
943 | /// d1 = abs(a[i + k + 1] - b[j + 1]) | |
944 | /// d2 = abs(a[i + k + 2] - b[j + 2]) | |
945 | /// d3 = abs(a[i + k + 3] - b[j + 3]) | |
946 | /// r[k] = d0 + d1 + d2 + d3 | |
947 | /// ``` | |
948 | /// | |
949 | /// Arguments: | |
950 | /// | |
951 | /// * `a` - A 128-bit vector of type `__m128i`. | |
952 | /// * `b` - A 128-bit vector of type `__m128i`. | |
17df50a5 | 953 | /// * `IMM8` - An 8-bit immediate operand specifying how the absolute |
0731742a | 954 | /// differences are to be calculated |
0531ce1d XL |
955 | /// * Bit `[2]` specify the offset for operand `a` |
956 | /// * Bits `[1:0]` specify the offset for operand `b` | |
957 | /// | |
958 | /// Returns: | |
959 | /// | |
0731742a XL |
960 | /// * A `__m128i` vector containing the sums of the sets of absolute |
961 | /// differences between both operands. | |
83c7162d XL |
962 | /// |
963 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8) | |
0531ce1d XL |
964 | #[inline] |
965 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
966 | #[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))] |
967 | #[rustc_legacy_const_generics(2)] | |
83c7162d | 968 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 XL |
969 | pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
970 | static_assert_imm3!(IMM8); | |
971 | transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8)) | |
0531ce1d XL |
972 | } |
973 | ||
974 | /// Tests whether the specified bits in a 128-bit integer vector are all | |
975 | /// zeros. | |
976 | /// | |
977 | /// Arguments: | |
978 | /// | |
979 | /// * `a` - A 128-bit integer vector containing the bits to be tested. | |
980 | /// * `mask` - A 128-bit integer vector selecting which bits to test in | |
0731742a | 981 | /// operand `a`. |
0531ce1d XL |
982 | /// |
983 | /// Returns: | |
984 | /// | |
985 | /// * `1` - if the specified bits are all zeros, | |
986 | /// * `0` - otherwise. | |
83c7162d XL |
987 | /// |
988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128) | |
0531ce1d XL |
989 | #[inline] |
990 | #[target_feature(enable = "sse4.1")] | |
991 | #[cfg_attr(test, assert_instr(ptest))] | |
83c7162d | 992 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
993 | pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { |
994 | ptestz(a.as_i64x2(), mask.as_i64x2()) | |
995 | } | |
996 | ||
997 | /// Tests whether the specified bits in a 128-bit integer vector are all | |
998 | /// ones. | |
999 | /// | |
1000 | /// Arguments: | |
1001 | /// | |
1002 | /// * `a` - A 128-bit integer vector containing the bits to be tested. | |
1003 | /// * `mask` - A 128-bit integer vector selecting which bits to test in | |
0731742a | 1004 | /// operand `a`. |
0531ce1d XL |
1005 | /// |
1006 | /// Returns: | |
1007 | /// | |
1008 | /// * `1` - if the specified bits are all ones, | |
1009 | /// * `0` - otherwise. | |
83c7162d XL |
1010 | /// |
1011 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128) | |
0531ce1d XL |
1012 | #[inline] |
1013 | #[target_feature(enable = "sse4.1")] | |
1014 | #[cfg_attr(test, assert_instr(ptest))] | |
83c7162d | 1015 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1016 | pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { |
1017 | ptestc(a.as_i64x2(), mask.as_i64x2()) | |
1018 | } | |
1019 | ||
1020 | /// Tests whether the specified bits in a 128-bit integer vector are | |
1021 | /// neither all zeros nor all ones. | |
1022 | /// | |
1023 | /// Arguments: | |
1024 | /// | |
1025 | /// * `a` - A 128-bit integer vector containing the bits to be tested. | |
1026 | /// * `mask` - A 128-bit integer vector selecting which bits to test in | |
0731742a | 1027 | /// operand `a`. |
0531ce1d XL |
1028 | /// |
1029 | /// Returns: | |
1030 | /// | |
1031 | /// * `1` - if the specified bits are neither all zeros nor all ones, | |
1032 | /// * `0` - otherwise. | |
83c7162d XL |
1033 | /// |
1034 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128) | |
0531ce1d XL |
1035 | #[inline] |
1036 | #[target_feature(enable = "sse4.1")] | |
1037 | #[cfg_attr(test, assert_instr(ptest))] | |
83c7162d | 1038 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1039 | pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { |
1040 | ptestnzc(a.as_i64x2(), mask.as_i64x2()) | |
1041 | } | |
1042 | ||
1043 | /// Tests whether the specified bits in a 128-bit integer vector are all | |
1044 | /// zeros. | |
1045 | /// | |
1046 | /// Arguments: | |
1047 | /// | |
1048 | /// * `a` - A 128-bit integer vector containing the bits to be tested. | |
1049 | /// * `mask` - A 128-bit integer vector selecting which bits to test in | |
0731742a | 1050 | /// operand `a`. |
0531ce1d XL |
1051 | /// |
1052 | /// Returns: | |
1053 | /// | |
1054 | /// * `1` - if the specified bits are all zeros, | |
1055 | /// * `0` - otherwise. | |
83c7162d XL |
1056 | /// |
1057 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros) | |
0531ce1d XL |
1058 | #[inline] |
1059 | #[target_feature(enable = "sse4.1")] | |
1060 | #[cfg_attr(test, assert_instr(ptest))] | |
83c7162d | 1061 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1062 | pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { |
1063 | _mm_testz_si128(a, mask) | |
1064 | } | |
1065 | ||
1066 | /// Tests whether the specified bits in `a` 128-bit integer vector are all | |
1067 | /// ones. | |
1068 | /// | |
1069 | /// Argument: | |
1070 | /// | |
1071 | /// * `a` - A 128-bit integer vector containing the bits to be tested. | |
1072 | /// | |
1073 | /// Returns: | |
1074 | /// | |
1075 | /// * `1` - if the bits specified in the operand are all set to 1, | |
1076 | /// * `0` - otherwise. | |
83c7162d XL |
1077 | /// |
1078 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones) | |
0531ce1d XL |
1079 | #[inline] |
1080 | #[target_feature(enable = "sse4.1")] | |
1081 | #[cfg_attr(test, assert_instr(pcmpeqd))] | |
1082 | #[cfg_attr(test, assert_instr(ptest))] | |
83c7162d | 1083 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1084 | pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { |
1085 | _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) | |
1086 | } | |
1087 | ||
1088 | /// Tests whether the specified bits in a 128-bit integer vector are | |
1089 | /// neither all zeros nor all ones. | |
1090 | /// | |
1091 | /// Arguments: | |
1092 | /// | |
1093 | /// * `a` - A 128-bit integer vector containing the bits to be tested. | |
1094 | /// * `mask` - A 128-bit integer vector selecting which bits to test in | |
0731742a | 1095 | /// operand `a`. |
0531ce1d XL |
1096 | /// |
1097 | /// Returns: | |
1098 | /// | |
1099 | /// * `1` - if the specified bits are neither all zeros nor all ones, | |
1100 | /// * `0` - otherwise. | |
83c7162d XL |
1101 | /// |
1102 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros) | |
0531ce1d XL |
1103 | #[inline] |
1104 | #[target_feature(enable = "sse4.1")] | |
1105 | #[cfg_attr(test, assert_instr(ptest))] | |
83c7162d | 1106 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1107 | pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { |
1108 | _mm_testnzc_si128(a, mask) | |
1109 | } | |
1110 | ||
1111 | #[allow(improper_ctypes)] | |
1112 | extern "C" { | |
1113 | #[link_name = "llvm.x86.sse41.pblendvb"] | |
1114 | fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16; | |
1115 | #[link_name = "llvm.x86.sse41.blendvpd"] | |
1116 | fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d; | |
1117 | #[link_name = "llvm.x86.sse41.blendvps"] | |
1118 | fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128; | |
1119 | #[link_name = "llvm.x86.sse41.blendpd"] | |
1120 | fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d; | |
1121 | #[link_name = "llvm.x86.sse41.blendps"] | |
1122 | fn blendps(a: __m128, b: __m128, imm4: u8) -> __m128; | |
1123 | #[link_name = "llvm.x86.sse41.pblendw"] | |
1124 | fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8; | |
1125 | #[link_name = "llvm.x86.sse41.insertps"] | |
1126 | fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128; | |
1127 | #[link_name = "llvm.x86.sse41.pmaxsb"] | |
1128 | fn pmaxsb(a: i8x16, b: i8x16) -> i8x16; | |
1129 | #[link_name = "llvm.x86.sse41.pmaxuw"] | |
1130 | fn pmaxuw(a: u16x8, b: u16x8) -> u16x8; | |
1131 | #[link_name = "llvm.x86.sse41.pmaxsd"] | |
1132 | fn pmaxsd(a: i32x4, b: i32x4) -> i32x4; | |
1133 | #[link_name = "llvm.x86.sse41.pmaxud"] | |
1134 | fn pmaxud(a: u32x4, b: u32x4) -> u32x4; | |
1135 | #[link_name = "llvm.x86.sse41.pminsb"] | |
1136 | fn pminsb(a: i8x16, b: i8x16) -> i8x16; | |
1137 | #[link_name = "llvm.x86.sse41.pminuw"] | |
1138 | fn pminuw(a: u16x8, b: u16x8) -> u16x8; | |
1139 | #[link_name = "llvm.x86.sse41.pminsd"] | |
1140 | fn pminsd(a: i32x4, b: i32x4) -> i32x4; | |
1141 | #[link_name = "llvm.x86.sse41.pminud"] | |
1142 | fn pminud(a: u32x4, b: u32x4) -> u32x4; | |
1143 | #[link_name = "llvm.x86.sse41.packusdw"] | |
1144 | fn packusdw(a: i32x4, b: i32x4) -> u16x8; | |
1145 | #[link_name = "llvm.x86.sse41.dppd"] | |
1146 | fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; | |
1147 | #[link_name = "llvm.x86.sse41.dpps"] | |
1148 | fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128; | |
1149 | #[link_name = "llvm.x86.sse41.round.pd"] | |
1150 | fn roundpd(a: __m128d, rounding: i32) -> __m128d; | |
1151 | #[link_name = "llvm.x86.sse41.round.ps"] | |
1152 | fn roundps(a: __m128, rounding: i32) -> __m128; | |
1153 | #[link_name = "llvm.x86.sse41.round.sd"] | |
1154 | fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d; | |
1155 | #[link_name = "llvm.x86.sse41.round.ss"] | |
1156 | fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128; | |
1157 | #[link_name = "llvm.x86.sse41.phminposuw"] | |
1158 | fn phminposuw(a: u16x8) -> u16x8; | |
1159 | #[link_name = "llvm.x86.sse41.pmuldq"] | |
1160 | fn pmuldq(a: i32x4, b: i32x4) -> i64x2; | |
1161 | #[link_name = "llvm.x86.sse41.mpsadbw"] | |
1162 | fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8; | |
1163 | #[link_name = "llvm.x86.sse41.ptestz"] | |
1164 | fn ptestz(a: i64x2, mask: i64x2) -> i32; | |
1165 | #[link_name = "llvm.x86.sse41.ptestc"] | |
1166 | fn ptestc(a: i64x2, mask: i64x2) -> i32; | |
1167 | #[link_name = "llvm.x86.sse41.ptestnzc"] | |
1168 | fn ptestnzc(a: i64x2, mask: i64x2) -> i32; | |
1169 | } | |
1170 | ||
1171 | #[cfg(test)] | |
1172 | mod tests { | |
532ac7d7 | 1173 | use crate::core_arch::x86::*; |
0531ce1d | 1174 | use std::mem; |
416331ca | 1175 | use stdarch_test::simd_test; |
0531ce1d | 1176 | |
83c7162d | 1177 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1178 | unsafe fn test_mm_blendv_epi8() { |
0731742a | 1179 | #[rustfmt::skip] |
0531ce1d XL |
1180 | let a = _mm_setr_epi8( |
1181 | 0, 1, 2, 3, 4, 5, 6, 7, | |
1182 | 8, 9, 10, 11, 12, 13, 14, 15, | |
1183 | ); | |
0731742a | 1184 | #[rustfmt::skip] |
0531ce1d XL |
1185 | let b = _mm_setr_epi8( |
1186 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
1187 | ); | |
0731742a | 1188 | #[rustfmt::skip] |
0531ce1d XL |
1189 | let mask = _mm_setr_epi8( |
1190 | 0, -1, 0, -1, 0, -1, 0, -1, | |
1191 | 0, -1, 0, -1, 0, -1, 0, -1, | |
1192 | ); | |
0731742a | 1193 | #[rustfmt::skip] |
0531ce1d XL |
1194 | let e = _mm_setr_epi8( |
1195 | 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31, | |
1196 | ); | |
1197 | assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e); | |
1198 | } | |
1199 | ||
83c7162d | 1200 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1201 | unsafe fn test_mm_blendv_pd() { |
1202 | let a = _mm_set1_pd(0.0); | |
1203 | let b = _mm_set1_pd(1.0); | |
532ac7d7 | 1204 | let mask = transmute(_mm_setr_epi64x(0, -1)); |
0531ce1d XL |
1205 | let r = _mm_blendv_pd(a, b, mask); |
1206 | let e = _mm_setr_pd(0.0, 1.0); | |
1207 | assert_eq_m128d(r, e); | |
1208 | } | |
1209 | ||
83c7162d | 1210 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1211 | unsafe fn test_mm_blendv_ps() { |
1212 | let a = _mm_set1_ps(0.0); | |
1213 | let b = _mm_set1_ps(1.0); | |
532ac7d7 | 1214 | let mask = transmute(_mm_setr_epi32(0, -1, 0, -1)); |
0531ce1d XL |
1215 | let r = _mm_blendv_ps(a, b, mask); |
1216 | let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); | |
1217 | assert_eq_m128(r, e); | |
1218 | } | |
1219 | ||
83c7162d | 1220 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1221 | unsafe fn test_mm_blend_pd() { |
1222 | let a = _mm_set1_pd(0.0); | |
1223 | let b = _mm_set1_pd(1.0); | |
17df50a5 | 1224 | let r = _mm_blend_pd::<0b10>(a, b); |
0531ce1d XL |
1225 | let e = _mm_setr_pd(0.0, 1.0); |
1226 | assert_eq_m128d(r, e); | |
1227 | } | |
1228 | ||
83c7162d | 1229 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1230 | unsafe fn test_mm_blend_ps() { |
1231 | let a = _mm_set1_ps(0.0); | |
1232 | let b = _mm_set1_ps(1.0); | |
17df50a5 | 1233 | let r = _mm_blend_ps::<0b1010>(a, b); |
0531ce1d XL |
1234 | let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); |
1235 | assert_eq_m128(r, e); | |
1236 | } | |
1237 | ||
83c7162d | 1238 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1239 | unsafe fn test_mm_blend_epi16() { |
1240 | let a = _mm_set1_epi16(0); | |
1241 | let b = _mm_set1_epi16(1); | |
17df50a5 | 1242 | let r = _mm_blend_epi16::<0b1010_1100>(a, b); |
0531ce1d XL |
1243 | let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1); |
1244 | assert_eq_m128i(r, e); | |
1245 | } | |
1246 | ||
83c7162d | 1247 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1248 | unsafe fn test_mm_extract_ps() { |
1249 | let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0); | |
17df50a5 | 1250 | let r: f32 = transmute(_mm_extract_ps::<1>(a)); |
0531ce1d | 1251 | assert_eq!(r, 1.0); |
17df50a5 XL |
1252 | let r: f32 = transmute(_mm_extract_ps::<3>(a)); |
1253 | assert_eq!(r, 3.0); | |
0531ce1d XL |
1254 | } |
1255 | ||
83c7162d | 1256 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1257 | unsafe fn test_mm_extract_epi8() { |
0731742a | 1258 | #[rustfmt::skip] |
0531ce1d XL |
1259 | let a = _mm_setr_epi8( |
1260 | -1, 1, 2, 3, 4, 5, 6, 7, | |
1261 | 8, 9, 10, 11, 12, 13, 14, 15 | |
1262 | ); | |
17df50a5 XL |
1263 | let r1 = _mm_extract_epi8::<0>(a); |
1264 | let r2 = _mm_extract_epi8::<3>(a); | |
0531ce1d XL |
1265 | assert_eq!(r1, 0xFF); |
1266 | assert_eq!(r2, 3); | |
1267 | } | |
1268 | ||
83c7162d | 1269 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1270 | unsafe fn test_mm_extract_epi32() { |
1271 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
17df50a5 | 1272 | let r = _mm_extract_epi32::<1>(a); |
0531ce1d | 1273 | assert_eq!(r, 1); |
17df50a5 XL |
1274 | let r = _mm_extract_epi32::<3>(a); |
1275 | assert_eq!(r, 3); | |
0531ce1d XL |
1276 | } |
1277 | ||
83c7162d | 1278 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1279 | unsafe fn test_mm_insert_ps() { |
1280 | let a = _mm_set1_ps(1.0); | |
1281 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); | |
17df50a5 | 1282 | let r = _mm_insert_ps::<0b11_00_1100>(a, b); |
0531ce1d XL |
1283 | let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); |
1284 | assert_eq_m128(r, e); | |
1285 | } | |
1286 | ||
83c7162d | 1287 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1288 | unsafe fn test_mm_insert_epi8() { |
1289 | let a = _mm_set1_epi8(0); | |
1290 | let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
17df50a5 | 1291 | let r = _mm_insert_epi8::<1>(a, 32); |
0531ce1d | 1292 | assert_eq_m128i(r, e); |
17df50a5 XL |
1293 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0); |
1294 | let r = _mm_insert_epi8::<14>(a, 32); | |
0531ce1d XL |
1295 | assert_eq_m128i(r, e); |
1296 | } | |
1297 | ||
83c7162d | 1298 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1299 | unsafe fn test_mm_insert_epi32() { |
1300 | let a = _mm_set1_epi32(0); | |
1301 | let e = _mm_setr_epi32(0, 32, 0, 0); | |
17df50a5 | 1302 | let r = _mm_insert_epi32::<1>(a, 32); |
0531ce1d | 1303 | assert_eq_m128i(r, e); |
17df50a5 XL |
1304 | let e = _mm_setr_epi32(0, 0, 0, 32); |
1305 | let r = _mm_insert_epi32::<3>(a, 32); | |
0531ce1d XL |
1306 | assert_eq_m128i(r, e); |
1307 | } | |
1308 | ||
83c7162d | 1309 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1310 | unsafe fn test_mm_max_epi8() { |
0731742a | 1311 | #[rustfmt::skip] |
0531ce1d XL |
1312 | let a = _mm_setr_epi8( |
1313 | 1, 4, 5, 8, 9, 12, 13, 16, | |
1314 | 17, 20, 21, 24, 25, 28, 29, 32, | |
1315 | ); | |
0731742a | 1316 | #[rustfmt::skip] |
0531ce1d XL |
1317 | let b = _mm_setr_epi8( |
1318 | 2, 3, 6, 7, 10, 11, 14, 15, | |
1319 | 18, 19, 22, 23, 26, 27, 30, 31, | |
1320 | ); | |
1321 | let r = _mm_max_epi8(a, b); | |
0731742a | 1322 | #[rustfmt::skip] |
0531ce1d XL |
1323 | let e = _mm_setr_epi8( |
1324 | 2, 4, 6, 8, 10, 12, 14, 16, | |
1325 | 18, 20, 22, 24, 26, 28, 30, 32, | |
1326 | ); | |
1327 | assert_eq_m128i(r, e); | |
1328 | } | |
1329 | ||
83c7162d | 1330 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1331 | unsafe fn test_mm_max_epu16() { |
1332 | let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); | |
1333 | let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); | |
1334 | let r = _mm_max_epu16(a, b); | |
1335 | let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16); | |
1336 | assert_eq_m128i(r, e); | |
1337 | } | |
1338 | ||
83c7162d | 1339 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1340 | unsafe fn test_mm_max_epi32() { |
1341 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1342 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1343 | let r = _mm_max_epi32(a, b); | |
1344 | let e = _mm_setr_epi32(2, 4, 6, 8); | |
1345 | assert_eq_m128i(r, e); | |
1346 | } | |
1347 | ||
83c7162d | 1348 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1349 | unsafe fn test_mm_max_epu32() { |
1350 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1351 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1352 | let r = _mm_max_epu32(a, b); | |
1353 | let e = _mm_setr_epi32(2, 4, 6, 8); | |
1354 | assert_eq_m128i(r, e); | |
1355 | } | |
1356 | ||
83c7162d | 1357 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1358 | unsafe fn test_mm_min_epi8_1() { |
0731742a | 1359 | #[rustfmt::skip] |
0531ce1d XL |
1360 | let a = _mm_setr_epi8( |
1361 | 1, 4, 5, 8, 9, 12, 13, 16, | |
1362 | 17, 20, 21, 24, 25, 28, 29, 32, | |
1363 | ); | |
0731742a | 1364 | #[rustfmt::skip] |
0531ce1d XL |
1365 | let b = _mm_setr_epi8( |
1366 | 2, 3, 6, 7, 10, 11, 14, 15, | |
1367 | 18, 19, 22, 23, 26, 27, 30, 31, | |
1368 | ); | |
1369 | let r = _mm_min_epi8(a, b); | |
0731742a | 1370 | #[rustfmt::skip] |
0531ce1d XL |
1371 | let e = _mm_setr_epi8( |
1372 | 1, 3, 5, 7, 9, 11, 13, 15, | |
1373 | 17, 19, 21, 23, 25, 27, 29, 31, | |
1374 | ); | |
1375 | assert_eq_m128i(r, e); | |
1376 | } | |
1377 | ||
83c7162d | 1378 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1379 | unsafe fn test_mm_min_epi8_2() { |
0731742a | 1380 | #[rustfmt::skip] |
0531ce1d XL |
1381 | let a = _mm_setr_epi8( |
1382 | 1, -4, -5, 8, -9, -12, 13, -16, | |
1383 | 17, 20, 21, 24, 25, 28, 29, 32, | |
1384 | ); | |
0731742a | 1385 | #[rustfmt::skip] |
0531ce1d XL |
1386 | let b = _mm_setr_epi8( |
1387 | 2, -3, -6, 7, -10, -11, 14, -15, | |
1388 | 18, 19, 22, 23, 26, 27, 30, 31, | |
1389 | ); | |
1390 | let r = _mm_min_epi8(a, b); | |
0731742a | 1391 | #[rustfmt::skip] |
0531ce1d XL |
1392 | let e = _mm_setr_epi8( |
1393 | 1, -4, -6, 7, -10, -12, 13, -16, | |
1394 | 17, 19, 21, 23, 25, 27, 29, 31, | |
1395 | ); | |
1396 | assert_eq_m128i(r, e); | |
1397 | } | |
1398 | ||
83c7162d | 1399 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1400 | unsafe fn test_mm_min_epu16() { |
1401 | let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); | |
1402 | let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); | |
1403 | let r = _mm_min_epu16(a, b); | |
1404 | let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15); | |
1405 | assert_eq_m128i(r, e); | |
1406 | } | |
1407 | ||
83c7162d | 1408 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1409 | unsafe fn test_mm_min_epi32_1() { |
1410 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1411 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1412 | let r = _mm_min_epi32(a, b); | |
1413 | let e = _mm_setr_epi32(1, 3, 5, 7); | |
1414 | assert_eq_m128i(r, e); | |
1415 | } | |
1416 | ||
83c7162d | 1417 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1418 | unsafe fn test_mm_min_epi32_2() { |
1419 | let a = _mm_setr_epi32(-1, 4, 5, -7); | |
1420 | let b = _mm_setr_epi32(-2, 3, -6, 8); | |
1421 | let r = _mm_min_epi32(a, b); | |
1422 | let e = _mm_setr_epi32(-2, 3, -6, -7); | |
1423 | assert_eq_m128i(r, e); | |
1424 | } | |
1425 | ||
83c7162d | 1426 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1427 | unsafe fn test_mm_min_epu32() { |
1428 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1429 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1430 | let r = _mm_min_epu32(a, b); | |
1431 | let e = _mm_setr_epi32(1, 3, 5, 7); | |
1432 | assert_eq_m128i(r, e); | |
1433 | } | |
1434 | ||
83c7162d | 1435 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1436 | unsafe fn test_mm_packus_epi32() { |
1437 | let a = _mm_setr_epi32(1, 2, 3, 4); | |
1438 | let b = _mm_setr_epi32(-1, -2, -3, -4); | |
1439 | let r = _mm_packus_epi32(a, b); | |
1440 | let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0); | |
1441 | assert_eq_m128i(r, e); | |
1442 | } | |
1443 | ||
83c7162d | 1444 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1445 | unsafe fn test_mm_cmpeq_epi64() { |
1446 | let a = _mm_setr_epi64x(0, 1); | |
1447 | let b = _mm_setr_epi64x(0, 0); | |
1448 | let r = _mm_cmpeq_epi64(a, b); | |
1449 | let e = _mm_setr_epi64x(-1, 0); | |
1450 | assert_eq_m128i(r, e); | |
1451 | } | |
1452 | ||
83c7162d | 1453 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1454 | unsafe fn test_mm_cvtepi8_epi16() { |
1455 | let a = _mm_set1_epi8(10); | |
1456 | let r = _mm_cvtepi8_epi16(a); | |
1457 | let e = _mm_set1_epi16(10); | |
1458 | assert_eq_m128i(r, e); | |
1459 | let a = _mm_set1_epi8(-10); | |
1460 | let r = _mm_cvtepi8_epi16(a); | |
1461 | let e = _mm_set1_epi16(-10); | |
1462 | assert_eq_m128i(r, e); | |
1463 | } | |
1464 | ||
83c7162d | 1465 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1466 | unsafe fn test_mm_cvtepi8_epi32() { |
1467 | let a = _mm_set1_epi8(10); | |
1468 | let r = _mm_cvtepi8_epi32(a); | |
1469 | let e = _mm_set1_epi32(10); | |
1470 | assert_eq_m128i(r, e); | |
1471 | let a = _mm_set1_epi8(-10); | |
1472 | let r = _mm_cvtepi8_epi32(a); | |
1473 | let e = _mm_set1_epi32(-10); | |
1474 | assert_eq_m128i(r, e); | |
1475 | } | |
1476 | ||
83c7162d | 1477 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1478 | unsafe fn test_mm_cvtepi8_epi64() { |
1479 | let a = _mm_set1_epi8(10); | |
1480 | let r = _mm_cvtepi8_epi64(a); | |
1481 | let e = _mm_set1_epi64x(10); | |
1482 | assert_eq_m128i(r, e); | |
1483 | let a = _mm_set1_epi8(-10); | |
1484 | let r = _mm_cvtepi8_epi64(a); | |
1485 | let e = _mm_set1_epi64x(-10); | |
1486 | assert_eq_m128i(r, e); | |
1487 | } | |
1488 | ||
83c7162d | 1489 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1490 | unsafe fn test_mm_cvtepi16_epi32() { |
1491 | let a = _mm_set1_epi16(10); | |
1492 | let r = _mm_cvtepi16_epi32(a); | |
1493 | let e = _mm_set1_epi32(10); | |
1494 | assert_eq_m128i(r, e); | |
1495 | let a = _mm_set1_epi16(-10); | |
1496 | let r = _mm_cvtepi16_epi32(a); | |
1497 | let e = _mm_set1_epi32(-10); | |
1498 | assert_eq_m128i(r, e); | |
1499 | } | |
1500 | ||
83c7162d | 1501 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1502 | unsafe fn test_mm_cvtepi16_epi64() { |
1503 | let a = _mm_set1_epi16(10); | |
1504 | let r = _mm_cvtepi16_epi64(a); | |
1505 | let e = _mm_set1_epi64x(10); | |
1506 | assert_eq_m128i(r, e); | |
1507 | let a = _mm_set1_epi16(-10); | |
1508 | let r = _mm_cvtepi16_epi64(a); | |
1509 | let e = _mm_set1_epi64x(-10); | |
1510 | assert_eq_m128i(r, e); | |
1511 | } | |
1512 | ||
83c7162d | 1513 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1514 | unsafe fn test_mm_cvtepi32_epi64() { |
1515 | let a = _mm_set1_epi32(10); | |
1516 | let r = _mm_cvtepi32_epi64(a); | |
1517 | let e = _mm_set1_epi64x(10); | |
1518 | assert_eq_m128i(r, e); | |
1519 | let a = _mm_set1_epi32(-10); | |
1520 | let r = _mm_cvtepi32_epi64(a); | |
1521 | let e = _mm_set1_epi64x(-10); | |
1522 | assert_eq_m128i(r, e); | |
1523 | } | |
1524 | ||
83c7162d | 1525 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1526 | unsafe fn test_mm_cvtepu8_epi16() { |
1527 | let a = _mm_set1_epi8(10); | |
1528 | let r = _mm_cvtepu8_epi16(a); | |
1529 | let e = _mm_set1_epi16(10); | |
1530 | assert_eq_m128i(r, e); | |
1531 | } | |
1532 | ||
83c7162d | 1533 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1534 | unsafe fn test_mm_cvtepu8_epi32() { |
1535 | let a = _mm_set1_epi8(10); | |
1536 | let r = _mm_cvtepu8_epi32(a); | |
1537 | let e = _mm_set1_epi32(10); | |
1538 | assert_eq_m128i(r, e); | |
1539 | } | |
1540 | ||
83c7162d | 1541 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1542 | unsafe fn test_mm_cvtepu8_epi64() { |
1543 | let a = _mm_set1_epi8(10); | |
1544 | let r = _mm_cvtepu8_epi64(a); | |
1545 | let e = _mm_set1_epi64x(10); | |
1546 | assert_eq_m128i(r, e); | |
1547 | } | |
1548 | ||
83c7162d | 1549 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1550 | unsafe fn test_mm_cvtepu16_epi32() { |
1551 | let a = _mm_set1_epi16(10); | |
1552 | let r = _mm_cvtepu16_epi32(a); | |
1553 | let e = _mm_set1_epi32(10); | |
1554 | assert_eq_m128i(r, e); | |
1555 | } | |
1556 | ||
83c7162d | 1557 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1558 | unsafe fn test_mm_cvtepu16_epi64() { |
1559 | let a = _mm_set1_epi16(10); | |
1560 | let r = _mm_cvtepu16_epi64(a); | |
1561 | let e = _mm_set1_epi64x(10); | |
1562 | assert_eq_m128i(r, e); | |
1563 | } | |
1564 | ||
83c7162d | 1565 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1566 | unsafe fn test_mm_cvtepu32_epi64() { |
1567 | let a = _mm_set1_epi32(10); | |
1568 | let r = _mm_cvtepu32_epi64(a); | |
1569 | let e = _mm_set1_epi64x(10); | |
1570 | assert_eq_m128i(r, e); | |
1571 | } | |
1572 | ||
83c7162d | 1573 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1574 | unsafe fn test_mm_dp_pd() { |
1575 | let a = _mm_setr_pd(2.0, 3.0); | |
1576 | let b = _mm_setr_pd(1.0, 4.0); | |
1577 | let e = _mm_setr_pd(14.0, 0.0); | |
17df50a5 | 1578 | assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e); |
0531ce1d XL |
1579 | } |
1580 | ||
83c7162d | 1581 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1582 | unsafe fn test_mm_dp_ps() { |
1583 | let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0); | |
1584 | let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0); | |
1585 | let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0); | |
17df50a5 | 1586 | assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e); |
0531ce1d XL |
1587 | } |
1588 | ||
83c7162d | 1589 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1590 | unsafe fn test_mm_floor_pd() { |
1591 | let a = _mm_setr_pd(2.5, 4.5); | |
1592 | let r = _mm_floor_pd(a); | |
1593 | let e = _mm_setr_pd(2.0, 4.0); | |
1594 | assert_eq_m128d(r, e); | |
1595 | } | |
1596 | ||
83c7162d | 1597 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1598 | unsafe fn test_mm_floor_ps() { |
1599 | let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); | |
1600 | let r = _mm_floor_ps(a); | |
1601 | let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); | |
1602 | assert_eq_m128(r, e); | |
1603 | } | |
1604 | ||
83c7162d | 1605 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1606 | unsafe fn test_mm_floor_sd() { |
1607 | let a = _mm_setr_pd(2.5, 4.5); | |
1608 | let b = _mm_setr_pd(-1.5, -3.5); | |
1609 | let r = _mm_floor_sd(a, b); | |
1610 | let e = _mm_setr_pd(-2.0, 4.5); | |
1611 | assert_eq_m128d(r, e); | |
1612 | } | |
1613 | ||
83c7162d | 1614 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1615 | unsafe fn test_mm_floor_ss() { |
1616 | let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); | |
1617 | let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5); | |
1618 | let r = _mm_floor_ss(a, b); | |
1619 | let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5); | |
1620 | assert_eq_m128(r, e); | |
1621 | } | |
1622 | ||
83c7162d | 1623 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1624 | unsafe fn test_mm_ceil_pd() { |
1625 | let a = _mm_setr_pd(1.5, 3.5); | |
1626 | let r = _mm_ceil_pd(a); | |
1627 | let e = _mm_setr_pd(2.0, 4.0); | |
1628 | assert_eq_m128d(r, e); | |
1629 | } | |
1630 | ||
83c7162d | 1631 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1632 | unsafe fn test_mm_ceil_ps() { |
1633 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1634 | let r = _mm_ceil_ps(a); | |
1635 | let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); | |
1636 | assert_eq_m128(r, e); | |
1637 | } | |
1638 | ||
83c7162d | 1639 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1640 | unsafe fn test_mm_ceil_sd() { |
1641 | let a = _mm_setr_pd(1.5, 3.5); | |
1642 | let b = _mm_setr_pd(-2.5, -4.5); | |
1643 | let r = _mm_ceil_sd(a, b); | |
1644 | let e = _mm_setr_pd(-2.0, 3.5); | |
1645 | assert_eq_m128d(r, e); | |
1646 | } | |
1647 | ||
83c7162d | 1648 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1649 | unsafe fn test_mm_ceil_ss() { |
1650 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1651 | let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5); | |
1652 | let r = _mm_ceil_ss(a, b); | |
1653 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); | |
1654 | assert_eq_m128(r, e); | |
1655 | } | |
1656 | ||
83c7162d | 1657 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1658 | unsafe fn test_mm_round_pd() { |
1659 | let a = _mm_setr_pd(1.25, 3.75); | |
17df50a5 | 1660 | let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); |
0531ce1d XL |
1661 | let e = _mm_setr_pd(1.0, 4.0); |
1662 | assert_eq_m128d(r, e); | |
1663 | } | |
1664 | ||
83c7162d | 1665 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1666 | unsafe fn test_mm_round_ps() { |
1667 | let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25); | |
17df50a5 | 1668 | let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a); |
0531ce1d XL |
1669 | let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0); |
1670 | assert_eq_m128(r, e); | |
1671 | } | |
1672 | ||
83c7162d | 1673 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1674 | unsafe fn test_mm_round_sd() { |
1675 | let a = _mm_setr_pd(1.5, 3.5); | |
1676 | let b = _mm_setr_pd(-2.5, -4.5); | |
1677 | let old_mode = _MM_GET_ROUNDING_MODE(); | |
1678 | _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); | |
17df50a5 | 1679 | let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b); |
0531ce1d XL |
1680 | _MM_SET_ROUNDING_MODE(old_mode); |
1681 | let e = _mm_setr_pd(-2.0, 3.5); | |
1682 | assert_eq_m128d(r, e); | |
1683 | } | |
1684 | ||
83c7162d | 1685 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1686 | unsafe fn test_mm_round_ss() { |
1687 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1688 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); | |
1689 | let old_mode = _MM_GET_ROUNDING_MODE(); | |
1690 | _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); | |
17df50a5 | 1691 | let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b); |
0531ce1d XL |
1692 | _MM_SET_ROUNDING_MODE(old_mode); |
1693 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); | |
1694 | assert_eq_m128(r, e); | |
1695 | } | |
1696 | ||
83c7162d | 1697 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1698 | unsafe fn test_mm_minpos_epu16_1() { |
1699 | let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66); | |
1700 | let r = _mm_minpos_epu16(a); | |
1701 | let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); | |
1702 | assert_eq_m128i(r, e); | |
1703 | } | |
1704 | ||
83c7162d | 1705 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1706 | unsafe fn test_mm_minpos_epu16_2() { |
1707 | let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66); | |
1708 | let r = _mm_minpos_epu16(a); | |
1709 | let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0); | |
1710 | assert_eq_m128i(r, e); | |
1711 | } | |
1712 | ||
83c7162d | 1713 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1714 | unsafe fn test_mm_mul_epi32() { |
1715 | { | |
1716 | let a = _mm_setr_epi32(1, 1, 1, 1); | |
1717 | let b = _mm_setr_epi32(1, 2, 3, 4); | |
1718 | let r = _mm_mul_epi32(a, b); | |
1719 | let e = _mm_setr_epi64x(1, 3); | |
1720 | assert_eq_m128i(r, e); | |
1721 | } | |
1722 | { | |
0731742a | 1723 | let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */); |
0531ce1d | 1724 | let b = _mm_setr_epi32( |
8faf50e0 XL |
1725 | -20, -256, /* ignored */ |
1726 | 666666, 666666, /* ignored */ | |
0531ce1d XL |
1727 | ); |
1728 | let r = _mm_mul_epi32(a, b); | |
1729 | let e = _mm_setr_epi64x(-300, 823043843622); | |
1730 | assert_eq_m128i(r, e); | |
1731 | } | |
1732 | } | |
1733 | ||
83c7162d | 1734 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1735 | unsafe fn test_mm_mullo_epi32() { |
1736 | { | |
1737 | let a = _mm_setr_epi32(1, 1, 1, 1); | |
1738 | let b = _mm_setr_epi32(1, 2, 3, 4); | |
1739 | let r = _mm_mullo_epi32(a, b); | |
1740 | let e = _mm_setr_epi32(1, 2, 3, 4); | |
1741 | assert_eq_m128i(r, e); | |
1742 | } | |
1743 | { | |
1744 | let a = _mm_setr_epi32(15, -2, 1234567, 99999); | |
1745 | let b = _mm_setr_epi32(-20, -256, 666666, -99999); | |
1746 | let r = _mm_mullo_epi32(a, b); | |
1747 | // Attention, most significant bit in r[2] is treated | |
1748 | // as a sign bit: | |
1749 | // 1234567 * 666666 = -1589877210 | |
1750 | let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409); | |
1751 | assert_eq_m128i(r, e); | |
1752 | } | |
1753 | } | |
1754 | ||
83c7162d | 1755 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1756 | unsafe fn test_mm_minpos_epu16() { |
1757 | let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3); | |
1758 | let r = _mm_minpos_epu16(a); | |
1759 | let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0); | |
1760 | assert_eq_m128i(r, e); | |
1761 | } | |
1762 | ||
83c7162d | 1763 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1764 | unsafe fn test_mm_mpsadbw_epu8() { |
0731742a | 1765 | #[rustfmt::skip] |
0531ce1d XL |
1766 | let a = _mm_setr_epi8( |
1767 | 0, 1, 2, 3, 4, 5, 6, 7, | |
1768 | 8, 9, 10, 11, 12, 13, 14, 15, | |
1769 | ); | |
1770 | ||
17df50a5 | 1771 | let r = _mm_mpsadbw_epu8::<0b000>(a, a); |
0531ce1d XL |
1772 | let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
1773 | assert_eq_m128i(r, e); | |
1774 | ||
17df50a5 | 1775 | let r = _mm_mpsadbw_epu8::<0b001>(a, a); |
0531ce1d XL |
1776 | let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12); |
1777 | assert_eq_m128i(r, e); | |
1778 | ||
17df50a5 | 1779 | let r = _mm_mpsadbw_epu8::<0b100>(a, a); |
0531ce1d XL |
1780 | let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44); |
1781 | assert_eq_m128i(r, e); | |
1782 | ||
17df50a5 | 1783 | let r = _mm_mpsadbw_epu8::<0b101>(a, a); |
0531ce1d XL |
1784 | let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
1785 | assert_eq_m128i(r, e); | |
1786 | ||
17df50a5 | 1787 | let r = _mm_mpsadbw_epu8::<0b111>(a, a); |
0531ce1d XL |
1788 | let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); |
1789 | assert_eq_m128i(r, e); | |
1790 | } | |
1791 | ||
83c7162d | 1792 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1793 | unsafe fn test_mm_testz_si128() { |
1794 | let a = _mm_set1_epi8(1); | |
1795 | let mask = _mm_set1_epi8(0); | |
1796 | let r = _mm_testz_si128(a, mask); | |
1797 | assert_eq!(r, 1); | |
1798 | let a = _mm_set1_epi8(0b101); | |
1799 | let mask = _mm_set1_epi8(0b110); | |
1800 | let r = _mm_testz_si128(a, mask); | |
1801 | assert_eq!(r, 0); | |
1802 | let a = _mm_set1_epi8(0b011); | |
1803 | let mask = _mm_set1_epi8(0b100); | |
1804 | let r = _mm_testz_si128(a, mask); | |
1805 | assert_eq!(r, 1); | |
1806 | } | |
1807 | ||
83c7162d | 1808 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1809 | unsafe fn test_mm_testc_si128() { |
1810 | let a = _mm_set1_epi8(-1); | |
1811 | let mask = _mm_set1_epi8(0); | |
1812 | let r = _mm_testc_si128(a, mask); | |
1813 | assert_eq!(r, 1); | |
1814 | let a = _mm_set1_epi8(0b101); | |
1815 | let mask = _mm_set1_epi8(0b110); | |
1816 | let r = _mm_testc_si128(a, mask); | |
1817 | assert_eq!(r, 0); | |
1818 | let a = _mm_set1_epi8(0b101); | |
1819 | let mask = _mm_set1_epi8(0b100); | |
1820 | let r = _mm_testc_si128(a, mask); | |
1821 | assert_eq!(r, 1); | |
1822 | } | |
1823 | ||
83c7162d | 1824 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1825 | unsafe fn test_mm_testnzc_si128() { |
1826 | let a = _mm_set1_epi8(0); | |
1827 | let mask = _mm_set1_epi8(1); | |
1828 | let r = _mm_testnzc_si128(a, mask); | |
1829 | assert_eq!(r, 0); | |
1830 | let a = _mm_set1_epi8(-1); | |
1831 | let mask = _mm_set1_epi8(0); | |
1832 | let r = _mm_testnzc_si128(a, mask); | |
1833 | assert_eq!(r, 0); | |
1834 | let a = _mm_set1_epi8(0b101); | |
1835 | let mask = _mm_set1_epi8(0b110); | |
1836 | let r = _mm_testnzc_si128(a, mask); | |
1837 | assert_eq!(r, 1); | |
1838 | let a = _mm_set1_epi8(0b101); | |
1839 | let mask = _mm_set1_epi8(0b101); | |
1840 | let r = _mm_testnzc_si128(a, mask); | |
1841 | assert_eq!(r, 0); | |
1842 | } | |
1843 | ||
83c7162d | 1844 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1845 | unsafe fn test_mm_test_all_zeros() { |
1846 | let a = _mm_set1_epi8(1); | |
1847 | let mask = _mm_set1_epi8(0); | |
1848 | let r = _mm_test_all_zeros(a, mask); | |
1849 | assert_eq!(r, 1); | |
1850 | let a = _mm_set1_epi8(0b101); | |
1851 | let mask = _mm_set1_epi8(0b110); | |
1852 | let r = _mm_test_all_zeros(a, mask); | |
1853 | assert_eq!(r, 0); | |
1854 | let a = _mm_set1_epi8(0b011); | |
1855 | let mask = _mm_set1_epi8(0b100); | |
1856 | let r = _mm_test_all_zeros(a, mask); | |
1857 | assert_eq!(r, 1); | |
1858 | } | |
1859 | ||
83c7162d | 1860 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1861 | unsafe fn test_mm_test_all_ones() { |
1862 | let a = _mm_set1_epi8(-1); | |
1863 | let r = _mm_test_all_ones(a); | |
1864 | assert_eq!(r, 1); | |
1865 | let a = _mm_set1_epi8(0b101); | |
1866 | let r = _mm_test_all_ones(a); | |
1867 | assert_eq!(r, 0); | |
1868 | } | |
1869 | ||
83c7162d | 1870 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1871 | unsafe fn test_mm_test_mix_ones_zeros() { |
1872 | let a = _mm_set1_epi8(0); | |
1873 | let mask = _mm_set1_epi8(1); | |
1874 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1875 | assert_eq!(r, 0); | |
1876 | let a = _mm_set1_epi8(-1); | |
1877 | let mask = _mm_set1_epi8(0); | |
1878 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1879 | assert_eq!(r, 0); | |
1880 | let a = _mm_set1_epi8(0b101); | |
1881 | let mask = _mm_set1_epi8(0b110); | |
1882 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1883 | assert_eq!(r, 1); | |
1884 | let a = _mm_set1_epi8(0b101); | |
1885 | let mask = _mm_set1_epi8(0b101); | |
1886 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1887 | assert_eq!(r, 0); | |
1888 | } | |
1889 | } |