]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Streaming SIMD Extensions 4.1 (SSE4.1) |
2 | ||
c620b35d FG |
3 | use crate::core_arch::{simd::*, x86::*}; |
4 | use crate::intrinsics::simd::*; | |
0531ce1d XL |
5 | |
6 | #[cfg(test)] | |
416331ca | 7 | use stdarch_test::assert_instr; |
0531ce1d | 8 | |
a2a8927a | 9 | // SSE4 rounding constants |
0531ce1d | 10 | /// round to nearest |
83c7162d | 11 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
12 | pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00; |
13 | /// round down | |
83c7162d | 14 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
15 | pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01; |
16 | /// round up | |
83c7162d | 17 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
18 | pub const _MM_FROUND_TO_POS_INF: i32 = 0x02; |
19 | /// truncate | |
83c7162d | 20 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
21 | pub const _MM_FROUND_TO_ZERO: i32 = 0x03; |
22 | /// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE` | |
83c7162d | 23 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
24 | pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04; |
25 | /// do not suppress exceptions | |
83c7162d | 26 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
27 | pub const _MM_FROUND_RAISE_EXC: i32 = 0x00; |
28 | /// suppress exceptions | |
83c7162d | 29 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
30 | pub const _MM_FROUND_NO_EXC: i32 = 0x08; |
31 | /// round to nearest and do not suppress exceptions | |
83c7162d | 32 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
33 | pub const _MM_FROUND_NINT: i32 = 0x00; |
34 | /// round down and do not suppress exceptions | |
83c7162d | 35 | #[stable(feature = "simd_x86", since = "1.27.0")] |
74b04a01 | 36 | pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF; |
0531ce1d | 37 | /// round up and do not suppress exceptions |
83c7162d | 38 | #[stable(feature = "simd_x86", since = "1.27.0")] |
74b04a01 | 39 | pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF; |
0531ce1d | 40 | /// truncate and do not suppress exceptions |
83c7162d | 41 | #[stable(feature = "simd_x86", since = "1.27.0")] |
74b04a01 | 42 | pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO; |
0531ce1d XL |
43 | /// use MXCSR.RC and do not suppress exceptions; see |
44 | /// `vendor::_MM_SET_ROUNDING_MODE` | |
83c7162d | 45 | #[stable(feature = "simd_x86", since = "1.27.0")] |
74b04a01 | 46 | pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION; |
0531ce1d | 47 | /// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE` |
83c7162d | 48 | #[stable(feature = "simd_x86", since = "1.27.0")] |
74b04a01 | 49 | pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION; |
0531ce1d XL |
50 | |
51 | /// Blend packed 8-bit integers from `a` and `b` using `mask` | |
52 | /// | |
53 | /// The high bit of each corresponding mask byte determines the selection. | |
54 | /// If the high bit is set the element of `a` is selected. The element | |
55 | /// of `b` is selected otherwise. | |
83c7162d | 56 | /// |
353b0b11 | 57 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8) |
0531ce1d XL |
58 | #[inline] |
59 | #[target_feature(enable = "sse4.1")] | |
60 | #[cfg_attr(test, assert_instr(pblendvb))] | |
83c7162d | 61 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0731742a | 62 | pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { |
ed00b5ec FG |
63 | let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::splat(0)); |
64 | transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16())) | |
0531ce1d XL |
65 | } |
66 | ||
17df50a5 | 67 | /// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`. |
0531ce1d XL |
68 | /// |
69 | /// The mask bits determine the selection. A clear bit selects the | |
70 | /// corresponding element of `a`, and a set bit the corresponding | |
71 | /// element of `b`. | |
83c7162d | 72 | /// |
353b0b11 | 73 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16) |
0531ce1d XL |
74 | #[inline] |
75 | #[target_feature(enable = "sse4.1")] | |
ed00b5ec | 76 | #[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))] |
17df50a5 | 77 | #[rustc_legacy_const_generics(2)] |
83c7162d | 78 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 | 79 | pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
353b0b11 | 80 | static_assert_uimm_bits!(IMM8, 8); |
ed00b5ec FG |
81 | transmute::<i16x8, _>(simd_shuffle!( |
82 | a.as_i16x8(), | |
83 | b.as_i16x8(), | |
84 | [ | |
85 | [0, 8][IMM8 as usize & 1], | |
86 | [1, 9][(IMM8 >> 1) as usize & 1], | |
87 | [2, 10][(IMM8 >> 2) as usize & 1], | |
88 | [3, 11][(IMM8 >> 3) as usize & 1], | |
89 | [4, 12][(IMM8 >> 4) as usize & 1], | |
90 | [5, 13][(IMM8 >> 5) as usize & 1], | |
91 | [6, 14][(IMM8 >> 6) as usize & 1], | |
92 | [7, 15][(IMM8 >> 7) as usize & 1], | |
93 | ] | |
94 | )) | |
0531ce1d XL |
95 | } |
96 | ||
97 | /// Blend packed double-precision (64-bit) floating-point elements from `a` | |
98 | /// and `b` using `mask` | |
83c7162d | 99 | /// |
353b0b11 | 100 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd) |
0531ce1d XL |
101 | #[inline] |
102 | #[target_feature(enable = "sse4.1")] | |
103 | #[cfg_attr(test, assert_instr(blendvpd))] | |
83c7162d | 104 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 105 | pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { |
ed00b5ec FG |
106 | let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::splat(0)); |
107 | transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2())) | |
0531ce1d XL |
108 | } |
109 | ||
110 | /// Blend packed single-precision (32-bit) floating-point elements from `a` | |
111 | /// and `b` using `mask` | |
83c7162d | 112 | /// |
353b0b11 | 113 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps) |
0531ce1d XL |
114 | #[inline] |
115 | #[target_feature(enable = "sse4.1")] | |
116 | #[cfg_attr(test, assert_instr(blendvps))] | |
83c7162d | 117 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 118 | pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { |
ed00b5ec FG |
119 | let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::splat(0)); |
120 | transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4())) | |
0531ce1d XL |
121 | } |
122 | ||
123 | /// Blend packed double-precision (64-bit) floating-point elements from `a` | |
17df50a5 | 124 | /// and `b` using control mask `IMM2` |
83c7162d | 125 | /// |
353b0b11 | 126 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd) |
0531ce1d XL |
127 | #[inline] |
128 | #[target_feature(enable = "sse4.1")] | |
8faf50e0 XL |
129 | // Note: LLVM7 prefers the single-precision floating-point domain when possible |
130 | // see https://bugs.llvm.org/show_bug.cgi?id=38195 | |
17df50a5 XL |
131 | // #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))] |
132 | #[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))] | |
133 | #[rustc_legacy_const_generics(2)] | |
134 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
135 | pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d { | |
353b0b11 | 136 | static_assert_uimm_bits!(IMM2, 2); |
ed00b5ec FG |
137 | transmute::<f64x2, _>(simd_shuffle!( |
138 | a.as_f64x2(), | |
139 | b.as_f64x2(), | |
140 | [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]] | |
141 | )) | |
0531ce1d XL |
142 | } |
143 | ||
144 | /// Blend packed single-precision (32-bit) floating-point elements from `a` | |
17df50a5 | 145 | /// and `b` using mask `IMM4` |
83c7162d | 146 | /// |
353b0b11 | 147 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps) |
0531ce1d XL |
148 | #[inline] |
149 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
150 | #[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))] |
151 | #[rustc_legacy_const_generics(2)] | |
152 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
153 | pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 { | |
353b0b11 | 154 | static_assert_uimm_bits!(IMM4, 4); |
ed00b5ec FG |
155 | transmute::<f32x4, _>(simd_shuffle!( |
156 | a.as_f32x4(), | |
157 | b.as_f32x4(), | |
158 | [ | |
159 | [0, 4][IMM4 as usize & 1], | |
160 | [1, 5][(IMM4 >> 1) as usize & 1], | |
161 | [2, 6][(IMM4 >> 2) as usize & 1], | |
162 | [3, 7][(IMM4 >> 3) as usize & 1], | |
163 | ] | |
164 | )) | |
0531ce1d XL |
165 | } |
166 | ||
532ac7d7 | 167 | /// Extracts a single-precision (32-bit) floating-point element from `a`, |
3c0e092e XL |
168 | /// selected with `IMM8`. The returned `i32` stores the float's bit-pattern, |
169 | /// and may be converted back to a floating point number via casting. | |
83c7162d | 170 | /// |
3c0e092e XL |
171 | /// # Example |
172 | /// ```rust | |
173 | /// # #[cfg(target_arch = "x86")] | |
174 | /// # use std::arch::x86::*; | |
175 | /// # #[cfg(target_arch = "x86_64")] | |
176 | /// # use std::arch::x86_64::*; | |
177 | /// # fn main() { | |
178 | /// # if is_x86_feature_detected!("sse4.1") { | |
179 | /// # #[target_feature(enable = "sse4.1")] | |
180 | /// # unsafe fn worker() { | |
181 | /// let mut float_store = vec![1.0, 1.0, 2.0, 3.0]; | |
a2a8927a XL |
182 | /// let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0); |
183 | /// let x: i32 = _mm_extract_ps::<2>(simd_floats); | |
184 | /// float_store.push(f32::from_bits(x as u32)); | |
3c0e092e XL |
185 | /// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]); |
186 | /// # } | |
187 | /// # unsafe { worker() } | |
188 | /// # } | |
189 | /// # } | |
190 | /// ``` | |
353b0b11 | 191 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps) |
0531ce1d XL |
192 | #[inline] |
193 | #[target_feature(enable = "sse4.1")] | |
0731742a XL |
194 | #[cfg_attr( |
195 | all(test, not(target_os = "windows")), | |
17df50a5 | 196 | assert_instr(extractps, IMM8 = 0) |
0731742a | 197 | )] |
17df50a5 | 198 | #[rustc_legacy_const_generics(1)] |
83c7162d | 199 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 | 200 | pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 { |
353b0b11 | 201 | static_assert_uimm_bits!(IMM8, 2); |
c620b35d | 202 | simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 |
0531ce1d XL |
203 | } |
204 | ||
17df50a5 | 205 | /// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit |
0531ce1d XL |
206 | /// integer containing the zero-extended integer data. |
207 | /// | |
fc512014 | 208 | /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). |
83c7162d | 209 | /// |
353b0b11 | 210 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8) |
0531ce1d XL |
211 | #[inline] |
212 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
213 | #[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))] |
214 | #[rustc_legacy_const_generics(1)] | |
83c7162d | 215 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 | 216 | pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 { |
353b0b11 | 217 | static_assert_uimm_bits!(IMM8, 4); |
c620b35d | 218 | simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 |
0531ce1d XL |
219 | } |
220 | ||
17df50a5 | 221 | /// Extracts an 32-bit integer from `a` selected with `IMM8` |
83c7162d | 222 | /// |
353b0b11 | 223 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32) |
0531ce1d XL |
224 | #[inline] |
225 | #[target_feature(enable = "sse4.1")] | |
0731742a XL |
226 | #[cfg_attr( |
227 | all(test, not(target_os = "windows")), | |
17df50a5 | 228 | assert_instr(extractps, IMM8 = 1) |
0731742a | 229 | )] |
17df50a5 | 230 | #[rustc_legacy_const_generics(1)] |
83c7162d | 231 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 | 232 | pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 { |
353b0b11 | 233 | static_assert_uimm_bits!(IMM8, 2); |
c620b35d | 234 | simd_extract!(a.as_i32x4(), IMM8 as u32, i32) |
0531ce1d XL |
235 | } |
236 | ||
e8be2606 | 237 | /// Select a single value in `b` to store at some position in `a`, |
17df50a5 | 238 | /// Then zero elements according to `IMM8`. |
0531ce1d | 239 | /// |
e8be2606 | 240 | /// `IMM8` specifies which bits from operand `b` will be copied, which bits in |
0531ce1d XL |
241 | /// the result they will be copied to, and which bits in the result will be |
242 | /// cleared. The following assignments are made: | |
243 | /// | |
e8be2606 FG |
244 | /// * Bits `[7:6]` specify the bits to copy from operand `b`: |
245 | /// - `00`: Selects bits `[31:0]` from operand `b`. | |
246 | /// - `01`: Selects bits `[63:32]` from operand `b`. | |
247 | /// - `10`: Selects bits `[95:64]` from operand `b`. | |
248 | /// - `11`: Selects bits `[127:96]` from operand `b`. | |
0531ce1d XL |
249 | /// |
250 | /// * Bits `[5:4]` specify the bits in the result to which the selected bits | |
e8be2606 FG |
251 | /// from operand `b` are copied: |
252 | /// - `00`: Copies the selected bits from `b` to result bits `[31:0]`. | |
253 | /// - `01`: Copies the selected bits from `b` to result bits `[63:32]`. | |
254 | /// - `10`: Copies the selected bits from `b` to result bits `[95:64]`. | |
255 | /// - `11`: Copies the selected bits from `b` to result bits `[127:96]`. | |
0531ce1d XL |
256 | /// |
257 | /// * Bits `[3:0]`: If any of these bits are set, the corresponding result | |
258 | /// element is cleared. | |
83c7162d | 259 | /// |
353b0b11 | 260 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_ps) |
0531ce1d XL |
261 | #[inline] |
262 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
263 | #[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))] |
264 | #[rustc_legacy_const_generics(2)] | |
265 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
266 | pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 { | |
353b0b11 | 267 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 268 | insertps(a, b, IMM8 as u8) |
0531ce1d XL |
269 | } |
270 | ||
532ac7d7 | 271 | /// Returns a copy of `a` with the 8-bit integer from `i` inserted at a |
17df50a5 | 272 | /// location specified by `IMM8`. |
83c7162d | 273 | /// |
353b0b11 | 274 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8) |
0531ce1d XL |
275 | #[inline] |
276 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
277 | #[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))] |
278 | #[rustc_legacy_const_generics(2)] | |
83c7162d | 279 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 | 280 | pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
353b0b11 | 281 | static_assert_uimm_bits!(IMM8, 4); |
c620b35d | 282 | transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) |
0531ce1d XL |
283 | } |
284 | ||
532ac7d7 | 285 | /// Returns a copy of `a` with the 32-bit integer from `i` inserted at a |
17df50a5 | 286 | /// location specified by `IMM8`. |
83c7162d | 287 | /// |
353b0b11 | 288 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32) |
0531ce1d XL |
289 | #[inline] |
290 | #[target_feature(enable = "sse4.1")] | |
17df50a5 XL |
291 | #[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))] |
292 | #[rustc_legacy_const_generics(2)] | |
83c7162d | 293 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 | 294 | pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
353b0b11 | 295 | static_assert_uimm_bits!(IMM8, 2); |
c620b35d | 296 | transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) |
0531ce1d XL |
297 | } |
298 | ||
532ac7d7 | 299 | /// Compares packed 8-bit integers in `a` and `b` and returns packed maximum |
0531ce1d | 300 | /// values in dst. |
83c7162d | 301 | /// |
353b0b11 | 302 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8) |
0531ce1d XL |
303 | #[inline] |
304 | #[target_feature(enable = "sse4.1")] | |
305 | #[cfg_attr(test, assert_instr(pmaxsb))] | |
83c7162d | 306 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 307 | pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
308 | let a = a.as_i8x16(); |
309 | let b = b.as_i8x16(); | |
310 | transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b)) | |
0531ce1d XL |
311 | } |
312 | ||
532ac7d7 | 313 | /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed |
0531ce1d | 314 | /// maximum. |
83c7162d | 315 | /// |
353b0b11 | 316 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16) |
0531ce1d XL |
317 | #[inline] |
318 | #[target_feature(enable = "sse4.1")] | |
319 | #[cfg_attr(test, assert_instr(pmaxuw))] | |
83c7162d | 320 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 321 | pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
322 | let a = a.as_u16x8(); |
323 | let b = b.as_u16x8(); | |
324 | transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b)) | |
0531ce1d XL |
325 | } |
326 | ||
532ac7d7 | 327 | /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum |
0531ce1d | 328 | /// values. |
83c7162d | 329 | /// |
353b0b11 | 330 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32) |
0531ce1d XL |
331 | #[inline] |
332 | #[target_feature(enable = "sse4.1")] | |
333 | #[cfg_attr(test, assert_instr(pmaxsd))] | |
83c7162d | 334 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 335 | pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
336 | let a = a.as_i32x4(); |
337 | let b = b.as_i32x4(); | |
338 | transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b)) | |
0531ce1d XL |
339 | } |
340 | ||
532ac7d7 | 341 | /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed |
0531ce1d | 342 | /// maximum values. |
83c7162d | 343 | /// |
353b0b11 | 344 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32) |
0531ce1d XL |
345 | #[inline] |
346 | #[target_feature(enable = "sse4.1")] | |
347 | #[cfg_attr(test, assert_instr(pmaxud))] | |
83c7162d | 348 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 349 | pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
350 | let a = a.as_u32x4(); |
351 | let b = b.as_u32x4(); | |
352 | transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b)) | |
0531ce1d XL |
353 | } |
354 | ||
532ac7d7 | 355 | /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum |
0531ce1d | 356 | /// values in dst. |
83c7162d | 357 | /// |
353b0b11 | 358 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8) |
0531ce1d XL |
359 | #[inline] |
360 | #[target_feature(enable = "sse4.1")] | |
361 | #[cfg_attr(test, assert_instr(pminsb))] | |
83c7162d | 362 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 363 | pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
364 | let a = a.as_i8x16(); |
365 | let b = b.as_i8x16(); | |
366 | transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b)) | |
0531ce1d XL |
367 | } |
368 | ||
532ac7d7 | 369 | /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed |
0531ce1d | 370 | /// minimum. |
83c7162d | 371 | /// |
353b0b11 | 372 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16) |
0531ce1d XL |
373 | #[inline] |
374 | #[target_feature(enable = "sse4.1")] | |
375 | #[cfg_attr(test, assert_instr(pminuw))] | |
83c7162d | 376 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 377 | pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
378 | let a = a.as_u16x8(); |
379 | let b = b.as_u16x8(); | |
380 | transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b)) | |
0531ce1d XL |
381 | } |
382 | ||
532ac7d7 | 383 | /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum |
0531ce1d | 384 | /// values. |
83c7162d | 385 | /// |
353b0b11 | 386 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32) |
0531ce1d XL |
387 | #[inline] |
388 | #[target_feature(enable = "sse4.1")] | |
389 | #[cfg_attr(test, assert_instr(pminsd))] | |
83c7162d | 390 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 391 | pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
392 | let a = a.as_i32x4(); |
393 | let b = b.as_i32x4(); | |
394 | transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b)) | |
0531ce1d XL |
395 | } |
396 | ||
532ac7d7 | 397 | /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed |
0531ce1d | 398 | /// minimum values. |
83c7162d | 399 | /// |
353b0b11 | 400 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu32) |
0531ce1d XL |
401 | #[inline] |
402 | #[target_feature(enable = "sse4.1")] | |
403 | #[cfg_attr(test, assert_instr(pminud))] | |
83c7162d | 404 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 405 | pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { |
9ffffee4 FG |
406 | let a = a.as_u32x4(); |
407 | let b = b.as_u32x4(); | |
408 | transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b)) | |
0531ce1d XL |
409 | } |
410 | ||
532ac7d7 | 411 | /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
0531ce1d | 412 | /// using unsigned saturation |
83c7162d | 413 | /// |
353b0b11 | 414 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32) |
0531ce1d XL |
415 | #[inline] |
416 | #[target_feature(enable = "sse4.1")] | |
417 | #[cfg_attr(test, assert_instr(packusdw))] | |
83c7162d | 418 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 419 | pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { |
532ac7d7 | 420 | transmute(packusdw(a.as_i32x4(), b.as_i32x4())) |
0531ce1d XL |
421 | } |
422 | ||
532ac7d7 | 423 | /// Compares packed 64-bit integers in `a` and `b` for equality |
83c7162d | 424 | /// |
353b0b11 | 425 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64) |
0531ce1d XL |
426 | #[inline] |
427 | #[target_feature(enable = "sse4.1")] | |
428 | #[cfg_attr(test, assert_instr(pcmpeqq))] | |
83c7162d | 429 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 430 | pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { |
532ac7d7 | 431 | transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) |
0531ce1d XL |
432 | } |
433 | ||
434 | /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers | |
83c7162d | 435 | /// |
353b0b11 | 436 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16) |
0531ce1d XL |
437 | #[inline] |
438 | #[target_feature(enable = "sse4.1")] | |
439 | #[cfg_attr(test, assert_instr(pmovsxbw))] | |
83c7162d | 440 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
441 | pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { |
442 | let a = a.as_i8x16(); | |
353b0b11 | 443 | let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
532ac7d7 | 444 | transmute(simd_cast::<_, i16x8>(a)) |
0531ce1d XL |
445 | } |
446 | ||
447 | /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers | |
83c7162d | 448 | /// |
353b0b11 | 449 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32) |
0531ce1d XL |
450 | #[inline] |
451 | #[target_feature(enable = "sse4.1")] | |
452 | #[cfg_attr(test, assert_instr(pmovsxbd))] | |
83c7162d | 453 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
454 | pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { |
455 | let a = a.as_i8x16(); | |
353b0b11 | 456 | let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
532ac7d7 | 457 | transmute(simd_cast::<_, i32x4>(a)) |
0531ce1d XL |
458 | } |
459 | ||
460 | /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed | |
461 | /// 64-bit integers | |
83c7162d | 462 | /// |
353b0b11 | 463 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64) |
0531ce1d XL |
464 | #[inline] |
465 | #[target_feature(enable = "sse4.1")] | |
466 | #[cfg_attr(test, assert_instr(pmovsxbq))] | |
83c7162d | 467 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
468 | pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { |
469 | let a = a.as_i8x16(); | |
353b0b11 | 470 | let a: i8x2 = simd_shuffle!(a, a, [0, 1]); |
532ac7d7 | 471 | transmute(simd_cast::<_, i64x2>(a)) |
0531ce1d XL |
472 | } |
473 | ||
474 | /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers | |
83c7162d | 475 | /// |
353b0b11 | 476 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32) |
0531ce1d XL |
477 | #[inline] |
478 | #[target_feature(enable = "sse4.1")] | |
479 | #[cfg_attr(test, assert_instr(pmovsxwd))] | |
83c7162d | 480 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
481 | pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { |
482 | let a = a.as_i16x8(); | |
353b0b11 | 483 | let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
532ac7d7 | 484 | transmute(simd_cast::<_, i32x4>(a)) |
0531ce1d XL |
485 | } |
486 | ||
487 | /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers | |
83c7162d | 488 | /// |
353b0b11 | 489 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64) |
0531ce1d XL |
490 | #[inline] |
491 | #[target_feature(enable = "sse4.1")] | |
492 | #[cfg_attr(test, assert_instr(pmovsxwq))] | |
83c7162d | 493 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
494 | pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { |
495 | let a = a.as_i16x8(); | |
353b0b11 | 496 | let a: i16x2 = simd_shuffle!(a, a, [0, 1]); |
532ac7d7 | 497 | transmute(simd_cast::<_, i64x2>(a)) |
0531ce1d XL |
498 | } |
499 | ||
500 | /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers | |
83c7162d | 501 | /// |
353b0b11 | 502 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64) |
0531ce1d XL |
503 | #[inline] |
504 | #[target_feature(enable = "sse4.1")] | |
505 | #[cfg_attr(test, assert_instr(pmovsxdq))] | |
83c7162d | 506 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
507 | pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { |
508 | let a = a.as_i32x4(); | |
353b0b11 | 509 | let a: i32x2 = simd_shuffle!(a, a, [0, 1]); |
532ac7d7 | 510 | transmute(simd_cast::<_, i64x2>(a)) |
0531ce1d XL |
511 | } |
512 | ||
532ac7d7 | 513 | /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers |
83c7162d | 514 | /// |
353b0b11 | 515 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16) |
0531ce1d XL |
516 | #[inline] |
517 | #[target_feature(enable = "sse4.1")] | |
518 | #[cfg_attr(test, assert_instr(pmovzxbw))] | |
83c7162d | 519 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
520 | pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { |
521 | let a = a.as_u8x16(); | |
353b0b11 | 522 | let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); |
532ac7d7 | 523 | transmute(simd_cast::<_, i16x8>(a)) |
0531ce1d XL |
524 | } |
525 | ||
532ac7d7 | 526 | /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers |
83c7162d | 527 | /// |
353b0b11 | 528 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32) |
0531ce1d XL |
529 | #[inline] |
530 | #[target_feature(enable = "sse4.1")] | |
531 | #[cfg_attr(test, assert_instr(pmovzxbd))] | |
83c7162d | 532 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
533 | pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { |
534 | let a = a.as_u8x16(); | |
353b0b11 | 535 | let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
532ac7d7 | 536 | transmute(simd_cast::<_, i32x4>(a)) |
0531ce1d XL |
537 | } |
538 | ||
532ac7d7 | 539 | /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers |
83c7162d | 540 | /// |
353b0b11 | 541 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64) |
0531ce1d XL |
542 | #[inline] |
543 | #[target_feature(enable = "sse4.1")] | |
544 | #[cfg_attr(test, assert_instr(pmovzxbq))] | |
83c7162d | 545 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
546 | pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { |
547 | let a = a.as_u8x16(); | |
353b0b11 | 548 | let a: u8x2 = simd_shuffle!(a, a, [0, 1]); |
532ac7d7 | 549 | transmute(simd_cast::<_, i64x2>(a)) |
0531ce1d XL |
550 | } |
551 | ||
532ac7d7 | 552 | /// Zeroes extend packed unsigned 16-bit integers in `a` |
0531ce1d | 553 | /// to packed 32-bit integers |
83c7162d | 554 | /// |
353b0b11 | 555 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32) |
0531ce1d XL |
556 | #[inline] |
557 | #[target_feature(enable = "sse4.1")] | |
558 | #[cfg_attr(test, assert_instr(pmovzxwd))] | |
83c7162d | 559 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
560 | pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { |
561 | let a = a.as_u16x8(); | |
353b0b11 | 562 | let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]); |
532ac7d7 | 563 | transmute(simd_cast::<_, i32x4>(a)) |
0531ce1d XL |
564 | } |
565 | ||
532ac7d7 | 566 | /// Zeroes extend packed unsigned 16-bit integers in `a` |
0531ce1d | 567 | /// to packed 64-bit integers |
83c7162d | 568 | /// |
353b0b11 | 569 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64) |
0531ce1d XL |
570 | #[inline] |
571 | #[target_feature(enable = "sse4.1")] | |
572 | #[cfg_attr(test, assert_instr(pmovzxwq))] | |
83c7162d | 573 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
574 | pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { |
575 | let a = a.as_u16x8(); | |
353b0b11 | 576 | let a: u16x2 = simd_shuffle!(a, a, [0, 1]); |
532ac7d7 | 577 | transmute(simd_cast::<_, i64x2>(a)) |
0531ce1d XL |
578 | } |
579 | ||
/// Zero-extends packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    let a = a.as_u32x4();
    // Keep only the low 2 lanes, then widen; this pattern lowers to
    // `pmovzxdq` (checked by the `assert_instr` above).
    let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
593 | ||
/// Returns the dot product of two __m128d vectors.
///
/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    // The immediate must fit in 8 bits; enforced at compile time.
    static_assert_uimm_bits!(IMM8, 8);
    dppd(a, b, IMM8 as u8)
}
612 | ||
/// Returns the dot product of two __m128 vectors.
///
/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    // The immediate must fit in 8 bits; enforced at compile time.
    static_assert_uimm_bits!(IMM8, 8);
    dpps(a, b, IMM8 as u8)
}
631 | ||
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
    // The generic floor intrinsic lowers to `roundpd` under SSE4.1
    // (checked by the `assert_instr` above).
    simd_floor(a)
}
644 | ||
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
    // The generic floor intrinsic lowers to `roundps` under SSE4.1
    // (checked by the `assert_instr` above).
    simd_floor(a)
}
657 | ||
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// down to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element of the intrinsic
/// result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar round with the rounding mode fixed to round-toward-negative-infinity.
    roundsd(a, b, _MM_FROUND_FLOOR)
}
672 | ||
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// down to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
    // Scalar round with the rounding mode fixed to round-toward-negative-infinity.
    roundss(a, b, _MM_FROUND_FLOOR)
}
687 | ||
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// up to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
    // The generic ceil intrinsic lowers to `roundpd` under SSE4.1
    // (checked by the `assert_instr` above).
    simd_ceil(a)
}
700 | ||
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// up to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
    // The generic ceil intrinsic lowers to `roundps` under SSE4.1
    // (checked by the `assert_instr` above).
    simd_ceil(a)
}
713 | ||
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// up to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar round with the rounding mode fixed to round-toward-positive-infinity.
    roundsd(a, b, _MM_FROUND_CEIL)
}
728 | ||
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// up to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
    // Scalar round with the rounding mode fixed to round-toward-positive-infinity.
    roundss(a, b, _MM_FROUND_CEIL)
}
743 | ||
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and stores the results as packed
/// double-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
    // Only the low 4 bits of the rounding control are meaningful;
    // enforced at compile time.
    static_assert_uimm_bits!(ROUNDING, 4);
    roundpd(a, ROUNDING)
}
784 | ||
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and stores the results as packed
/// single-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
    // Only the low 4 bits of the rounding control are meaningful;
    // enforced at compile time.
    static_assert_uimm_bits!(ROUNDING, 4);
    roundps(a, ROUNDING)
}
825 | ||
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element of the intrinsic
/// result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    // Only the low 4 bits of the rounding control are meaningful;
    // enforced at compile time.
    static_assert_uimm_bits!(ROUNDING, 4);
    roundsd(a, b, ROUNDING)
}
868 | ||
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    // Only the low 4 bits of the rounding control are meaningful;
    // enforced at compile time.
    static_assert_uimm_bits!(ROUNDING, 4);
    roundss(a, b, ROUNDING)
}
911 | ||
/// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector,
/// returning a vector containing its value in its first position, and its
/// index in its second position; all other elements are set to zero.
///
/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
/// instruction.
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
///
/// Returns:
///
/// A 128-bit value where:
///
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
/// * bits `[18:16]` - contain the index of the minimum value
/// * remaining bits are set to `0`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
    // Delegate to the LLVM intrinsic; reinterpret the u16x8 result
    // back to the opaque __m128i type.
    transmute(phminposuw(a.as_u16x8()))
}
940 | ||
532ac7d7 XL |
941 | /// Multiplies the low 32-bit integers from each packed 64-bit |
942 | /// element in `a` and `b`, and returns the signed 64-bit result. | |
83c7162d | 943 | /// |
353b0b11 | 944 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32) |
0531ce1d XL |
945 | #[inline] |
946 | #[target_feature(enable = "sse4.1")] | |
947 | #[cfg_attr(test, assert_instr(pmuldq))] | |
83c7162d | 948 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 949 | pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { |
ed00b5ec FG |
950 | let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2())); |
951 | let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2())); | |
952 | transmute(simd_mul(a, b)) | |
0531ce1d XL |
953 | } |
954 | ||
532ac7d7 | 955 | /// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate |
0531ce1d XL |
956 | /// 64-bit integers, and returns the lowest 32-bit, whatever they might be, |
957 | /// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), | |
958 | /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping | |
959 | /// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would | |
960 | /// return a negative number. | |
83c7162d | 961 | /// |
353b0b11 | 962 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32) |
0531ce1d XL |
963 | #[inline] |
964 | #[target_feature(enable = "sse4.1")] | |
965 | #[cfg_attr(test, assert_instr(pmulld))] | |
83c7162d | 966 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 967 | pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { |
532ac7d7 | 968 | transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) |
0531ce1d XL |
969 | } |
970 | ||
/// Subtracts 8-bit unsigned integer values and computes the absolute
/// values of the differences to the corresponding bits in the destination.
/// Then sums of the absolute differences are returned according to the bit
/// fields in the immediate operand.
///
/// The following algorithm is performed:
///
/// ```ignore
/// i = IMM8[2] * 4
/// j = IMM8[1:0] * 4
/// for k := 0 to 7
///     d0 = abs(a[i + k + 0] - b[j + 0])
///     d1 = abs(a[i + k + 1] - b[j + 1])
///     d2 = abs(a[i + k + 2] - b[j + 2])
///     d3 = abs(a[i + k + 3] - b[j + 3])
///     r[k] = d0 + d1 + d2 + d3
/// ```
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
/// * `b` - A 128-bit vector of type `__m128i`.
/// * `IMM8` - An 8-bit immediate operand specifying how the absolute
///   differences are to be calculated
///     * Bit `[2]` specify the offset for operand `a`
///     * Bits `[1:0]` specify the offset for operand `b`
///
/// Returns:
///
/// * A `__m128i` vector containing the sums of the sets of absolute
///   differences between both operands.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    // Only bits [2:0] of the immediate are used by the instruction;
    // enforced at compile time.
    static_assert_uimm_bits!(IMM8, 3);
    transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
}
1013 | ||
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    // `ptest` sets ZF when (a AND mask) == 0; the intrinsic returns that flag.
    ptestz(a.as_i64x2(), mask.as_i64x2())
}
1036 | ||
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    // `ptest` sets CF when (NOT a AND mask) == 0; the intrinsic returns that flag.
    ptestc(a.as_i64x2(), mask.as_i64x2())
}
1059 | ||
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    // Returns 1 exactly when `ptest` leaves both ZF and CF clear.
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
1082 | ||
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    // Intel defines this as an alias of `_mm_testz_si128`.
    _mm_testz_si128(a, mask)
}
1105 | ||
/// Tests whether the specified bits in `a` 128-bit integer vector are all
/// ones.
///
/// Argument:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
///
/// Returns:
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
    // `_mm_cmpeq_epi32(a, a)` yields an all-ones mask, so this checks
    // every bit of `a` via the carry-flag form of `ptest`.
    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
1127 | ||
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_mix_ones_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    // Intel defines this as an alias of `_mm_testnzc_si128`.
    _mm_testnzc_si128(a, mask)
}
1150 | ||
// Raw LLVM intrinsic declarations backing the SSE4.1 wrappers above.
// The `link_name` attributes bind each function to the corresponding
// `llvm.x86.sse41.*` intrinsic; signatures must match what LLVM expects.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse41.insertps"]
    fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.packusdw"]
    fn packusdw(a: i32x4, b: i32x4) -> u16x8;
    #[link_name = "llvm.x86.sse41.dppd"]
    fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
    #[link_name = "llvm.x86.sse41.dpps"]
    fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.round.pd"]
    fn roundpd(a: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ps"]
    fn roundps(a: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.round.sd"]
    fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ss"]
    fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.phminposuw"]
    fn phminposuw(a: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.mpsadbw"]
    fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
    #[link_name = "llvm.x86.sse41.ptestz"]
    fn ptestz(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestc"]
    fn ptestc(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestnzc"]
    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
1180 | ||
1181 | #[cfg(test)] | |
1182 | mod tests { | |
532ac7d7 | 1183 | use crate::core_arch::x86::*; |
0531ce1d | 1184 | use std::mem; |
416331ca | 1185 | use stdarch_test::simd_test; |
0531ce1d | 1186 | |
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Alternating mask: lanes with the high bit set (-1) take `b`.
        #[rustfmt::skip]
        let mask = _mm_setr_epi8(
            0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
        );
        assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
    }
1209 | ||
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_pd() {
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        // Sign bit of the second lane selects `b` for that lane.
        let mask = transmute(_mm_setr_epi64x(0, -1));
        let r = _mm_blendv_pd(a, b, mask);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }
1219 | ||
83c7162d | 1220 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1221 | unsafe fn test_mm_blendv_ps() { |
1222 | let a = _mm_set1_ps(0.0); | |
1223 | let b = _mm_set1_ps(1.0); | |
532ac7d7 | 1224 | let mask = transmute(_mm_setr_epi32(0, -1, 0, -1)); |
0531ce1d XL |
1225 | let r = _mm_blendv_ps(a, b, mask); |
1226 | let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); | |
1227 | assert_eq_m128(r, e); | |
1228 | } | |
1229 | ||
83c7162d | 1230 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1231 | unsafe fn test_mm_blend_pd() { |
1232 | let a = _mm_set1_pd(0.0); | |
1233 | let b = _mm_set1_pd(1.0); | |
17df50a5 | 1234 | let r = _mm_blend_pd::<0b10>(a, b); |
0531ce1d XL |
1235 | let e = _mm_setr_pd(0.0, 1.0); |
1236 | assert_eq_m128d(r, e); | |
1237 | } | |
1238 | ||
83c7162d | 1239 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1240 | unsafe fn test_mm_blend_ps() { |
1241 | let a = _mm_set1_ps(0.0); | |
1242 | let b = _mm_set1_ps(1.0); | |
17df50a5 | 1243 | let r = _mm_blend_ps::<0b1010>(a, b); |
0531ce1d XL |
1244 | let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); |
1245 | assert_eq_m128(r, e); | |
1246 | } | |
1247 | ||
83c7162d | 1248 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1249 | unsafe fn test_mm_blend_epi16() { |
1250 | let a = _mm_set1_epi16(0); | |
1251 | let b = _mm_set1_epi16(1); | |
17df50a5 | 1252 | let r = _mm_blend_epi16::<0b1010_1100>(a, b); |
0531ce1d XL |
1253 | let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1); |
1254 | assert_eq_m128i(r, e); | |
1255 | } | |
1256 | ||
83c7162d | 1257 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1258 | unsafe fn test_mm_extract_ps() { |
1259 | let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0); | |
ed00b5ec | 1260 | let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32); |
0531ce1d | 1261 | assert_eq!(r, 1.0); |
ed00b5ec | 1262 | let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32); |
17df50a5 | 1263 | assert_eq!(r, 3.0); |
0531ce1d XL |
1264 | } |
1265 | ||
83c7162d | 1266 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1267 | unsafe fn test_mm_extract_epi8() { |
0731742a | 1268 | #[rustfmt::skip] |
0531ce1d XL |
1269 | let a = _mm_setr_epi8( |
1270 | -1, 1, 2, 3, 4, 5, 6, 7, | |
1271 | 8, 9, 10, 11, 12, 13, 14, 15 | |
1272 | ); | |
17df50a5 XL |
1273 | let r1 = _mm_extract_epi8::<0>(a); |
1274 | let r2 = _mm_extract_epi8::<3>(a); | |
0531ce1d XL |
1275 | assert_eq!(r1, 0xFF); |
1276 | assert_eq!(r2, 3); | |
1277 | } | |
1278 | ||
83c7162d | 1279 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1280 | unsafe fn test_mm_extract_epi32() { |
1281 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
17df50a5 | 1282 | let r = _mm_extract_epi32::<1>(a); |
0531ce1d | 1283 | assert_eq!(r, 1); |
17df50a5 XL |
1284 | let r = _mm_extract_epi32::<3>(a); |
1285 | assert_eq!(r, 3); | |
0531ce1d XL |
1286 | } |
1287 | ||
83c7162d | 1288 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1289 | unsafe fn test_mm_insert_ps() { |
1290 | let a = _mm_set1_ps(1.0); | |
1291 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); | |
17df50a5 | 1292 | let r = _mm_insert_ps::<0b11_00_1100>(a, b); |
0531ce1d XL |
1293 | let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); |
1294 | assert_eq_m128(r, e); | |
c620b35d FG |
1295 | |
1296 | // Zeroing takes precedence over copied value | |
1297 | let a = _mm_set1_ps(1.0); | |
1298 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); | |
1299 | let r = _mm_insert_ps::<0b11_00_0001>(a, b); | |
1300 | let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0); | |
1301 | assert_eq_m128(r, e); | |
0531ce1d XL |
1302 | } |
1303 | ||
83c7162d | 1304 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1305 | unsafe fn test_mm_insert_epi8() { |
1306 | let a = _mm_set1_epi8(0); | |
1307 | let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
17df50a5 | 1308 | let r = _mm_insert_epi8::<1>(a, 32); |
0531ce1d | 1309 | assert_eq_m128i(r, e); |
17df50a5 XL |
1310 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0); |
1311 | let r = _mm_insert_epi8::<14>(a, 32); | |
0531ce1d XL |
1312 | assert_eq_m128i(r, e); |
1313 | } | |
1314 | ||
83c7162d | 1315 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1316 | unsafe fn test_mm_insert_epi32() { |
1317 | let a = _mm_set1_epi32(0); | |
1318 | let e = _mm_setr_epi32(0, 32, 0, 0); | |
17df50a5 | 1319 | let r = _mm_insert_epi32::<1>(a, 32); |
0531ce1d | 1320 | assert_eq_m128i(r, e); |
17df50a5 XL |
1321 | let e = _mm_setr_epi32(0, 0, 0, 32); |
1322 | let r = _mm_insert_epi32::<3>(a, 32); | |
0531ce1d XL |
1323 | assert_eq_m128i(r, e); |
1324 | } | |
1325 | ||
83c7162d | 1326 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1327 | unsafe fn test_mm_max_epi8() { |
0731742a | 1328 | #[rustfmt::skip] |
0531ce1d XL |
1329 | let a = _mm_setr_epi8( |
1330 | 1, 4, 5, 8, 9, 12, 13, 16, | |
1331 | 17, 20, 21, 24, 25, 28, 29, 32, | |
1332 | ); | |
0731742a | 1333 | #[rustfmt::skip] |
0531ce1d XL |
1334 | let b = _mm_setr_epi8( |
1335 | 2, 3, 6, 7, 10, 11, 14, 15, | |
1336 | 18, 19, 22, 23, 26, 27, 30, 31, | |
1337 | ); | |
1338 | let r = _mm_max_epi8(a, b); | |
0731742a | 1339 | #[rustfmt::skip] |
0531ce1d XL |
1340 | let e = _mm_setr_epi8( |
1341 | 2, 4, 6, 8, 10, 12, 14, 16, | |
1342 | 18, 20, 22, 24, 26, 28, 30, 32, | |
1343 | ); | |
1344 | assert_eq_m128i(r, e); | |
1345 | } | |
1346 | ||
83c7162d | 1347 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1348 | unsafe fn test_mm_max_epu16() { |
1349 | let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); | |
1350 | let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); | |
1351 | let r = _mm_max_epu16(a, b); | |
1352 | let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16); | |
1353 | assert_eq_m128i(r, e); | |
1354 | } | |
1355 | ||
83c7162d | 1356 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1357 | unsafe fn test_mm_max_epi32() { |
1358 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1359 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1360 | let r = _mm_max_epi32(a, b); | |
1361 | let e = _mm_setr_epi32(2, 4, 6, 8); | |
1362 | assert_eq_m128i(r, e); | |
1363 | } | |
1364 | ||
83c7162d | 1365 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1366 | unsafe fn test_mm_max_epu32() { |
1367 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1368 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1369 | let r = _mm_max_epu32(a, b); | |
1370 | let e = _mm_setr_epi32(2, 4, 6, 8); | |
1371 | assert_eq_m128i(r, e); | |
1372 | } | |
1373 | ||
83c7162d | 1374 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1375 | unsafe fn test_mm_min_epi8_1() { |
0731742a | 1376 | #[rustfmt::skip] |
0531ce1d XL |
1377 | let a = _mm_setr_epi8( |
1378 | 1, 4, 5, 8, 9, 12, 13, 16, | |
1379 | 17, 20, 21, 24, 25, 28, 29, 32, | |
1380 | ); | |
0731742a | 1381 | #[rustfmt::skip] |
0531ce1d XL |
1382 | let b = _mm_setr_epi8( |
1383 | 2, 3, 6, 7, 10, 11, 14, 15, | |
1384 | 18, 19, 22, 23, 26, 27, 30, 31, | |
1385 | ); | |
1386 | let r = _mm_min_epi8(a, b); | |
0731742a | 1387 | #[rustfmt::skip] |
0531ce1d XL |
1388 | let e = _mm_setr_epi8( |
1389 | 1, 3, 5, 7, 9, 11, 13, 15, | |
1390 | 17, 19, 21, 23, 25, 27, 29, 31, | |
1391 | ); | |
1392 | assert_eq_m128i(r, e); | |
1393 | } | |
1394 | ||
83c7162d | 1395 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1396 | unsafe fn test_mm_min_epi8_2() { |
0731742a | 1397 | #[rustfmt::skip] |
0531ce1d XL |
1398 | let a = _mm_setr_epi8( |
1399 | 1, -4, -5, 8, -9, -12, 13, -16, | |
1400 | 17, 20, 21, 24, 25, 28, 29, 32, | |
1401 | ); | |
0731742a | 1402 | #[rustfmt::skip] |
0531ce1d XL |
1403 | let b = _mm_setr_epi8( |
1404 | 2, -3, -6, 7, -10, -11, 14, -15, | |
1405 | 18, 19, 22, 23, 26, 27, 30, 31, | |
1406 | ); | |
1407 | let r = _mm_min_epi8(a, b); | |
0731742a | 1408 | #[rustfmt::skip] |
0531ce1d XL |
1409 | let e = _mm_setr_epi8( |
1410 | 1, -4, -6, 7, -10, -12, 13, -16, | |
1411 | 17, 19, 21, 23, 25, 27, 29, 31, | |
1412 | ); | |
1413 | assert_eq_m128i(r, e); | |
1414 | } | |
1415 | ||
83c7162d | 1416 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1417 | unsafe fn test_mm_min_epu16() { |
1418 | let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); | |
1419 | let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); | |
1420 | let r = _mm_min_epu16(a, b); | |
1421 | let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15); | |
1422 | assert_eq_m128i(r, e); | |
1423 | } | |
1424 | ||
83c7162d | 1425 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1426 | unsafe fn test_mm_min_epi32_1() { |
1427 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1428 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1429 | let r = _mm_min_epi32(a, b); | |
1430 | let e = _mm_setr_epi32(1, 3, 5, 7); | |
1431 | assert_eq_m128i(r, e); | |
1432 | } | |
1433 | ||
83c7162d | 1434 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1435 | unsafe fn test_mm_min_epi32_2() { |
1436 | let a = _mm_setr_epi32(-1, 4, 5, -7); | |
1437 | let b = _mm_setr_epi32(-2, 3, -6, 8); | |
1438 | let r = _mm_min_epi32(a, b); | |
1439 | let e = _mm_setr_epi32(-2, 3, -6, -7); | |
1440 | assert_eq_m128i(r, e); | |
1441 | } | |
1442 | ||
83c7162d | 1443 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1444 | unsafe fn test_mm_min_epu32() { |
1445 | let a = _mm_setr_epi32(1, 4, 5, 8); | |
1446 | let b = _mm_setr_epi32(2, 3, 6, 7); | |
1447 | let r = _mm_min_epu32(a, b); | |
1448 | let e = _mm_setr_epi32(1, 3, 5, 7); | |
1449 | assert_eq_m128i(r, e); | |
1450 | } | |
1451 | ||
83c7162d | 1452 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1453 | unsafe fn test_mm_packus_epi32() { |
1454 | let a = _mm_setr_epi32(1, 2, 3, 4); | |
1455 | let b = _mm_setr_epi32(-1, -2, -3, -4); | |
1456 | let r = _mm_packus_epi32(a, b); | |
1457 | let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0); | |
1458 | assert_eq_m128i(r, e); | |
1459 | } | |
1460 | ||
83c7162d | 1461 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1462 | unsafe fn test_mm_cmpeq_epi64() { |
1463 | let a = _mm_setr_epi64x(0, 1); | |
1464 | let b = _mm_setr_epi64x(0, 0); | |
1465 | let r = _mm_cmpeq_epi64(a, b); | |
1466 | let e = _mm_setr_epi64x(-1, 0); | |
1467 | assert_eq_m128i(r, e); | |
1468 | } | |
1469 | ||
83c7162d | 1470 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1471 | unsafe fn test_mm_cvtepi8_epi16() { |
1472 | let a = _mm_set1_epi8(10); | |
1473 | let r = _mm_cvtepi8_epi16(a); | |
1474 | let e = _mm_set1_epi16(10); | |
1475 | assert_eq_m128i(r, e); | |
1476 | let a = _mm_set1_epi8(-10); | |
1477 | let r = _mm_cvtepi8_epi16(a); | |
1478 | let e = _mm_set1_epi16(-10); | |
1479 | assert_eq_m128i(r, e); | |
1480 | } | |
1481 | ||
83c7162d | 1482 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1483 | unsafe fn test_mm_cvtepi8_epi32() { |
1484 | let a = _mm_set1_epi8(10); | |
1485 | let r = _mm_cvtepi8_epi32(a); | |
1486 | let e = _mm_set1_epi32(10); | |
1487 | assert_eq_m128i(r, e); | |
1488 | let a = _mm_set1_epi8(-10); | |
1489 | let r = _mm_cvtepi8_epi32(a); | |
1490 | let e = _mm_set1_epi32(-10); | |
1491 | assert_eq_m128i(r, e); | |
1492 | } | |
1493 | ||
83c7162d | 1494 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1495 | unsafe fn test_mm_cvtepi8_epi64() { |
1496 | let a = _mm_set1_epi8(10); | |
1497 | let r = _mm_cvtepi8_epi64(a); | |
1498 | let e = _mm_set1_epi64x(10); | |
1499 | assert_eq_m128i(r, e); | |
1500 | let a = _mm_set1_epi8(-10); | |
1501 | let r = _mm_cvtepi8_epi64(a); | |
1502 | let e = _mm_set1_epi64x(-10); | |
1503 | assert_eq_m128i(r, e); | |
1504 | } | |
1505 | ||
83c7162d | 1506 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1507 | unsafe fn test_mm_cvtepi16_epi32() { |
1508 | let a = _mm_set1_epi16(10); | |
1509 | let r = _mm_cvtepi16_epi32(a); | |
1510 | let e = _mm_set1_epi32(10); | |
1511 | assert_eq_m128i(r, e); | |
1512 | let a = _mm_set1_epi16(-10); | |
1513 | let r = _mm_cvtepi16_epi32(a); | |
1514 | let e = _mm_set1_epi32(-10); | |
1515 | assert_eq_m128i(r, e); | |
1516 | } | |
1517 | ||
83c7162d | 1518 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1519 | unsafe fn test_mm_cvtepi16_epi64() { |
1520 | let a = _mm_set1_epi16(10); | |
1521 | let r = _mm_cvtepi16_epi64(a); | |
1522 | let e = _mm_set1_epi64x(10); | |
1523 | assert_eq_m128i(r, e); | |
1524 | let a = _mm_set1_epi16(-10); | |
1525 | let r = _mm_cvtepi16_epi64(a); | |
1526 | let e = _mm_set1_epi64x(-10); | |
1527 | assert_eq_m128i(r, e); | |
1528 | } | |
1529 | ||
83c7162d | 1530 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1531 | unsafe fn test_mm_cvtepi32_epi64() { |
1532 | let a = _mm_set1_epi32(10); | |
1533 | let r = _mm_cvtepi32_epi64(a); | |
1534 | let e = _mm_set1_epi64x(10); | |
1535 | assert_eq_m128i(r, e); | |
1536 | let a = _mm_set1_epi32(-10); | |
1537 | let r = _mm_cvtepi32_epi64(a); | |
1538 | let e = _mm_set1_epi64x(-10); | |
1539 | assert_eq_m128i(r, e); | |
1540 | } | |
1541 | ||
83c7162d | 1542 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1543 | unsafe fn test_mm_cvtepu8_epi16() { |
1544 | let a = _mm_set1_epi8(10); | |
1545 | let r = _mm_cvtepu8_epi16(a); | |
1546 | let e = _mm_set1_epi16(10); | |
1547 | assert_eq_m128i(r, e); | |
1548 | } | |
1549 | ||
83c7162d | 1550 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1551 | unsafe fn test_mm_cvtepu8_epi32() { |
1552 | let a = _mm_set1_epi8(10); | |
1553 | let r = _mm_cvtepu8_epi32(a); | |
1554 | let e = _mm_set1_epi32(10); | |
1555 | assert_eq_m128i(r, e); | |
1556 | } | |
1557 | ||
83c7162d | 1558 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1559 | unsafe fn test_mm_cvtepu8_epi64() { |
1560 | let a = _mm_set1_epi8(10); | |
1561 | let r = _mm_cvtepu8_epi64(a); | |
1562 | let e = _mm_set1_epi64x(10); | |
1563 | assert_eq_m128i(r, e); | |
1564 | } | |
1565 | ||
83c7162d | 1566 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1567 | unsafe fn test_mm_cvtepu16_epi32() { |
1568 | let a = _mm_set1_epi16(10); | |
1569 | let r = _mm_cvtepu16_epi32(a); | |
1570 | let e = _mm_set1_epi32(10); | |
1571 | assert_eq_m128i(r, e); | |
1572 | } | |
1573 | ||
83c7162d | 1574 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1575 | unsafe fn test_mm_cvtepu16_epi64() { |
1576 | let a = _mm_set1_epi16(10); | |
1577 | let r = _mm_cvtepu16_epi64(a); | |
1578 | let e = _mm_set1_epi64x(10); | |
1579 | assert_eq_m128i(r, e); | |
1580 | } | |
1581 | ||
83c7162d | 1582 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1583 | unsafe fn test_mm_cvtepu32_epi64() { |
1584 | let a = _mm_set1_epi32(10); | |
1585 | let r = _mm_cvtepu32_epi64(a); | |
1586 | let e = _mm_set1_epi64x(10); | |
1587 | assert_eq_m128i(r, e); | |
1588 | } | |
1589 | ||
83c7162d | 1590 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1591 | unsafe fn test_mm_dp_pd() { |
1592 | let a = _mm_setr_pd(2.0, 3.0); | |
1593 | let b = _mm_setr_pd(1.0, 4.0); | |
1594 | let e = _mm_setr_pd(14.0, 0.0); | |
17df50a5 | 1595 | assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e); |
0531ce1d XL |
1596 | } |
1597 | ||
83c7162d | 1598 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1599 | unsafe fn test_mm_dp_ps() { |
1600 | let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0); | |
1601 | let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0); | |
1602 | let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0); | |
17df50a5 | 1603 | assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e); |
0531ce1d XL |
1604 | } |
1605 | ||
83c7162d | 1606 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1607 | unsafe fn test_mm_floor_pd() { |
1608 | let a = _mm_setr_pd(2.5, 4.5); | |
1609 | let r = _mm_floor_pd(a); | |
1610 | let e = _mm_setr_pd(2.0, 4.0); | |
1611 | assert_eq_m128d(r, e); | |
1612 | } | |
1613 | ||
83c7162d | 1614 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1615 | unsafe fn test_mm_floor_ps() { |
1616 | let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); | |
1617 | let r = _mm_floor_ps(a); | |
1618 | let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); | |
1619 | assert_eq_m128(r, e); | |
1620 | } | |
1621 | ||
83c7162d | 1622 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1623 | unsafe fn test_mm_floor_sd() { |
1624 | let a = _mm_setr_pd(2.5, 4.5); | |
1625 | let b = _mm_setr_pd(-1.5, -3.5); | |
1626 | let r = _mm_floor_sd(a, b); | |
1627 | let e = _mm_setr_pd(-2.0, 4.5); | |
1628 | assert_eq_m128d(r, e); | |
1629 | } | |
1630 | ||
83c7162d | 1631 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1632 | unsafe fn test_mm_floor_ss() { |
1633 | let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); | |
1634 | let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5); | |
1635 | let r = _mm_floor_ss(a, b); | |
1636 | let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5); | |
1637 | assert_eq_m128(r, e); | |
1638 | } | |
1639 | ||
83c7162d | 1640 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1641 | unsafe fn test_mm_ceil_pd() { |
1642 | let a = _mm_setr_pd(1.5, 3.5); | |
1643 | let r = _mm_ceil_pd(a); | |
1644 | let e = _mm_setr_pd(2.0, 4.0); | |
1645 | assert_eq_m128d(r, e); | |
1646 | } | |
1647 | ||
83c7162d | 1648 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1649 | unsafe fn test_mm_ceil_ps() { |
1650 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1651 | let r = _mm_ceil_ps(a); | |
1652 | let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); | |
1653 | assert_eq_m128(r, e); | |
1654 | } | |
1655 | ||
83c7162d | 1656 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1657 | unsafe fn test_mm_ceil_sd() { |
1658 | let a = _mm_setr_pd(1.5, 3.5); | |
1659 | let b = _mm_setr_pd(-2.5, -4.5); | |
1660 | let r = _mm_ceil_sd(a, b); | |
1661 | let e = _mm_setr_pd(-2.0, 3.5); | |
1662 | assert_eq_m128d(r, e); | |
1663 | } | |
1664 | ||
83c7162d | 1665 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1666 | unsafe fn test_mm_ceil_ss() { |
1667 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1668 | let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5); | |
1669 | let r = _mm_ceil_ss(a, b); | |
1670 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); | |
1671 | assert_eq_m128(r, e); | |
1672 | } | |
1673 | ||
83c7162d | 1674 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1675 | unsafe fn test_mm_round_pd() { |
1676 | let a = _mm_setr_pd(1.25, 3.75); | |
17df50a5 | 1677 | let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a); |
0531ce1d XL |
1678 | let e = _mm_setr_pd(1.0, 4.0); |
1679 | assert_eq_m128d(r, e); | |
1680 | } | |
1681 | ||
83c7162d | 1682 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1683 | unsafe fn test_mm_round_ps() { |
1684 | let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25); | |
17df50a5 | 1685 | let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a); |
0531ce1d XL |
1686 | let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0); |
1687 | assert_eq_m128(r, e); | |
1688 | } | |
1689 | ||
83c7162d | 1690 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1691 | unsafe fn test_mm_round_sd() { |
1692 | let a = _mm_setr_pd(1.5, 3.5); | |
1693 | let b = _mm_setr_pd(-2.5, -4.5); | |
c620b35d FG |
1694 | let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b); |
1695 | let e = _mm_setr_pd(-2.0, 3.5); | |
1696 | assert_eq_m128d(r, e); | |
1697 | ||
1698 | let a = _mm_setr_pd(1.5, 3.5); | |
1699 | let b = _mm_setr_pd(-2.5, -4.5); | |
1700 | let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b); | |
1701 | let e = _mm_setr_pd(-3.0, 3.5); | |
1702 | assert_eq_m128d(r, e); | |
1703 | ||
1704 | let a = _mm_setr_pd(1.5, 3.5); | |
1705 | let b = _mm_setr_pd(-2.5, -4.5); | |
1706 | let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b); | |
1707 | let e = _mm_setr_pd(-2.0, 3.5); | |
1708 | assert_eq_m128d(r, e); | |
1709 | ||
1710 | let a = _mm_setr_pd(1.5, 3.5); | |
1711 | let b = _mm_setr_pd(-2.5, -4.5); | |
1712 | let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b); | |
0531ce1d XL |
1713 | let e = _mm_setr_pd(-2.0, 3.5); |
1714 | assert_eq_m128d(r, e); | |
1715 | } | |
1716 | ||
83c7162d | 1717 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1718 | unsafe fn test_mm_round_ss() { |
1719 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1720 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); | |
c620b35d | 1721 | let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b); |
0531ce1d XL |
1722 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); |
1723 | assert_eq_m128(r, e); | |
c620b35d FG |
1724 | |
1725 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1726 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); | |
1727 | let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b); | |
1728 | let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); | |
1729 | assert_eq_m128(r, e); | |
1730 | ||
1731 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1732 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); | |
1733 | let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b); | |
1734 | let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); | |
1735 | assert_eq_m128(r, e); | |
1736 | ||
1737 | let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); | |
1738 | let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); | |
1739 | let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b); | |
1740 | let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5); | |
1741 | assert_eq_m128(r, e); | |
0531ce1d XL |
1742 | } |
1743 | ||
83c7162d | 1744 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1745 | unsafe fn test_mm_minpos_epu16_1() { |
1746 | let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66); | |
1747 | let r = _mm_minpos_epu16(a); | |
1748 | let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); | |
1749 | assert_eq_m128i(r, e); | |
1750 | } | |
1751 | ||
83c7162d | 1752 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1753 | unsafe fn test_mm_minpos_epu16_2() { |
1754 | let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66); | |
1755 | let r = _mm_minpos_epu16(a); | |
1756 | let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0); | |
1757 | assert_eq_m128i(r, e); | |
1758 | } | |
1759 | ||
c620b35d FG |
1760 | #[simd_test(enable = "sse4.1")] |
1761 | unsafe fn test_mm_minpos_epu16_3() { | |
1762 | // Case where the minimum value is repeated | |
1763 | let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13); | |
1764 | let r = _mm_minpos_epu16(a); | |
1765 | let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); | |
1766 | assert_eq_m128i(r, e); | |
1767 | } | |
1768 | ||
83c7162d | 1769 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1770 | unsafe fn test_mm_mul_epi32() { |
1771 | { | |
1772 | let a = _mm_setr_epi32(1, 1, 1, 1); | |
1773 | let b = _mm_setr_epi32(1, 2, 3, 4); | |
1774 | let r = _mm_mul_epi32(a, b); | |
1775 | let e = _mm_setr_epi64x(1, 3); | |
1776 | assert_eq_m128i(r, e); | |
1777 | } | |
1778 | { | |
0731742a | 1779 | let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */); |
0531ce1d | 1780 | let b = _mm_setr_epi32( |
8faf50e0 XL |
1781 | -20, -256, /* ignored */ |
1782 | 666666, 666666, /* ignored */ | |
0531ce1d XL |
1783 | ); |
1784 | let r = _mm_mul_epi32(a, b); | |
1785 | let e = _mm_setr_epi64x(-300, 823043843622); | |
1786 | assert_eq_m128i(r, e); | |
1787 | } | |
1788 | } | |
1789 | ||
83c7162d | 1790 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1791 | unsafe fn test_mm_mullo_epi32() { |
1792 | { | |
1793 | let a = _mm_setr_epi32(1, 1, 1, 1); | |
1794 | let b = _mm_setr_epi32(1, 2, 3, 4); | |
1795 | let r = _mm_mullo_epi32(a, b); | |
1796 | let e = _mm_setr_epi32(1, 2, 3, 4); | |
1797 | assert_eq_m128i(r, e); | |
1798 | } | |
1799 | { | |
1800 | let a = _mm_setr_epi32(15, -2, 1234567, 99999); | |
1801 | let b = _mm_setr_epi32(-20, -256, 666666, -99999); | |
1802 | let r = _mm_mullo_epi32(a, b); | |
1803 | // Attention, most significant bit in r[2] is treated | |
1804 | // as a sign bit: | |
1805 | // 1234567 * 666666 = -1589877210 | |
1806 | let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409); | |
1807 | assert_eq_m128i(r, e); | |
1808 | } | |
1809 | } | |
1810 | ||
83c7162d | 1811 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1812 | unsafe fn test_mm_minpos_epu16() { |
1813 | let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3); | |
1814 | let r = _mm_minpos_epu16(a); | |
1815 | let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0); | |
1816 | assert_eq_m128i(r, e); | |
1817 | } | |
1818 | ||
83c7162d | 1819 | #[simd_test(enable = "sse4.1")] |
0531ce1d | 1820 | unsafe fn test_mm_mpsadbw_epu8() { |
0731742a | 1821 | #[rustfmt::skip] |
0531ce1d XL |
1822 | let a = _mm_setr_epi8( |
1823 | 0, 1, 2, 3, 4, 5, 6, 7, | |
1824 | 8, 9, 10, 11, 12, 13, 14, 15, | |
1825 | ); | |
1826 | ||
17df50a5 | 1827 | let r = _mm_mpsadbw_epu8::<0b000>(a, a); |
0531ce1d XL |
1828 | let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
1829 | assert_eq_m128i(r, e); | |
1830 | ||
17df50a5 | 1831 | let r = _mm_mpsadbw_epu8::<0b001>(a, a); |
0531ce1d XL |
1832 | let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12); |
1833 | assert_eq_m128i(r, e); | |
1834 | ||
17df50a5 | 1835 | let r = _mm_mpsadbw_epu8::<0b100>(a, a); |
0531ce1d XL |
1836 | let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44); |
1837 | assert_eq_m128i(r, e); | |
1838 | ||
17df50a5 | 1839 | let r = _mm_mpsadbw_epu8::<0b101>(a, a); |
0531ce1d XL |
1840 | let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); |
1841 | assert_eq_m128i(r, e); | |
1842 | ||
17df50a5 | 1843 | let r = _mm_mpsadbw_epu8::<0b111>(a, a); |
0531ce1d XL |
1844 | let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); |
1845 | assert_eq_m128i(r, e); | |
1846 | } | |
1847 | ||
83c7162d | 1848 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1849 | unsafe fn test_mm_testz_si128() { |
1850 | let a = _mm_set1_epi8(1); | |
1851 | let mask = _mm_set1_epi8(0); | |
1852 | let r = _mm_testz_si128(a, mask); | |
1853 | assert_eq!(r, 1); | |
1854 | let a = _mm_set1_epi8(0b101); | |
1855 | let mask = _mm_set1_epi8(0b110); | |
1856 | let r = _mm_testz_si128(a, mask); | |
1857 | assert_eq!(r, 0); | |
1858 | let a = _mm_set1_epi8(0b011); | |
1859 | let mask = _mm_set1_epi8(0b100); | |
1860 | let r = _mm_testz_si128(a, mask); | |
1861 | assert_eq!(r, 1); | |
1862 | } | |
1863 | ||
83c7162d | 1864 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1865 | unsafe fn test_mm_testc_si128() { |
1866 | let a = _mm_set1_epi8(-1); | |
1867 | let mask = _mm_set1_epi8(0); | |
1868 | let r = _mm_testc_si128(a, mask); | |
1869 | assert_eq!(r, 1); | |
1870 | let a = _mm_set1_epi8(0b101); | |
1871 | let mask = _mm_set1_epi8(0b110); | |
1872 | let r = _mm_testc_si128(a, mask); | |
1873 | assert_eq!(r, 0); | |
1874 | let a = _mm_set1_epi8(0b101); | |
1875 | let mask = _mm_set1_epi8(0b100); | |
1876 | let r = _mm_testc_si128(a, mask); | |
1877 | assert_eq!(r, 1); | |
1878 | } | |
1879 | ||
83c7162d | 1880 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1881 | unsafe fn test_mm_testnzc_si128() { |
1882 | let a = _mm_set1_epi8(0); | |
1883 | let mask = _mm_set1_epi8(1); | |
1884 | let r = _mm_testnzc_si128(a, mask); | |
1885 | assert_eq!(r, 0); | |
1886 | let a = _mm_set1_epi8(-1); | |
1887 | let mask = _mm_set1_epi8(0); | |
1888 | let r = _mm_testnzc_si128(a, mask); | |
1889 | assert_eq!(r, 0); | |
1890 | let a = _mm_set1_epi8(0b101); | |
1891 | let mask = _mm_set1_epi8(0b110); | |
1892 | let r = _mm_testnzc_si128(a, mask); | |
1893 | assert_eq!(r, 1); | |
1894 | let a = _mm_set1_epi8(0b101); | |
1895 | let mask = _mm_set1_epi8(0b101); | |
1896 | let r = _mm_testnzc_si128(a, mask); | |
1897 | assert_eq!(r, 0); | |
1898 | } | |
1899 | ||
83c7162d | 1900 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1901 | unsafe fn test_mm_test_all_zeros() { |
1902 | let a = _mm_set1_epi8(1); | |
1903 | let mask = _mm_set1_epi8(0); | |
1904 | let r = _mm_test_all_zeros(a, mask); | |
1905 | assert_eq!(r, 1); | |
1906 | let a = _mm_set1_epi8(0b101); | |
1907 | let mask = _mm_set1_epi8(0b110); | |
1908 | let r = _mm_test_all_zeros(a, mask); | |
1909 | assert_eq!(r, 0); | |
1910 | let a = _mm_set1_epi8(0b011); | |
1911 | let mask = _mm_set1_epi8(0b100); | |
1912 | let r = _mm_test_all_zeros(a, mask); | |
1913 | assert_eq!(r, 1); | |
1914 | } | |
1915 | ||
83c7162d | 1916 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1917 | unsafe fn test_mm_test_all_ones() { |
1918 | let a = _mm_set1_epi8(-1); | |
1919 | let r = _mm_test_all_ones(a); | |
1920 | assert_eq!(r, 1); | |
1921 | let a = _mm_set1_epi8(0b101); | |
1922 | let r = _mm_test_all_ones(a); | |
1923 | assert_eq!(r, 0); | |
1924 | } | |
1925 | ||
83c7162d | 1926 | #[simd_test(enable = "sse4.1")] |
0531ce1d XL |
1927 | unsafe fn test_mm_test_mix_ones_zeros() { |
1928 | let a = _mm_set1_epi8(0); | |
1929 | let mask = _mm_set1_epi8(1); | |
1930 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1931 | assert_eq!(r, 0); | |
1932 | let a = _mm_set1_epi8(-1); | |
1933 | let mask = _mm_set1_epi8(0); | |
1934 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1935 | assert_eq!(r, 0); | |
1936 | let a = _mm_set1_epi8(0b101); | |
1937 | let mask = _mm_set1_epi8(0b110); | |
1938 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1939 | assert_eq!(r, 1); | |
1940 | let a = _mm_set1_epi8(0b101); | |
1941 | let mask = _mm_set1_epi8(0b101); | |
1942 | let r = _mm_test_mix_ones_zeros(a, mask); | |
1943 | assert_eq!(r, 0); | |
1944 | } | |
1945 | } |