1 //! Streaming SIMD Extensions 4.1 (SSE4.1)
use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;
// SSE4 rounding constants
13 #[stable(feature = "simd_x86", since = "1.27.0")]
14 pub const _MM_FROUND_TO_NEAREST_INT
: i32 = 0x00;
16 #[stable(feature = "simd_x86", since = "1.27.0")]
17 pub const _MM_FROUND_TO_NEG_INF
: i32 = 0x01;
19 #[stable(feature = "simd_x86", since = "1.27.0")]
20 pub const _MM_FROUND_TO_POS_INF
: i32 = 0x02;
22 #[stable(feature = "simd_x86", since = "1.27.0")]
23 pub const _MM_FROUND_TO_ZERO
: i32 = 0x03;
24 /// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
25 #[stable(feature = "simd_x86", since = "1.27.0")]
26 pub const _MM_FROUND_CUR_DIRECTION
: i32 = 0x04;
27 /// do not suppress exceptions
28 #[stable(feature = "simd_x86", since = "1.27.0")]
29 pub const _MM_FROUND_RAISE_EXC
: i32 = 0x00;
30 /// suppress exceptions
31 #[stable(feature = "simd_x86", since = "1.27.0")]
32 pub const _MM_FROUND_NO_EXC
: i32 = 0x08;
33 /// round to nearest and do not suppress exceptions
34 #[stable(feature = "simd_x86", since = "1.27.0")]
35 pub const _MM_FROUND_NINT
: i32 = 0x00;
36 /// round down and do not suppress exceptions
37 #[stable(feature = "simd_x86", since = "1.27.0")]
38 pub const _MM_FROUND_FLOOR
: i32 = _MM_FROUND_RAISE_EXC
| _MM_FROUND_TO_NEG_INF
;
39 /// round up and do not suppress exceptions
40 #[stable(feature = "simd_x86", since = "1.27.0")]
41 pub const _MM_FROUND_CEIL
: i32 = _MM_FROUND_RAISE_EXC
| _MM_FROUND_TO_POS_INF
;
42 /// truncate and do not suppress exceptions
43 #[stable(feature = "simd_x86", since = "1.27.0")]
44 pub const _MM_FROUND_TRUNC
: i32 = _MM_FROUND_RAISE_EXC
| _MM_FROUND_TO_ZERO
;
45 /// use MXCSR.RC and do not suppress exceptions; see
46 /// `vendor::_MM_SET_ROUNDING_MODE`
47 #[stable(feature = "simd_x86", since = "1.27.0")]
48 pub const _MM_FROUND_RINT
: i32 = _MM_FROUND_RAISE_EXC
| _MM_FROUND_CUR_DIRECTION
;
49 /// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
50 #[stable(feature = "simd_x86", since = "1.27.0")]
51 pub const _MM_FROUND_NEARBYINT
: i32 = _MM_FROUND_NO_EXC
| _MM_FROUND_CUR_DIRECTION
;
53 /// Blend packed 8-bit integers from `a` and `b` using `mask`
55 /// The high bit of each corresponding mask byte determines the selection.
56 /// If the high bit is set the element of `a` is selected. The element
57 /// of `b` is selected otherwise.
59 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8)
61 #[target_feature(enable = "sse4.1")]
62 #[cfg_attr(test, assert_instr(pblendvb))]
63 #[stable(feature = "simd_x86", since = "1.27.0")]
64 pub unsafe fn _mm_blendv_epi8(a
: __m128i
, b
: __m128i
, mask
: __m128i
) -> __m128i
{
65 transmute(pblendvb(a
.as_i8x16(), b
.as_i8x16(), mask
.as_i8x16()))
68 /// Blend packed 16-bit integers from `a` and `b` using the mask `imm8`.
70 /// The mask bits determine the selection. A clear bit selects the
71 /// corresponding element of `a`, and a set bit the corresponding
74 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
76 #[target_feature(enable = "sse4.1")]
77 // Note: LLVM7 prefers the single-precision floating-point domain when possible
78 // see https://bugs.llvm.org/show_bug.cgi?id=38195
79 // #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
80 #[cfg_attr(test, assert_instr(blendps, imm8 = 0xF0))]
81 #[rustc_args_required_const(2)]
82 #[stable(feature = "simd_x86", since = "1.27.0")]
83 pub unsafe fn _mm_blend_epi16(a
: __m128i
, b
: __m128i
, imm8
: i32) -> __m128i
{
91 transmute(constify_imm8
!(imm8
, call
))
94 /// Blend packed double-precision (64-bit) floating-point elements from `a`
95 /// and `b` using `mask`
97 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd)
99 #[target_feature(enable = "sse4.1")]
100 #[cfg_attr(test, assert_instr(blendvpd))]
101 #[stable(feature = "simd_x86", since = "1.27.0")]
102 pub unsafe fn _mm_blendv_pd(a
: __m128d
, b
: __m128d
, mask
: __m128d
) -> __m128d
{
106 /// Blend packed single-precision (32-bit) floating-point elements from `a`
107 /// and `b` using `mask`
109 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps)
111 #[target_feature(enable = "sse4.1")]
112 #[cfg_attr(test, assert_instr(blendvps))]
113 #[stable(feature = "simd_x86", since = "1.27.0")]
114 pub unsafe fn _mm_blendv_ps(a
: __m128
, b
: __m128
, mask
: __m128
) -> __m128
{
118 /// Blend packed double-precision (64-bit) floating-point elements from `a`
119 /// and `b` using control mask `imm2`
121 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
123 #[target_feature(enable = "sse4.1")]
124 // Note: LLVM7 prefers the single-precision floating-point domain when possible
125 // see https://bugs.llvm.org/show_bug.cgi?id=38195
126 // #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
127 #[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))]
128 #[rustc_args_required_const(2)]
129 #[stable(feature = "simd_x86", since = "1.27.0")]
130 pub unsafe fn _mm_blend_pd(a
: __m128d
, b
: __m128d
, imm2
: i32) -> __m128d
{
136 constify_imm2
!(imm2
, call
)
139 /// Blend packed single-precision (32-bit) floating-point elements from `a`
140 /// and `b` using mask `imm4`
142 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps)
144 #[target_feature(enable = "sse4.1")]
145 #[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
146 #[rustc_args_required_const(2)]
147 #[stable(feature = "simd_x86", since = "1.27.0")]
148 pub unsafe fn _mm_blend_ps(a
: __m128
, b
: __m128
, imm4
: i32) -> __m128
{
154 constify_imm4
!(imm4
, call
)
157 /// Extracts a single-precision (32-bit) floating-point element from `a`,
158 /// selected with `imm8`
160 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps)
162 #[target_feature(enable = "sse4.1")]
164 all(test
, not(target_os
= "windows")),
165 assert_instr(extractps
, imm8
= 0)
167 #[rustc_args_required_const(1)]
168 #[stable(feature = "simd_x86", since = "1.27.0")]
169 pub unsafe fn _mm_extract_ps(a
: __m128
, imm8
: i32) -> i32 {
172 transmute(simd_extract
::<_
, f32>(a
, $imm2
))
175 constify_imm2
!(imm8
, call
)
178 /// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
179 /// integer containing the zero-extended integer data.
181 /// See [LLVM commit D20468][https://reviews.llvm.org/D20468].
183 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8)
185 #[target_feature(enable = "sse4.1")]
186 #[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
187 #[rustc_args_required_const(1)]
188 #[stable(feature = "simd_x86", since = "1.27.0")]
189 pub unsafe fn _mm_extract_epi8(a
: __m128i
, imm8
: i32) -> i32 {
190 let a
= a
.as_u8x16();
193 simd_extract
::<_
, u8>(a
, $imm4
) as i32
196 constify_imm4
!(imm8
, call
)
199 /// Extracts an 32-bit integer from `a` selected with `imm8`
201 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32)
203 #[target_feature(enable = "sse4.1")]
205 all(test
, not(target_os
= "windows")),
206 assert_instr(extractps
, imm8
= 1)
208 #[rustc_args_required_const(1)]
209 #[stable(feature = "simd_x86", since = "1.27.0")]
210 pub unsafe fn _mm_extract_epi32(a
: __m128i
, imm8
: i32) -> i32 {
211 let a
= a
.as_i32x4();
214 simd_extract
::<_
, i32>(a
, $imm2
)
217 constify_imm2
!(imm8
, call
)
220 /// Select a single value in `a` to store at some position in `b`,
221 /// Then zero elements according to `imm8`.
223 /// `imm8` specifies which bits from operand `a` will be copied, which bits in
224 /// the result they will be copied to, and which bits in the result will be
225 /// cleared. The following assignments are made:
227 /// * Bits `[7:6]` specify the bits to copy from operand `a`:
228 /// - `00`: Selects bits `[31:0]` from operand `a`.
229 /// - `01`: Selects bits `[63:32]` from operand `a`.
230 /// - `10`: Selects bits `[95:64]` from operand `a`.
231 /// - `11`: Selects bits `[127:96]` from operand `a`.
233 /// * Bits `[5:4]` specify the bits in the result to which the selected bits
234 /// from operand `a` are copied:
235 /// - `00`: Copies the selected bits from `a` to result bits `[31:0]`.
236 /// - `01`: Copies the selected bits from `a` to result bits `[63:32]`.
237 /// - `10`: Copies the selected bits from `a` to result bits `[95:64]`.
238 /// - `11`: Copies the selected bits from `a` to result bits `[127:96]`.
240 /// * Bits `[3:0]`: If any of these bits are set, the corresponding result
241 /// element is cleared.
243 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps)
245 #[target_feature(enable = "sse4.1")]
246 #[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
247 #[rustc_args_required_const(2)]
248 #[stable(feature = "simd_x86", since = "1.27.0")]
249 pub unsafe fn _mm_insert_ps(a
: __m128
, b
: __m128
, imm8
: i32) -> __m128
{
252 insertps(a
, b
, $imm8
)
255 constify_imm8
!(imm8
, call
)
258 /// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
259 /// location specified by `imm8`.
261 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8)
263 #[target_feature(enable = "sse4.1")]
264 #[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
265 #[rustc_args_required_const(2)]
266 #[stable(feature = "simd_x86", since = "1.27.0")]
267 pub unsafe fn _mm_insert_epi8(a
: __m128i
, i
: i32, imm8
: i32) -> __m128i
{
268 let a
= a
.as_i8x16();
271 transmute(simd_insert(a
, $imm4
, i
as i8))
274 constify_imm4
!(imm8
, call
)
277 /// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
278 /// location specified by `imm8`.
280 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32)
282 #[target_feature(enable = "sse4.1")]
283 #[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
284 #[rustc_args_required_const(2)]
285 #[stable(feature = "simd_x86", since = "1.27.0")]
286 pub unsafe fn _mm_insert_epi32(a
: __m128i
, i
: i32, imm8
: i32) -> __m128i
{
287 let a
= a
.as_i32x4();
290 transmute(simd_insert(a
, $imm2
, i
))
293 constify_imm2
!(imm8
, call
)
296 /// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
299 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8)
301 #[target_feature(enable = "sse4.1")]
302 #[cfg_attr(test, assert_instr(pmaxsb))]
303 #[stable(feature = "simd_x86", since = "1.27.0")]
304 pub unsafe fn _mm_max_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
305 transmute(pmaxsb(a
.as_i8x16(), b
.as_i8x16()))
308 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
311 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16)
313 #[target_feature(enable = "sse4.1")]
314 #[cfg_attr(test, assert_instr(pmaxuw))]
315 #[stable(feature = "simd_x86", since = "1.27.0")]
316 pub unsafe fn _mm_max_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
317 transmute(pmaxuw(a
.as_u16x8(), b
.as_u16x8()))
320 /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
323 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32)
325 #[target_feature(enable = "sse4.1")]
326 #[cfg_attr(test, assert_instr(pmaxsd))]
327 #[stable(feature = "simd_x86", since = "1.27.0")]
328 pub unsafe fn _mm_max_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
329 transmute(pmaxsd(a
.as_i32x4(), b
.as_i32x4()))
332 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32)
337 #[target_feature(enable = "sse4.1")]
338 #[cfg_attr(test, assert_instr(pmaxud))]
339 #[stable(feature = "simd_x86", since = "1.27.0")]
340 pub unsafe fn _mm_max_epu32(a
: __m128i
, b
: __m128i
) -> __m128i
{
341 transmute(pmaxud(a
.as_u32x4(), b
.as_u32x4()))
344 /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
347 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8)
349 #[target_feature(enable = "sse4.1")]
350 #[cfg_attr(test, assert_instr(pminsb))]
351 #[stable(feature = "simd_x86", since = "1.27.0")]
352 pub unsafe fn _mm_min_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
353 transmute(pminsb(a
.as_i8x16(), b
.as_i8x16()))
356 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
359 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16)
361 #[target_feature(enable = "sse4.1")]
362 #[cfg_attr(test, assert_instr(pminuw))]
363 #[stable(feature = "simd_x86", since = "1.27.0")]
364 pub unsafe fn _mm_min_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
365 transmute(pminuw(a
.as_u16x8(), b
.as_u16x8()))
368 /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
371 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32)
373 #[target_feature(enable = "sse4.1")]
374 #[cfg_attr(test, assert_instr(pminsd))]
375 #[stable(feature = "simd_x86", since = "1.27.0")]
376 pub unsafe fn _mm_min_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
377 transmute(pminsd(a
.as_i32x4(), b
.as_i32x4()))
380 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
383 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32)
385 #[target_feature(enable = "sse4.1")]
386 #[cfg_attr(test, assert_instr(pminud))]
387 #[stable(feature = "simd_x86", since = "1.27.0")]
388 pub unsafe fn _mm_min_epu32(a
: __m128i
, b
: __m128i
) -> __m128i
{
389 transmute(pminud(a
.as_u32x4(), b
.as_u32x4()))
392 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
393 /// using unsigned saturation
395 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32)
397 #[target_feature(enable = "sse4.1")]
398 #[cfg_attr(test, assert_instr(packusdw))]
399 #[stable(feature = "simd_x86", since = "1.27.0")]
400 pub unsafe fn _mm_packus_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
401 transmute(packusdw(a
.as_i32x4(), b
.as_i32x4()))
404 /// Compares packed 64-bit integers in `a` and `b` for equality
406 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64)
408 #[target_feature(enable = "sse4.1")]
409 #[cfg_attr(test, assert_instr(pcmpeqq))]
410 #[stable(feature = "simd_x86", since = "1.27.0")]
411 pub unsafe fn _mm_cmpeq_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
412 transmute(simd_eq
::<_
, i64x2
>(a
.as_i64x2(), b
.as_i64x2()))
415 /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
417 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16)
419 #[target_feature(enable = "sse4.1")]
420 #[cfg_attr(test, assert_instr(pmovsxbw))]
421 #[stable(feature = "simd_x86", since = "1.27.0")]
422 pub unsafe fn _mm_cvtepi8_epi16(a
: __m128i
) -> __m128i
{
423 let a
= a
.as_i8x16();
424 let a
= simd_shuffle8
::<_
, i8x8
>(a
, a
, [0, 1, 2, 3, 4, 5, 6, 7]);
425 transmute(simd_cast
::<_
, i16x8
>(a
))
428 /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
430 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32)
432 #[target_feature(enable = "sse4.1")]
433 #[cfg_attr(test, assert_instr(pmovsxbd))]
434 #[stable(feature = "simd_x86", since = "1.27.0")]
435 pub unsafe fn _mm_cvtepi8_epi32(a
: __m128i
) -> __m128i
{
436 let a
= a
.as_i8x16();
437 let a
= simd_shuffle4
::<_
, i8x4
>(a
, a
, [0, 1, 2, 3]);
438 transmute(simd_cast
::<_
, i32x4
>(a
))
441 /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
444 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64)
446 #[target_feature(enable = "sse4.1")]
447 #[cfg_attr(test, assert_instr(pmovsxbq))]
448 #[stable(feature = "simd_x86", since = "1.27.0")]
449 pub unsafe fn _mm_cvtepi8_epi64(a
: __m128i
) -> __m128i
{
450 let a
= a
.as_i8x16();
451 let a
= simd_shuffle2
::<_
, i8x2
>(a
, a
, [0, 1]);
452 transmute(simd_cast
::<_
, i64x2
>(a
))
455 /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
457 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32)
459 #[target_feature(enable = "sse4.1")]
460 #[cfg_attr(test, assert_instr(pmovsxwd))]
461 #[stable(feature = "simd_x86", since = "1.27.0")]
462 pub unsafe fn _mm_cvtepi16_epi32(a
: __m128i
) -> __m128i
{
463 let a
= a
.as_i16x8();
464 let a
= simd_shuffle4
::<_
, i16x4
>(a
, a
, [0, 1, 2, 3]);
465 transmute(simd_cast
::<_
, i32x4
>(a
))
468 /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
470 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64)
472 #[target_feature(enable = "sse4.1")]
473 #[cfg_attr(test, assert_instr(pmovsxwq))]
474 #[stable(feature = "simd_x86", since = "1.27.0")]
475 pub unsafe fn _mm_cvtepi16_epi64(a
: __m128i
) -> __m128i
{
476 let a
= a
.as_i16x8();
477 let a
= simd_shuffle2
::<_
, i16x2
>(a
, a
, [0, 1]);
478 transmute(simd_cast
::<_
, i64x2
>(a
))
481 /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
483 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64)
485 #[target_feature(enable = "sse4.1")]
486 #[cfg_attr(test, assert_instr(pmovsxdq))]
487 #[stable(feature = "simd_x86", since = "1.27.0")]
488 pub unsafe fn _mm_cvtepi32_epi64(a
: __m128i
) -> __m128i
{
489 let a
= a
.as_i32x4();
490 let a
= simd_shuffle2
::<_
, i32x2
>(a
, a
, [0, 1]);
491 transmute(simd_cast
::<_
, i64x2
>(a
))
494 /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
496 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16)
498 #[target_feature(enable = "sse4.1")]
499 #[cfg_attr(test, assert_instr(pmovzxbw))]
500 #[stable(feature = "simd_x86", since = "1.27.0")]
501 pub unsafe fn _mm_cvtepu8_epi16(a
: __m128i
) -> __m128i
{
502 let a
= a
.as_u8x16();
503 let a
= simd_shuffle8
::<_
, u8x8
>(a
, a
, [0, 1, 2, 3, 4, 5, 6, 7]);
504 transmute(simd_cast
::<_
, i16x8
>(a
))
507 /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
509 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32)
511 #[target_feature(enable = "sse4.1")]
512 #[cfg_attr(test, assert_instr(pmovzxbd))]
513 #[stable(feature = "simd_x86", since = "1.27.0")]
514 pub unsafe fn _mm_cvtepu8_epi32(a
: __m128i
) -> __m128i
{
515 let a
= a
.as_u8x16();
516 let a
= simd_shuffle4
::<_
, u8x4
>(a
, a
, [0, 1, 2, 3]);
517 transmute(simd_cast
::<_
, i32x4
>(a
))
520 /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
522 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64)
524 #[target_feature(enable = "sse4.1")]
525 #[cfg_attr(test, assert_instr(pmovzxbq))]
526 #[stable(feature = "simd_x86", since = "1.27.0")]
527 pub unsafe fn _mm_cvtepu8_epi64(a
: __m128i
) -> __m128i
{
528 let a
= a
.as_u8x16();
529 let a
= simd_shuffle2
::<_
, u8x2
>(a
, a
, [0, 1]);
530 transmute(simd_cast
::<_
, i64x2
>(a
))
533 /// Zeroes extend packed unsigned 16-bit integers in `a`
534 /// to packed 32-bit integers
536 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32)
538 #[target_feature(enable = "sse4.1")]
539 #[cfg_attr(test, assert_instr(pmovzxwd))]
540 #[stable(feature = "simd_x86", since = "1.27.0")]
541 pub unsafe fn _mm_cvtepu16_epi32(a
: __m128i
) -> __m128i
{
542 let a
= a
.as_u16x8();
543 let a
= simd_shuffle4
::<_
, u16x4
>(a
, a
, [0, 1, 2, 3]);
544 transmute(simd_cast
::<_
, i32x4
>(a
))
547 /// Zeroes extend packed unsigned 16-bit integers in `a`
548 /// to packed 64-bit integers
550 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64)
552 #[target_feature(enable = "sse4.1")]
553 #[cfg_attr(test, assert_instr(pmovzxwq))]
554 #[stable(feature = "simd_x86", since = "1.27.0")]
555 pub unsafe fn _mm_cvtepu16_epi64(a
: __m128i
) -> __m128i
{
556 let a
= a
.as_u16x8();
557 let a
= simd_shuffle2
::<_
, u16x2
>(a
, a
, [0, 1]);
558 transmute(simd_cast
::<_
, i64x2
>(a
))
561 /// Zeroes extend packed unsigned 32-bit integers in `a`
562 /// to packed 64-bit integers
564 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64)
566 #[target_feature(enable = "sse4.1")]
567 #[cfg_attr(test, assert_instr(pmovzxdq))]
568 #[stable(feature = "simd_x86", since = "1.27.0")]
569 pub unsafe fn _mm_cvtepu32_epi64(a
: __m128i
) -> __m128i
{
570 let a
= a
.as_u32x4();
571 let a
= simd_shuffle2
::<_
, u32x2
>(a
, a
, [0, 1]);
572 transmute(simd_cast
::<_
, i64x2
>(a
))
575 /// Returns the dot product of two __m128d vectors.
577 /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
578 /// If a condition mask bit is zero, the corresponding multiplication is
579 /// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
580 /// the dot product will be stored in the return value component. Otherwise if
581 /// the broadcast mask bit is zero then the return component will be zero.
583 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd)
585 #[target_feature(enable = "sse4.1")]
586 #[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
587 #[rustc_args_required_const(2)]
588 #[stable(feature = "simd_x86", since = "1.27.0")]
589 pub unsafe fn _mm_dp_pd(a
: __m128d
, b
: __m128d
, imm8
: i32) -> __m128d
{
595 constify_imm8
!(imm8
, call
)
598 /// Returns the dot product of two __m128 vectors.
600 /// `imm8[3:0]` is the broadcast mask, and `imm8[7:4]` is the condition mask.
601 /// If a condition mask bit is zero, the corresponding multiplication is
602 /// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
603 /// the dot product will be stored in the return value component. Otherwise if
604 /// the broadcast mask bit is zero then the return component will be zero.
606 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps)
608 #[target_feature(enable = "sse4.1")]
609 #[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
610 #[rustc_args_required_const(2)]
611 #[stable(feature = "simd_x86", since = "1.27.0")]
612 pub unsafe fn _mm_dp_ps(a
: __m128
, b
: __m128
, imm8
: i32) -> __m128
{
618 constify_imm8
!(imm8
, call
)
621 /// Round the packed double-precision (64-bit) floating-point elements in `a`
622 /// down to an integer value, and stores the results as packed double-precision
623 /// floating-point elements.
625 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd)
627 #[target_feature(enable = "sse4.1")]
628 #[cfg_attr(test, assert_instr(roundpd))]
629 #[stable(feature = "simd_x86", since = "1.27.0")]
630 pub unsafe fn _mm_floor_pd(a
: __m128d
) -> __m128d
{
634 /// Round the packed single-precision (32-bit) floating-point elements in `a`
635 /// down to an integer value, and stores the results as packed single-precision
636 /// floating-point elements.
638 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps)
640 #[target_feature(enable = "sse4.1")]
641 #[cfg_attr(test, assert_instr(roundps))]
642 #[stable(feature = "simd_x86", since = "1.27.0")]
643 pub unsafe fn _mm_floor_ps(a
: __m128
) -> __m128
{
647 /// Round the lower double-precision (64-bit) floating-point element in `b`
648 /// down to an integer value, store the result as a double-precision
649 /// floating-point element in the lower element of the intrinsic result,
650 /// and copies the upper element from `a` to the upper element of the intrinsic
653 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd)
655 #[target_feature(enable = "sse4.1")]
656 #[cfg_attr(test, assert_instr(roundsd))]
657 #[stable(feature = "simd_x86", since = "1.27.0")]
658 pub unsafe fn _mm_floor_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
659 roundsd(a
, b
, _MM_FROUND_FLOOR
)
662 /// Round the lower single-precision (32-bit) floating-point element in `b`
663 /// down to an integer value, store the result as a single-precision
664 /// floating-point element in the lower element of the intrinsic result,
665 /// and copies the upper 3 packed elements from `a` to the upper elements
666 /// of the intrinsic result.
668 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss)
670 #[target_feature(enable = "sse4.1")]
671 #[cfg_attr(test, assert_instr(roundss))]
672 #[stable(feature = "simd_x86", since = "1.27.0")]
673 pub unsafe fn _mm_floor_ss(a
: __m128
, b
: __m128
) -> __m128
{
674 roundss(a
, b
, _MM_FROUND_FLOOR
)
677 /// Round the packed double-precision (64-bit) floating-point elements in `a`
678 /// up to an integer value, and stores the results as packed double-precision
679 /// floating-point elements.
681 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd)
683 #[target_feature(enable = "sse4.1")]
684 #[cfg_attr(test, assert_instr(roundpd))]
685 #[stable(feature = "simd_x86", since = "1.27.0")]
686 pub unsafe fn _mm_ceil_pd(a
: __m128d
) -> __m128d
{
690 /// Round the packed single-precision (32-bit) floating-point elements in `a`
691 /// up to an integer value, and stores the results as packed single-precision
692 /// floating-point elements.
694 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps)
696 #[target_feature(enable = "sse4.1")]
697 #[cfg_attr(test, assert_instr(roundps))]
698 #[stable(feature = "simd_x86", since = "1.27.0")]
699 pub unsafe fn _mm_ceil_ps(a
: __m128
) -> __m128
{
703 /// Round the lower double-precision (64-bit) floating-point element in `b`
704 /// up to an integer value, store the result as a double-precision
705 /// floating-point element in the lower element of the intrisic result,
706 /// and copies the upper element from `a` to the upper element
707 /// of the intrinsic result.
709 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd)
711 #[target_feature(enable = "sse4.1")]
712 #[cfg_attr(test, assert_instr(roundsd))]
713 #[stable(feature = "simd_x86", since = "1.27.0")]
714 pub unsafe fn _mm_ceil_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
715 roundsd(a
, b
, _MM_FROUND_CEIL
)
718 /// Round the lower single-precision (32-bit) floating-point element in `b`
719 /// up to an integer value, store the result as a single-precision
720 /// floating-point element in the lower element of the intrinsic result,
721 /// and copies the upper 3 packed elements from `a` to the upper elements
722 /// of the intrinsic result.
724 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss)
726 #[target_feature(enable = "sse4.1")]
727 #[cfg_attr(test, assert_instr(roundss))]
728 #[stable(feature = "simd_x86", since = "1.27.0")]
729 pub unsafe fn _mm_ceil_ss(a
: __m128
, b
: __m128
) -> __m128
{
730 roundss(a
, b
, _MM_FROUND_CEIL
)
733 /// Round the packed double-precision (64-bit) floating-point elements in `a`
734 /// using the `rounding` parameter, and stores the results as packed
735 /// double-precision floating-point elements.
736 /// Rounding is done according to the rounding parameter, which can be one of:
739 /// #[cfg(target_arch = "x86")]
740 /// use std::arch::x86::*;
741 /// #[cfg(target_arch = "x86_64")]
742 /// use std::arch::x86_64::*;
745 /// // round to nearest, and suppress exceptions:
747 /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
748 /// // round down, and suppress exceptions:
750 /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
751 /// // round up, and suppress exceptions:
753 /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
754 /// // truncate, and suppress exceptions:
756 /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
757 /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
759 /// _MM_FROUND_CUR_DIRECTION;
763 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd)
765 #[target_feature(enable = "sse4.1")]
766 #[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
767 #[rustc_args_required_const(1)]
768 #[stable(feature = "simd_x86", since = "1.27.0")]
769 pub unsafe fn _mm_round_pd(a
: __m128d
, rounding
: i32) -> __m128d
{
775 constify_imm4
!(rounding
, call
)
778 /// Round the packed single-precision (32-bit) floating-point elements in `a`
779 /// using the `rounding` parameter, and stores the results as packed
780 /// single-precision floating-point elements.
781 /// Rounding is done according to the rounding parameter, which can be one of:
784 /// #[cfg(target_arch = "x86")]
785 /// use std::arch::x86::*;
786 /// #[cfg(target_arch = "x86_64")]
787 /// use std::arch::x86_64::*;
790 /// // round to nearest, and suppress exceptions:
792 /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
793 /// // round down, and suppress exceptions:
795 /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
796 /// // round up, and suppress exceptions:
798 /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
799 /// // truncate, and suppress exceptions:
801 /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
802 /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
804 /// _MM_FROUND_CUR_DIRECTION;
808 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps)
810 #[target_feature(enable = "sse4.1")]
811 #[cfg_attr(test, assert_instr(roundps, rounding = 0))]
812 #[rustc_args_required_const(1)]
813 #[stable(feature = "simd_x86", since = "1.27.0")]
814 pub unsafe fn _mm_round_ps(a
: __m128
, rounding
: i32) -> __m128
{
820 constify_imm4
!(rounding
, call
)
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// using the `rounding` parameter, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper element from `a` to the upper element of the intrinsic
/// result.
///
/// `rounding` is one of the following (exception reporting may additionally
/// be suppressed by OR-ing in `_MM_FROUND_NO_EXC`):
///
/// * `_MM_FROUND_TO_NEAREST_INT` — round to nearest
/// * `_MM_FROUND_TO_NEG_INF` — round down
/// * `_MM_FROUND_TO_POS_INF` — round up
/// * `_MM_FROUND_TO_ZERO` — truncate
/// * `_MM_FROUND_CUR_DIRECTION` — use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    // `rounding` must be a compile-time constant (index 2 above); dispatch it
    // to an immediate for the `roundsd` LLVM intrinsic.
    macro_rules! call {
        ($imm4:expr) => {
            roundsd(a, b, $imm4)
        };
    }
    constify_imm4!(rounding, call)
}
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// using the `rounding` parameter, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper 3 packed elements from `a` to the upper elements of
/// the intrinsic result.
///
/// `rounding` is one of the following (exception reporting may additionally
/// be suppressed by OR-ing in `_MM_FROUND_NO_EXC`):
///
/// * `_MM_FROUND_TO_NEAREST_INT` — round to nearest
/// * `_MM_FROUND_TO_NEG_INF` — round down
/// * `_MM_FROUND_TO_POS_INF` — round up
/// * `_MM_FROUND_TO_ZERO` — truncate
/// * `_MM_FROUND_CUR_DIRECTION` — use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, rounding = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
    // `rounding` must be a compile-time constant (index 2 above); dispatch it
    // to an immediate for the `roundss` LLVM intrinsic.
    macro_rules! call {
        ($imm4:expr) => {
            roundss(a, b, $imm4)
        };
    }
    constify_imm4!(rounding, call)
}
/// Finds the minimum unsigned 16-bit element in the 128-bit `__m128i` vector,
/// returning a vector containing its value in its first position, and its
/// index in its second position; all other elements are set to zero.
///
/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
/// instruction.
///
/// # Arguments
///
/// * `a` - A 128-bit vector of type `__m128i`.
///
/// # Returns
///
/// A 128-bit value where:
///
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
/// * bits `[18:16]` - contain the index of the minimum value
/// * remaining bits are set to `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
    transmute(phminposuw(a.as_u16x8()))
}
/// Multiplies the low 32-bit integers from each packed 64-bit
/// element in `a` and `b`, and returns the signed 64-bit result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
    // PMULDQ only reads lanes 0 and 2 of each operand; lanes 1 and 3 are
    // ignored by the hardware.
    transmute(pmuldq(a.as_i32x4(), b.as_i32x4()))
}
/// Multiplies the packed 32-bit integers in `a` and `b`, producing
/// intermediate 64-bit integers, and returns the lowest 32-bit, whatever they
/// might be, reinterpreted as a signed integer. While
/// `pmulld __m128i::splat(2), __m128i::splat(2)` returns the obvious
/// `__m128i::splat(4)`, due to wrapping arithmetic
/// `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would return a
/// negative number.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping multiply; lowered to PMULLD.
    transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
}
/// Subtracts 8-bit unsigned integer values and computes the absolute
/// values of the differences to the corresponding bits in the destination.
/// Then sums of the absolute differences are returned according to the bit
/// fields in the immediate operand.
///
/// The following algorithm is performed:
///
/// ```ignore
/// i = imm8[2] * 4
/// j = imm8[1:0] * 4
/// for k := 0 to 7
///     d0 = abs(a[i + k + 0] - b[j + 0])
///     d1 = abs(a[i + k + 1] - b[j + 1])
///     d2 = abs(a[i + k + 2] - b[j + 2])
///     d3 = abs(a[i + k + 3] - b[j + 3])
///     r[k] = d0 + d1 + d2 + d3
/// ```
///
/// # Arguments
///
/// * `a` - A 128-bit vector of type `__m128i`.
/// * `b` - A 128-bit vector of type `__m128i`.
/// * `imm8` - An 8-bit immediate operand specifying how the absolute
///   differences are to be calculated
///     * Bit `[2]` specify the offset for operand `a`
///     * Bits `[1:0]` specify the offset for operand `b`
///
/// # Returns
///
/// * A `__m128i` vector containing the sums of the sets of absolute
///   differences between both operands.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    // Only the low 3 bits of `imm8` are meaningful, hence `constify_imm3!`.
    macro_rules! call {
        ($imm8:expr) => {
            mpsadbw(a, b, $imm8)
        };
    }
    transmute(constify_imm3!(imm8, call))
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// # Arguments
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// # Returns
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    // PTEST sets ZF iff (a AND mask) == 0; this returns that flag.
    ptestz(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// # Arguments
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// # Returns
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    // PTEST sets CF iff ((NOT a) AND mask) == 0; this returns that flag.
    ptestc(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// # Arguments
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// # Returns
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    // Returns 1 iff PTEST leaves both ZF and CF clear, i.e. the masked bits
    // are a mix of zeros and ones.
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// # Arguments
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// # Returns
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    // Intel defines this as an alias for _mm_testz_si128.
    _mm_testz_si128(a, mask)
}
/// Tests whether the specified bits in `a` 128-bit integer vector are all
/// ones.
///
/// # Arguments
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
///
/// # Returns
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
    // `_mm_cmpeq_epi32(a, a)` produces an all-ones mask, so this checks that
    // every bit of `a` is set (hence both PCMPEQD and PTEST are emitted).
    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// # Arguments
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// # Returns
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros)
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    // Intel defines this as an alias for _mm_testnzc_si128.
    _mm_testnzc_si128(a, mask)
}
// Raw LLVM intrinsic declarations backing the public SSE4.1 wrappers above.
// `improper_ctypes` is allowed because SIMD vector types are not FFI-safe by
// the usual rules but are understood by the LLVM intrinsic ABI.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse41.pblendvb"]
    fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse41.blendvpd"]
    fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse41.blendvps"]
    fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128;
    #[link_name = "llvm.x86.sse41.blendpd"]
    fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d;
    #[link_name = "llvm.x86.sse41.blendps"]
    fn blendps(a: __m128, b: __m128, imm4: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.pblendw"]
    fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8;
    #[link_name = "llvm.x86.sse41.insertps"]
    fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.pmaxsb"]
    fn pmaxsb(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse41.pmaxuw"]
    fn pmaxuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.pmaxsd"]
    fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse41.pmaxud"]
    fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
    #[link_name = "llvm.x86.sse41.pminsb"]
    fn pminsb(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse41.pminuw"]
    fn pminuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.pminsd"]
    fn pminsd(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse41.pminud"]
    fn pminud(a: u32x4, b: u32x4) -> u32x4;
    #[link_name = "llvm.x86.sse41.packusdw"]
    fn packusdw(a: i32x4, b: i32x4) -> u16x8;
    #[link_name = "llvm.x86.sse41.dppd"]
    fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
    #[link_name = "llvm.x86.sse41.dpps"]
    fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.round.pd"]
    fn roundpd(a: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ps"]
    fn roundps(a: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.round.sd"]
    fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ss"]
    fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.phminposuw"]
    fn phminposuw(a: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.pmuldq"]
    fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
    #[link_name = "llvm.x86.sse41.mpsadbw"]
    fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
    #[link_name = "llvm.x86.sse41.ptestz"]
    fn ptestz(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestc"]
    fn ptestc(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestnzc"]
    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
1222 use crate::core_arch
::x86
::*;
1224 use stdarch_test
::simd_test
;
    // Blend intrinsics: per-lane select driven either by a runtime mask
    // (`blendv`) or by a compile-time immediate (`blend`).
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_epi8() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        // Only the sign bit of each mask byte matters; -1 selects from `b`.
        let mask = _mm_setr_epi8(
            0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1,
        );
        let e = _mm_setr_epi8(
            0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
        );
        assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_pd() {
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        let mask = transmute(_mm_setr_epi64x(0, -1));
        let r = _mm_blendv_pd(a, b, mask);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_ps() {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set1_ps(1.0);
        let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_blendv_ps(a, b, mask);
        let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_pd() {
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        let r = _mm_blend_pd(a, b, 0b10);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_ps() {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set1_ps(1.0);
        let r = _mm_blend_ps(a, b, 0b1010);
        let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_blend_epi16(a, b, 0b1010_1100);
        let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
        assert_eq_m128i(r, e);
    }
    // Extract/insert intrinsics. Out-of-range lane indices wrap modulo the
    // lane count, which these tests exercise deliberately.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_ps() {
        let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
        let r: f32 = transmute(_mm_extract_ps(a, 1));
        assert_eq!(r, 1.0);
        // Index 5 wraps to lane 1 (5 mod 4).
        let r: f32 = transmute(_mm_extract_ps(a, 5));
        assert_eq!(r, 1.0);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_epi8() {
        let a = _mm_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15
        );
        let r1 = _mm_extract_epi8(a, 0);
        // Index 19 wraps to lane 3 (19 mod 16).
        let r2 = _mm_extract_epi8(a, 19);
        // The extracted byte is zero-extended, so -1 reads back as 0xFF.
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm_extract_epi32(a, 1);
        assert_eq!(r, 1);
        // Index 5 wraps to lane 1 (5 mod 4).
        let r = _mm_extract_epi32(a, 5);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_ps() {
        let a = _mm_set1_ps(1.0);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        // imm bits: source lane 3 of `b` into lane 0 of `a`, zero lanes 2-3.
        let r = _mm_insert_ps(a, b, 0b11_00_1100);
        let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_epi8() {
        let a = _mm_set1_epi8(0);
        let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_insert_epi8(a, 32, 1);
        assert_eq_m128i(r, e);
        // Index 17 wraps to lane 1 (17 mod 16), so the result is unchanged.
        let r = _mm_insert_epi8(a, 32, 17);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_epi32() {
        let a = _mm_set1_epi32(0);
        let e = _mm_setr_epi32(0, 32, 0, 0);
        let r = _mm_insert_epi32(a, 32, 1);
        assert_eq_m128i(r, e);
        // Index 5 wraps to lane 1 (5 mod 4), so the result is unchanged.
        let r = _mm_insert_epi32(a, 32, 5);
        assert_eq_m128i(r, e);
    }
    // Lane-wise min/max (signed and unsigned widths) and unsigned saturating
    // pack.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epi8() {
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_max_epi8(a, b);
        let e = _mm_setr_epi8(
            2, 4, 6, 8, 10, 12, 14, 16,
            18, 20, 22, 24, 26, 28, 30, 32,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epu16() {
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_max_epu16(a, b);
        let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epi32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epi32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epu32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epu32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi8_1() {
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        let e = _mm_setr_epi8(
            1, 3, 5, 7, 9, 11, 13, 15,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }

    // Second case: checks the signed comparison with negative lanes.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi8_2() {
        let a = _mm_setr_epi8(
            1, -4, -5, 8, -9, -12, 13, -16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        let b = _mm_setr_epi8(
            2, -3, -6, 7, -10, -11, 14, -15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        let e = _mm_setr_epi8(
            1, -4, -6, 7, -10, -12, 13, -16,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epu16() {
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_min_epu16(a, b);
        let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi32_1() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }

    // Second case: checks the signed comparison with negative lanes.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi32_2() {
        let a = _mm_setr_epi32(-1, 4, 5, -7);
        let b = _mm_setr_epi32(-2, 3, -6, 8);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(-2, 3, -6, -7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epu32() {
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epu32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_packus_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(-1, -2, -3, -4);
        let r = _mm_packus_epi32(a, b);
        // Negative inputs saturate to 0 in the unsigned pack.
        let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
    // 64-bit equality compare and the sign-/zero-extending conversions.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cmpeq_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(0, 0);
        let r = _mm_cmpeq_epi64(a, b);
        let e = _mm_setr_epi64x(-1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi16() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi32() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi64() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi16_epi32() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi16_epi64() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi32_epi64() {
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi32(-10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu8_epi16() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu8_epi32() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu8_epi64() {
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepu8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu16_epi32() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepu16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu16_epi64() {
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepu16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepu32_epi64() {
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepu32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
    }
    // Dot products and floor/ceil rounding intrinsics.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_dp_pd() {
        let a = _mm_setr_pd(2.0, 3.0);
        let b = _mm_setr_pd(1.0, 4.0);
        // imm 0b0011_0001: multiply both lanes, store sum only in lane 0.
        let e = _mm_setr_pd(14.0, 0.0);
        assert_eq_m128d(_mm_dp_pd(a, b, 0b00110001), e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_dp_ps() {
        let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
        let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
        // imm 0b0111_0101: multiply lanes 0-2, broadcast sum to lanes 0 and 2.
        let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
        assert_eq_m128(_mm_dp_ps(a, b, 0b01110101), e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_pd() {
        let a = _mm_setr_pd(2.5, 4.5);
        let r = _mm_floor_pd(a);
        let e = _mm_setr_pd(2.0, 4.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_ps() {
        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
        let r = _mm_floor_ps(a);
        let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_sd() {
        let a = _mm_setr_pd(2.5, 4.5);
        let b = _mm_setr_pd(-1.5, -3.5);
        let r = _mm_floor_sd(a, b);
        // Lane 0 is floor(b[0]); lane 1 is copied from `a`.
        let e = _mm_setr_pd(-2.0, 4.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_floor_ss() {
        let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
        let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
        let r = _mm_floor_ss(a, b);
        // Lane 0 is floor(b[0]); lanes 1-3 are copied from `a`.
        let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_pd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let r = _mm_ceil_pd(a);
        let e = _mm_setr_pd(2.0, 4.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_ps() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let r = _mm_ceil_ps(a);
        let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_sd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        let r = _mm_ceil_sd(a, b);
        // Lane 0 is ceil(b[0]); lane 1 is copied from `a`.
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_ceil_ss() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
        let r = _mm_ceil_ss(a, b);
        // Lane 0 is ceil(b[0]); lanes 1-3 are copied from `a`.
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
    }
    // Rounding intrinsics, both with explicit modes and with the current
    // MXCSR rounding direction.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_pd() {
        let a = _mm_setr_pd(1.25, 3.75);
        let r = _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT);
        let e = _mm_setr_pd(1.0, 4.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_ps() {
        let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
        let r = _mm_round_ps(a, _MM_FROUND_TO_ZERO);
        let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_sd() {
        let a = _mm_setr_pd(1.5, 3.5);
        let b = _mm_setr_pd(-2.5, -4.5);
        // _MM_FROUND_CUR_DIRECTION reads MXCSR.RC, so set it explicitly and
        // restore the caller's mode afterwards.
        let old_mode = _MM_GET_ROUNDING_MODE();
        _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
        let r = _mm_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
        _MM_SET_ROUNDING_MODE(old_mode);
        let e = _mm_setr_pd(-2.0, 3.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_round_ss() {
        let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
        let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
        // _MM_FROUND_CUR_DIRECTION reads MXCSR.RC, so set it explicitly and
        // restore the caller's mode afterwards.
        let old_mode = _MM_GET_ROUNDING_MODE();
        _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
        let r = _mm_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
        _MM_SET_ROUNDING_MODE(old_mode);
        let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
        assert_eq_m128(r, e);
    }
    // PHMINPOSUW and the two 32-bit multiply intrinsics.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16_1() {
        let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
        let r = _mm_minpos_epu16(a);
        // Minimum value 13 at index 5; remaining lanes zeroed.
        let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16_2() {
        let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
        let r = _mm_minpos_epu16(a);
        // Minimum value 0 at index 0.
        let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_mul_epi32() {
        {
            let a = _mm_setr_epi32(1, 1, 1, 1);
            let b = _mm_setr_epi32(1, 2, 3, 4);
            let r = _mm_mul_epi32(a, b);
            // Only lanes 0 and 2 of each operand participate.
            let e = _mm_setr_epi64x(1, 3);
            assert_eq_m128i(r, e);
        }
        {
            let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */);
            let b = _mm_setr_epi32(
                -20, -256, /* ignored */
                666666, 666666, /* ignored */
            );
            let r = _mm_mul_epi32(a, b);
            let e = _mm_setr_epi64x(-300, 823043843622);
            assert_eq_m128i(r, e);
        }
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_mullo_epi32() {
        {
            let a = _mm_setr_epi32(1, 1, 1, 1);
            let b = _mm_setr_epi32(1, 2, 3, 4);
            let r = _mm_mullo_epi32(a, b);
            let e = _mm_setr_epi32(1, 2, 3, 4);
            assert_eq_m128i(r, e);
        }
        {
            let a = _mm_setr_epi32(15, -2, 1234567, 99999);
            let b = _mm_setr_epi32(-20, -256, 666666, -99999);
            let r = _mm_mullo_epi32(a, b);
            // Attention, most significant bit in r[2] is treated
            // as a sign bit:
            // 1234567 * 666666 = -1589877210
            let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
            assert_eq_m128i(r, e);
        }
    }
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_minpos_epu16() {
        let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
        let r = _mm_minpos_epu16(a);
        // Minimum value 1 at index 5; remaining lanes zeroed.
        let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    // Exercises every meaningful immediate bit pattern: bit 2 offsets the
    // `a` window, bits 1:0 offset the `b` quadruplet.
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_mpsadbw_epu8() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );

        let r = _mm_mpsadbw_epu8(a, a, 0b000);
        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8(a, a, 0b001);
        let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8(a, a, 0b100);
        let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8(a, a, 0b101);
        let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
        assert_eq_m128i(r, e);

        let r = _mm_mpsadbw_epu8(a, a, 0b111);
        let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
        assert_eq_m128i(r, e);
    }
    // PTEST-based predicates. Expected results follow directly from the flag
    // definitions: ZF = ((a AND mask) == 0), CF = (((NOT a) AND mask) == 0).
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_testz_si128() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_testc_si128() {
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_testnzc_si128() {
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_test_all_zeros() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_test_all_ones() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 0);
    }
1917 #[simd_test(enable = "sse4.1")]
1918 unsafe fn test_mm_test_mix_ones_zeros() {
1919 let a
= _mm_set1_epi8(0);
1920 let mask
= _mm_set1_epi8(1);
1921 let r
= _mm_test_mix_ones_zeros(a
, mask
);
1923 let a
= _mm_set1_epi8(-1);
1924 let mask
= _mm_set1_epi8(0);
1925 let r
= _mm_test_mix_ones_zeros(a
, mask
);
1927 let a
= _mm_set1_epi8(0b101);
1928 let mask
= _mm_set1_epi8(0b110);
1929 let r
= _mm_test_mix_ones_zeros(a
, mask
);
1931 let a
= _mm_set1_epi8(0b101);
1932 let mask
= _mm_set1_epi8(0b101);
1933 let r
= _mm_test_mix_ones_zeros(a
, mask
);