//! Advanced Vector Extensions 2 (AVX2)
//!
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
//! overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    transmute(pabsd(a.as_i32x8()))
}

/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    transmute(pabsw(a.as_i16x16()))
}

/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    transmute(pabsb(a.as_i8x32()))
}

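// Added usage sketch (not part of the upstream source; the function name is
// illustrative). Real callers should guard execution with
// `is_x86_feature_detected!("avx2")`. Note the hardware edge case: the
// absolute value of `i32::MIN` wraps back to `i32::MIN`, exactly as `vpabsd`
// behaves.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_abs_epi32() {
    let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, i32::min_value());
    let r: [i32; 8] = transmute(_mm256_abs_epi32(a));
    assert_eq!(r, [1, 2, 3, 4, 5, 6, 7, i32::min_value()]);
}
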
/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i64x4(), b.as_i64x4()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i32x8(), b.as_i32x8()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i8x32(), b.as_i8x32()))
}

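// Added usage sketch (not part of the upstream source; name illustrative):
// the plain `add` intrinsics are lane-wise wrapping adds, so overflow behaves
// like `i32::wrapping_add`.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_add_epi32() {
    let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, i32::max_value());
    let b = _mm256_set1_epi32(1);
    let r: [i32; 8] = transmute(_mm256_add_epi32(a, b));
    assert_eq!(r, [2, 3, 4, 5, 6, 7, 8, i32::min_value()]);
}
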
/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16()))
}

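// Added usage sketch (not part of the upstream source; name illustrative):
// unlike the plain `add` intrinsics above, the `adds` family clamps at the
// type's bounds instead of wrapping, matching `i8::saturating_add` per lane.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_adds_epi8() {
    let a = _mm256_set1_epi8(120);
    let b = _mm256_set1_epi8(20);
    let r: [i8; 32] = transmute(_mm256_adds_epi8(a, b));
    // 120 + 20 saturates to i8::MAX in every lane.
    assert_eq!(r, [127i8; 32]);
}
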
/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `n` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, n = 7))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
    let n = n as u32;
    // If `palignr` is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm256_set1_epi8(0);
    }
    // If `palignr` is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm256_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };

    let a = a.as_i8x32();
    let b = b.as_i8x32();

    let r: i8x32 = match n {
        0 => simd_shuffle32(b, a, [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 28, 29, 30, 31,
        ]),
        1 => simd_shuffle32(b, a, [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 48,
        ]),
        2 => simd_shuffle32(b, a, [
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 48, 49,
        ]),
        3 => simd_shuffle32(b, a, [
            3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
        ]),
        4 => simd_shuffle32(b, a, [
            4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
        ]),
        5 => simd_shuffle32(b, a, [
            5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26,
            27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
        ]),
        6 => simd_shuffle32(b, a, [
            6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
        ]),
        7 => simd_shuffle32(b, a, [
            7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
        ]),
        8 => simd_shuffle32(b, a, [
            8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28,
            29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
        ]),
        9 => simd_shuffle32(b, a, [
            9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29,
            30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
        ]),
        10 => simd_shuffle32(b, a, [
            10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30,
            31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
        ]),
        11 => simd_shuffle32(b, a, [
            11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
        ]),
        12 => simd_shuffle32(b, a, [
            12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
        ]),
        13 => simd_shuffle32(b, a, [
            13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
        ]),
        14 => simd_shuffle32(b, a, [
            14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        ]),
        15 => simd_shuffle32(b, a, [
            15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51,
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
        ]),
        // `n == 16`: each result lane is exactly the matching lane of the
        // (possibly zeroed) `a`.
        _ => a,
    };
    transmute(r)
}

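// Added usage sketch (not part of the upstream source; name illustrative):
// `_mm256_alignr_epi8` operates on each 128-bit lane independently, the
// classic AVX2 surprise relative to a whole-register byte shift.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_alignr_epi8() {
    let a = _mm256_set1_epi8(1);
    let b = _mm256_set1_epi8(2);
    // Shift each [a_lane : b_lane] pair right by 4 bytes: 12 bytes of `b`
    // followed by 4 bytes of `a` in every 16-byte lane.
    let r: [i8; 32] = transmute(_mm256_alignr_epi8(a, b, 4));
    for lane in r.chunks(16) {
        assert_eq!(&lane[..12], &[2i8; 12][..]);
        assert_eq!(&lane[12..], &[1i8; 4][..]);
    }
}
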
/// Computes the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_and(a.as_i64x4(), b.as_i64x4()))
}

/// Computes the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    let all_ones = _mm256_set1_epi8(-1);
    transmute(simd_and(
        simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
        b.as_i64x4(),
    ))
}

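// Added usage sketch (not part of the upstream source; name illustrative):
// `andnot` computes `(!a) & b`, which is handy for clearing exactly the bits
// selected by a mask.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_andnot_si256() {
    let mask = _mm256_set1_epi32(0xFF); // bits to clear
    let data = _mm256_set1_epi32(0x0F0F);
    let r: [i32; 8] = transmute(_mm256_andnot_si256(mask, data));
    assert_eq!(r, [0x0F00; 8]); // low byte cleared in every lane
}
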
/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pavgw(a.as_u16x16(), b.as_u16x16()))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pavgb(a.as_u8x32(), b.as_u8x32()))
}

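// Added usage sketch (not part of the upstream source; name illustrative):
// the `avg` intrinsics compute a rounded average, `(a + b + 1) >> 1`, in a
// widened intermediate, so the +1 rounding never overflows.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_avg_epu8() {
    let a = _mm256_set1_epi8(-1); // 255 as an unsigned byte
    let b = _mm256_set1_epi8(0);
    let r: [u8; 32] = transmute(_mm256_avg_epu8(a, b));
    assert_eq!(r, [128u8; 32]); // (255 + 0 + 1) >> 1
}
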
/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    macro_rules! blend2 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            simd_shuffle4(a, b, [$a, $b, $c, $d])
        };
    }
    macro_rules! blend1 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => blend2!($a, $b, 2, 3),
                0b01 => blend2!($a, $b, 6, 3),
                0b10 => blend2!($a, $b, 2, 7),
                _ => blend2!($a, $b, 6, 7),
            }
        };
    }
    let r: i32x4 = match imm8 & 0b11 {
        0b00 => blend1!(0, 1),
        0b01 => blend1!(4, 1),
        0b10 => blend1!(0, 5),
        _ => blend1!(4, 5),
    };
    transmute(r)
}

/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    macro_rules! blend4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! blend3 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7),
                0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7),
                0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15),
                _ => blend4!($a, $b, $c, $d, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! blend2 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => blend3!($a, $b, $c, $d, 4, 5),
                0b01 => blend3!($a, $b, $c, $d, 12, 5),
                0b10 => blend3!($a, $b, $c, $d, 4, 13),
                _ => blend3!($a, $b, $c, $d, 12, 13),
            }
        };
    }
    macro_rules! blend1 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => blend2!($a, $b, 2, 3),
                0b01 => blend2!($a, $b, 10, 3),
                0b10 => blend2!($a, $b, 2, 11),
                _ => blend2!($a, $b, 10, 11),
            }
        };
    }
    let r: i32x8 = match imm8 & 0b11 {
        0b00 => blend1!(0, 1),
        0b01 => blend1!(8, 1),
        0b10 => blend1!(0, 9),
        _ => blend1!(8, 9),
    };
    transmute(r)
}

/// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    macro_rules! blend4 {
        (
            $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr,
            $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr
        ) => {
            simd_shuffle16(a, b, [
                $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
            ])
        };
    }
    macro_rules! blend3 {
        (
            $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr,
            $a2:expr, $b2:expr, $c2:expr, $d2:expr, $e2:expr, $f2:expr
        ) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2, $f2, 14, 15),
                0b01 => blend4!($a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2, $e2, $f2, 30, 15),
                0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2, $e2, $f2, 14, 31),
                _ => blend4!($a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2, $e2, $f2, 30, 31),
            }
        };
    }
    macro_rules! blend2 {
        (
            $a:expr, $b:expr, $c:expr, $d:expr,
            $a2:expr, $b2:expr, $c2:expr, $d2:expr
        ) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => blend3!($a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13),
                0b01 => blend3!($a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13),
                0b10 => blend3!($a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29),
                _ => blend3!($a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29),
            }
        };
    }
    macro_rules! blend1 {
        ($a1:expr, $b1:expr, $a2:expr, $b2:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => blend2!($a1, $b1, 2, 3, $a2, $b2, 10, 11),
                0b01 => blend2!($a1, $b1, 18, 3, $a2, $b2, 26, 11),
                0b10 => blend2!($a1, $b1, 2, 19, $a2, $b2, 10, 27),
                _ => blend2!($a1, $b1, 18, 19, $a2, $b2, 26, 27),
            }
        };
    }
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => blend1!(0, 1, 8, 9),
        0b01 => blend1!(16, 1, 24, 9),
        0b10 => blend1!(0, 17, 8, 25),
        _ => blend1!(16, 17, 24, 25),
    };
    transmute(r)
}

/// Blends packed 8-bit integers from `a` and `b` using `mask`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32()))
}

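// Added usage sketch (not part of the upstream source; name illustrative):
// `blendv` picks per byte based on the mask's sign bit, so a full compare
// result works directly as the selector.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_blendv_epi8() {
    let a = _mm256_set1_epi8(1);
    let b = _mm256_set1_epi8(2);
    let mask = _mm256_set1_epi8(-1); // sign bit set: take `b` everywhere
    let r: [i8; 32] = transmute(_mm256_blendv_epi8(a, b, mask));
    assert_eq!(r, [2i8; 32]);
}
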
/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle16(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]);
    transmute::<i8x16, _>(ret)
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle32(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]);
    transmute::<i8x32, _>(ret)
}

// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle4(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]);
    transmute::<i32x4, _>(ret)
}

// N.B., `simd_shuffle8` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle8(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]);
    transmute::<i32x8, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
// FIXME: https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    let ret = simd_shuffle2(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
    transmute::<i64x2, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    let ret = simd_shuffle4(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
    transmute::<i64x4, _>(ret)
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2])
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4])
}

// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
// `vbroadcastf128`.
/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]);
    transmute::<i64x4, _>(ret)
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4])
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8])
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle8(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]);
    transmute::<i16x8, _>(ret)
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle16(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]);
    transmute::<i16x16, _>(ret)
}

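// Added usage sketch (not part of the upstream source; name illustrative):
// every broadcast intrinsic splats element 0 of its source, so the result is
// uniform.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_broadcastd_epi32() {
    let a = _mm_setr_epi32(7, 1, 2, 3);
    let r: [i32; 8] = transmute(_mm256_broadcastd_epi32(a));
    assert_eq!(r, [7i32; 8]);
}
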
/// Compares packed 64-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
}

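// Added usage sketch (not part of the upstream source; name illustrative):
// comparisons return all-ones (-1) for true lanes and all-zeros for false
// lanes, which is why their results feed straight into `blendv`- or
// `and`-style masking.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_cmpeq_epi32() {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm256_set1_epi32(3);
    let r: [i32; 8] = transmute(_mm256_cmpeq_epi32(a, b));
    assert_eq!(r, [0, 0, 0, -1, 0, 0, 0, 0]);
}
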
/// Compares packed 64-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
}

/// Sign-extend 16-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_i16x8()))
}

/// Sign-extend 16-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    let a = a.as_i16x8();
    let v64: i16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Sign-extend 32-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_i32x4()))
}

/// Sign-extend 8-bit integers to 16-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_i8x16()))
}

/// Sign-extend 8-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    let a = a.as_i8x16();
    let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Sign-extend 8-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    let a = a.as_i8x16();
    let v32: i8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}

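// Added usage sketch (not part of the upstream source; name illustrative):
// the `cvtepi` family widens the low elements of a 128-bit vector with sign
// extension, so negative values survive the conversion.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_cvtepi16_epi32() {
    let a = _mm_setr_epi16(-1, 2, -3, 4, -5, 6, -7, 8);
    let r: [i32; 8] = transmute(_mm256_cvtepi16_epi32(a));
    assert_eq!(r, [-1, 2, -3, 4, -5, 6, -7, 8]);
}
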
/// Zero-extends packed unsigned 16-bit integers in `a` to packed 32-bit
/// integers, and stores the results in `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_u16x8()))
}

/// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    let a = a.as_u16x8();
    let v64: u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_u32x4()))
}

/// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_u8x16()))
}

/// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    let a = a.as_u8x16();
    let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    let a = a.as_u8x16();
    let v32: u8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}

/// Extracts 128 bits (of integer data) from `a` selected with `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, imm8 = 1)
)]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
    let a = a.as_i64x4();
    let b = _mm256_undefined_si256().as_i64x4();
    let dst: i64x2 = match imm8 & 0b01 {
        0 => simd_shuffle2(a, b, [0, 1]),
        _ => simd_shuffle2(a, b, [2, 3]),
    };
    transmute(dst)
}

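// Added usage sketch (not part of the upstream source; name illustrative):
// only the lowest bit of `imm8` matters, choosing between the low (0) and
// high (1) 128-bit half.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_extracti128_si256() {
    let a = _mm256_setr_epi64x(0, 1, 2, 3);
    let hi: [i64; 2] = transmute(_mm256_extracti128_si256(a, 1));
    assert_eq!(hi, [2, 3]);
}
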
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddsw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubsw(a.as_i16x16(), b.as_i16x16()))
}

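// Added usage sketch (not part of the upstream source; name illustrative):
// horizontal adds pair up neighbors within each source and interleave `a`'s
// pairs and `b`'s pairs per 128-bit lane.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_hadd_epi32() {
    let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm256_set1_epi32(10);
    let r: [i32; 8] = transmute(_mm256_hadd_epi32(a, b));
    // Low lane: a0+a1, a2+a3, b0+b1, b2+b3; high lane likewise.
    assert_eq!(r, [3, 7, 20, 20, 11, 15, 20, 20]);
}
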
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i32x4();
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

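// Added usage sketch (not part of the upstream source; name illustrative):
// gathers read one element per lane at byte address `base + offset * scale`,
// so with `scale = 4` the offsets index an `i32` slice directly. `scale`
// must be a literal 1, 2, 4, or 8.
#[cfg(test)]
#[target_feature(enable = "avx2")]
unsafe fn _example_i32gather_epi32() {
    let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
    let offsets = _mm_setr_epi32(0, 2, 4, 6);
    let r: [i32; 4] = transmute(_mm_i32gather_epi32(data.as_ptr(), offsets, 4));
    assert_eq!(r, [10, 12, 14, 16]);
}
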
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: __m256i, scale: i32) -> __m256i {
    let zero = _mm256_setzero_si256().as_i32x8();
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
    scale: i32,
) -> __m256i {
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: __m128i, scale: i32) -> __m128 {
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
    scale: i32,
) -> __m128 {
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: __m256i, scale: i32) -> __m256 {
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
    scale: i32,
) -> __m256 {
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i64x2();
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m256i {
    let zero = _mm256_setzero_si256().as_i64x4();
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
    scale: i32,
) -> __m256i {
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m128d {
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
    scale: i32,
) -> __m128d {
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m256d {
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
    scale: i32,
) -> __m256d {
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i32x4();
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: __m256i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i32x4();
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: __m128i, scale: i32) -> __m128 {
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
    scale: i32,
) -> __m128 {
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: __m256i, scale: i32) -> __m128 {
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If mask is set, load the value from
/// `src` in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
    scale: i32,
) -> __m128 {
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i64x2();
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If the highest bit of the corresponding
/// element in `mask` is not set, the value is loaded from `src` instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi64)
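///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data: [i64; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
/// let offsets = _mm256_setr_epi64x(0, 2, 4, 6);
/// // With scale = 8 (the size of an `i64` in bytes) this gathers
/// // data[0], data[2], data[4] and data[6].
/// let r = _mm256_i64gather_epi64(data.as_ptr(), offsets, 8);
/// let expected = _mm256_setr_epi64x(10, 12, 14, 16);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```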
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: __m256i, scale: i32) -> __m256i {
    let zero = _mm256_setzero_si256().as_i64x4();
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If the highest bit of the corresponding
/// element in `mask` is not set, the value is loaded from `src` instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
    scale: i32,
) -> __m256i {
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m128d {
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If the highest bit of the corresponding
/// element in `mask` is not set, the value is loaded from `src` instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
    scale: i32,
) -> __m128d {
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: __m256i, scale: i32) -> __m256d {
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. If the highest bit of the corresponding
/// element in `mask` is not set, the value is loaded from `src` instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
    scale: i32,
) -> __m256d {
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at
/// the location specified by `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256)
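///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm_set_epi64x(11, 10); // b = [10, 11]
/// // imm8 = 1 selects the upper 128-bit half as the insertion point.
/// let r = _mm256_inserti128_si256(a, b, 1);
/// let expected = _mm256_setr_epi64x(0, 1, 10, 11);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```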
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vinsertf128, imm8 = 1)
)]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    let b = _mm256_castsi128_si256(b).as_i64x4();
    let dst: i64x4 = match imm8 & 0b01 {
        0 => simd_shuffle4(a, b, [4, 5, 2, 3]),
        _ => simd_shuffle4(a, b, [0, 1, 4, 5]),
    };
    transmute(dst)
}

/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
/// of intermediate 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd_epi16)
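///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(2);
/// let b = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
/// // Each 32-bit lane is 2*b[2i] + 2*b[2i+1]; the first is 2*1 + 2*2 = 6.
/// let r = _mm256_madd_epi16(a, b);
/// let expected = _mm256_setr_epi32(6, 14, 22, 30, 38, 46, 54, 62);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```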
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaddwd(a.as_i16x16(), b.as_i16x16()))
}

/// Vertically multiplies each unsigned 8-bit integer from `a` with the
/// corresponding signed 8-bit integer from `b`, producing intermediate
/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
/// signed 16-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32()))
}

/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi32)
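///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data: [i32; 4] = [1, 2, 3, 4];
/// // Lanes 0 and 2 have the mask's highest bit set and are loaded;
/// // lanes 1 and 3 are zeroed.
/// let mask = _mm_setr_epi32(-1, 0, -1, 0);
/// let r = _mm_maskload_epi32(data.as_ptr(), mask);
/// let expected = _mm_setr_epi32(1, 0, 3, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```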
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}

/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}

/// Stores packed 32-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi32)
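///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let mut out = [0i32; 4];
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// // Only lanes 0 and 2 are stored; out[1] and out[3] stay untouched.
/// let mask = _mm_setr_epi32(-1, 0, -1, 0);
/// _mm_maskstore_epi32(out.as_mut_ptr(), mask, a);
/// assert_eq!(out, [1, 0, 3, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```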
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}

/// Stores packed 32-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

/// Stores packed 64-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

/// Stores packed 64-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxsw(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxsd(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxsb(a.as_i8x32(), b.as_i8x32()))
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
/// the packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxuw(a.as_u16x16(), b.as_u16x16()))
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
/// the packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxud(a.as_u32x8(), b.as_u32x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
/// the packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxub(a.as_u8x32(), b.as_u8x32()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminsw(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminsd(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminsb(a.as_i8x32(), b.as_i8x32()))
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
/// the packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminuw(a.as_u16x16(), b.as_u16x16()))
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
/// the packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminud(a.as_u32x8(), b.as_u32x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
/// the packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminub(a.as_u8x32(), b.as_u8x32()))
}

/// Creates a mask from the most significant bit of each 8-bit element in
/// `a`, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8)
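///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
///     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/// );
/// // Bytes 0 and 2 have their most significant bit set, so bits 0 and 2
/// // of the mask are set.
/// assert_eq!(_mm256_movemask_epi8(a), 0b101);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```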
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    pmovmskb(a.as_i8x32())
}

/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
/// results in `dst`. Eight SADs are performed for each 128-bit lane using one
/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
/// selected from `b` starting at the offset specified in `imm8`. Eight
/// quadruplets are formed from sequential 8-bit integers selected from `a`
/// starting at the offset specified in `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    macro_rules! call {
        ($imm8:expr) => {
            mpsadbw(a, b, $imm8)
        };
    }
    let r = constify_imm8!(imm8, call);
    transmute(r)
}

/// Multiplies the low 32-bit integers from each packed 64-bit element in
/// `a` and `b`.
///
/// Returns the 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmuldq(a.as_i32x8(), b.as_i32x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
/// element in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmuludq(a.as_u32x8(), b.as_u32x8()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhw(a.as_i16x16(), b.as_i16x16()))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhuw(a.as_u16x16(), b.as_u16x16()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers, and returns the low 16 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_mul(a.as_i16x16(), b.as_i16x16()))
}

/// Multiplies the packed 32-bit integers in `a` and `b`, producing
/// intermediate 64-bit integers, and returns the low 32 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_mul(a.as_i32x8(), b.as_i32x8()))
}

/// Multiplies packed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Truncates each intermediate
/// integer to the 18 most significant bits, rounds by adding 1, and
/// returns bits `[16:1]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhrs_epi16)
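///
/// For illustration, a small worked sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // In Q15 fixed point, 0.5 * 0.25 = 0.125:
/// let a = _mm256_set1_epi16(0x4000); // 0.5
/// let b = _mm256_set1_epi16(0x2000); // 0.25
/// let r = _mm256_mulhrs_epi16(a, b);
/// // (((0x4000 * 0x2000) >> 14) + 1) >> 1 == 0x1000, i.e. 0.125.
/// let expected = _mm256_set1_epi16(0x1000);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```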
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16()))
}

/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
/// and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16)
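///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(300); // saturates to 127, the i8 maximum
/// let b = _mm256_set1_epi16(-300); // saturates to -128, the i8 minimum
/// // Within each 128-bit lane: eight bytes from `a`, then eight from `b`.
/// let r = _mm256_packs_epi16(a, b);
/// let expected = _mm256_setr_epi8(
///     127, 127, 127, 127, 127, 127, 127, 127,
///     -128, -128, -128, -128, -128, -128, -128, -128,
///     127, 127, 127, 127, 127, 127, 127, 127,
///     -128, -128, -128, -128, -128, -128, -128, -128,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```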
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(packsswb(a.as_i16x16(), b.as_i16x16()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(packssdw(a.as_i32x8(), b.as_i32x8()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(packuswb(a.as_i16x16(), b.as_i16x16()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(packusdw(a.as_i32x8(), b.as_i32x8()))
}

/// Permutes packed 32-bit integers from `a` according to the content of `b`.
///
/// The last 3 bits of each integer of `b` are used as addresses into the 8
/// integers of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(permd(a.as_u32x8(), b.as_u32x8()))
}

/// Permutes 64-bit integers from `a` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_epi64)
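///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(10, 20, 30, 40);
/// // Each 2-bit field of imm8 picks one source element;
/// // 0b00_01_10_11 reverses the four elements.
/// let r = _mm256_permute4x64_epi64(a, 0b00_01_10_11);
/// let expected = _mm256_setr_epi64x(40, 30, 20, 10);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```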
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let zero = _mm256_setzero_si256().as_i64x4();
    let a = a.as_i64x4();
    macro_rules! permute4 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            simd_shuffle4(a, zero, [$a, $b, $c, $d])
        };
    }
    macro_rules! permute3 {
        ($a:expr, $b:expr, $c:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => permute4!($a, $b, $c, 0),
                0b01 => permute4!($a, $b, $c, 1),
                0b10 => permute4!($a, $b, $c, 2),
                _ => permute4!($a, $b, $c, 3),
            }
        };
    }
    macro_rules! permute2 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => permute3!($a, $b, 0),
                0b01 => permute3!($a, $b, 1),
                0b10 => permute3!($a, $b, 2),
                _ => permute3!($a, $b, 3),
            }
        };
    }
    macro_rules! permute1 {
        ($a:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => permute2!($a, 0),
                0b01 => permute2!($a, 1),
                0b10 => permute2!($a, 2),
                _ => permute2!($a, 3),
            }
        };
    }
    let r: i64x4 = match imm8 & 0b11 {
        0b00 => permute1!(0),
        0b01 => permute1!(1),
        0b10 => permute1!(2),
        _ => permute1!(3),
    };
    transmute(r)
}

/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    let b = b.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vperm2i128(a, b, $imm8)
        };
    }
    transmute(constify_imm8!(imm8, call))
}

/// Shuffles 64-bit floating-point elements in `a` across lanes using the
/// control in `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
    let imm8 = (imm8 & 0xFF) as u8;
    let undef = _mm256_undefined_pd();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle4(a, undef, [$x01, $x23, $x45, $x67])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    }
}

/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
/// the corresponding 32-bit integer index in `idx`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    permps(a, idx.as_i32x8())
}

/// Computes the absolute differences of packed unsigned 8-bit integers in
/// `a` and `b`, then horizontally sums each consecutive 8 differences to
/// produce four unsigned 16-bit integers, and packs these unsigned 16-bit
/// integers in the low 16 bits of each 64-bit element of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sad_epu8)
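///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(3);
/// let b = _mm256_set1_epi8(1);
/// // Each 64-bit lane sums eight absolute differences: 8 * |3 - 1| = 16.
/// let r = _mm256_sad_epu8(a, b);
/// let expected = _mm256_set1_epi64x(16);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```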
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(psadbw(a.as_u8x32(), b.as_u8x32()))
}

/// Shuffles bytes from `a` according to the content of `b`.
///
/// The last 4 bits of each byte of `b` are used as addresses into the 32
/// bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b` is set, the
/// respective destination byte is set to 0.
///
/// The low and high halves of the vectors are shuffled separately.
///
/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
/// equivalent to:
///
/// ```
/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
///     let mut r = [0; 32];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///         if b[i + 16] & 0x80 == 0u8 {
///             r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pshufb(a.as_u8x32(), b.as_u8x32()))
}

/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
/// `imm8`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///
/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
///
/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
///
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
    // simd_shuffleX requires that its selector parameter be made up of
    // constant values, but we can't enforce that here. In spirit, we need
    // to write a `match` on all possible values of a byte, and for each value,
    // hard-code the correct `simd_shuffleX` call using only constants. We
    // then hope for LLVM to do the rest.
    //
    // Of course, that's... awful. So we try to use macros to do it for us.
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x8();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle8(a, a, [
                $x01, $x23, $x45, $x67,
                4 + $x01, 4 + $x23, 4 + $x45, 4 + $x67,
            ])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let r: i32x8 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}

/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
/// to the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflehi_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle16(a, a, [
                0, 1, 2, 3, 4 + $x01, 4 + $x23, 4 + $x45, 4 + $x67,
                8, 9, 10, 11, 12 + $x01, 12 + $x23, 12 + $x45, 12 + $x67,
            ])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}

/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
/// to the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle16(a, a, [
                0 + $x01, 0 + $x23, 0 + $x45, 0 + $x67, 4, 5, 6, 7,
                8 + $x01, 8 + $x23, 8 + $x45, 8 + $x67, 12, 13, 14, 15,
            ])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}

/// Negates packed 16-bit integers in `a` when the corresponding signed
/// 16-bit integer in `b` is negative, and returns the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignw(a.as_i16x16(), b.as_i16x16()))
}

/// Negates packed 32-bit integers in `a` when the corresponding signed
/// 32-bit integer in `b` is negative, and returns the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignd(a.as_i32x8(), b.as_i32x8()))
}

/// Negates packed 8-bit integers in `a` when the corresponding signed
/// 8-bit integer in `b` is negative, and returns the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignb(a.as_i8x32(), b.as_i8x32()))
}

/// Shifts packed 16-bit integers in `a` left by `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psllw(a.as_i16x16(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(pslld(a.as_i32x8(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(psllq(a.as_i64x4(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` left by `imm8` while
/// shifting in zeros, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_epi16(a: __m256i, imm8: i32) -> __m256i {
    transmute(pslliw(a.as_i16x16(), imm8))
}

/// Shifts packed 32-bit integers in `a` left by `imm8` while
/// shifting in zeros, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_epi32(a: __m256i, imm8: i32) -> __m256i {
    transmute(psllid(a.as_i32x8(), imm8))
}

/// Shifts packed 64-bit integers in `a` left by `imm8` while
/// shifting in zeros, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i {
    transmute(pslliq(a.as_i64x4(), imm8))
}

/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256)
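///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(1);
/// // Each 128-bit lane is shifted left by 2 bytes, so the two lowest
/// // bytes of each lane become zero.
/// let r = _mm256_slli_si256(a, 2);
/// let expected = _mm256_setr_epi8(
///     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
///     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```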
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpslldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}

/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpslldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}

/// Shifts packed 32-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllvd(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 32-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32)
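///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(1);
/// let counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Each lane is shifted by its own count: 1 << 0, 1 << 1, ..., 1 << 7.
/// let r = _mm256_sllv_epi32(a, counts);
/// let expected = _mm256_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```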
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvd256(a.as_i32x8(), count.as_i32x8()))
}

/// Shifts packed 64-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllvq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 64-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvq256(a.as_i64x4(), count.as_i64x4()))
}

/// Shifts packed 16-bit integers in `a` right by `count` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psraw(a.as_i16x16(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `count` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrad(a.as_i32x8(), count.as_i32x4()))
}

/// Shifts packed 16-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srai_epi16(a: __m256i, imm8: i32) -> __m256i {
    transmute(psraiw(a.as_i16x16(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32)
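///
/// For illustration, a small usage sketch (not from the original docs; it
/// assumes AVX2 has been detected at runtime and uses made-up example data):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(-8);
/// // An arithmetic shift preserves the sign: -8 >> 1 == -4.
/// let r = _mm256_srai_epi32(a, 1);
/// let expected = _mm256_set1_epi32(-4);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```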
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srai_epi32(a: __m256i, imm8: i32) -> __m256i {
    transmute(psraid(a.as_i32x8(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by the
/// corresponding element in `count` while shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psravd(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by the
/// corresponding element in `count` while shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psravd256(a.as_i32x8(), count.as_i32x8()))
}

/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}

/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrlw(a.as_i16x16(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrld(a.as_i32x8(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrlq(a.as_i64x4(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_epi16(a: __m256i, imm8: i32) -> __m256i {
    transmute(psrliw(a.as_i16x16(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_epi32(a: __m256i, imm8: i32) -> __m256i {
    transmute(psrlid(a.as_i32x8(), imm8))
}

/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_epi64(a: __m256i, imm8: i32) -> __m256i {
    transmute(psrliq(a.as_i64x4(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros, and
/// returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlvd(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros, and
/// returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psrlvd256(a.as_i32x8(), count.as_i32x8()))
}

/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros, and
/// returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlvq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros, and
/// returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
}

// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i16x16(), b.as_i16x16()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i32x8(), b.as_i32x8()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i64x4(), b.as_i64x4()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i8x32(), b.as_i8x32()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
/// `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi16)
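///
/// A small sketch showing the saturating behavior (not from the original
/// docs):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(i16::MIN);
/// let b = _mm256_set1_epi16(1);
/// // Signed saturation: i16::MIN - 1 stays at i16::MIN instead of wrapping.
/// let r = _mm256_subs_epi16(a, b);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, a)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```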
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
/// `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned
/// 8-bit integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu8)
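///
/// A small sketch showing unsigned saturation at zero (not from the original
/// docs):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(1);
/// let b = _mm256_set1_epi8(2);
/// // Unsigned saturation: 1 - 2 clamps to 0 instead of wrapping to 255.
/// let r = _mm256_subs_epu8(a, b);
/// let e = _mm256_set1_epi8(0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```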
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32()))
}

/// Unpacks and interleaves 8-bit integers from the high half of each
/// 128-bit lane in `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
///     -30, -31,
/// );
///
/// let c = _mm256_unpackhi_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(
///     8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
///     24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
///     -31,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
        8, 40, 9, 41, 10, 42, 11, 43,
        12, 44, 13, 45, 14, 46, 15, 47,
        24, 56, 25, 57, 26, 58, 27, 59,
        28, 60, 29, 61, 30, 62, 31, 63,
    ]);
    transmute(r)
}

/// Unpacks and interleaves 8-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
///     -30, -31,
/// );
///
/// let c = _mm256_unpacklo_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(
///     0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
///     -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
        0, 32, 1, 33, 2, 34, 3, 35,
        4, 36, 5, 37, 6, 38, 7, 39,
        16, 48, 17, 49, 18, 50, 19, 51,
        20, 52, 21, 53, 22, 54, 23, 55,
    ]);
    transmute(r)
}

/// Unpacks and interleaves 16-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpackhi_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(
///     4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    let r: i16x16 = simd_shuffle16(
        a.as_i16x16(),
        b.as_i16x16(),
        [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
    );
    transmute(r)
}

/// Unpacks and interleaves 16-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpacklo_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(
///     0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    let r: i16x16 = simd_shuffle16(
        a.as_i16x16(),
        b.as_i16x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
    );
    transmute(r)
}

/// Unpacks and interleaves 32-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
///
/// let c = _mm256_unpackhi_epi32(a, b);
///
/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
    transmute(r)
}

/// Unpacks and interleaves 32-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
///
/// let c = _mm256_unpacklo_epi32(a, b);
///
/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
    transmute(r)
}

/// Unpacks and interleaves 64-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
///
/// let c = _mm256_unpackhi_epi64(a, b);
///
/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
    transmute(r)
}

/// Unpacks and interleaves 64-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
///
/// let c = _mm256_unpacklo_epi64(a, b);
///
/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
    transmute(r)
}

/// Computes the bitwise XOR of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256)
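///
/// A short usage sketch (not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(0b0101);
/// let b = _mm256_set1_epi8(0b0011);
/// let r = _mm256_xor_si256(a, b);
/// let e = _mm256_set1_epi8(0b0110);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, e)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```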
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
}

/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi8)
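///
/// A minimal sketch (not from the original docs; note that this signature
/// returns `i8`, and only the low 5 bits of `imm8` select a lane):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 7, 3);
/// assert_eq!(_mm256_extract_epi8(a, 3), 7);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```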
#[inline]
#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 {
    let imm8 = (imm8 & 31) as u32;
    simd_extract(a.as_i8x32(), imm8)
}

/// Extracts a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi16)
#[inline]
#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 {
    let imm8 = (imm8 & 15) as u32;
    simd_extract(a.as_i16x16(), imm8)
}

/// Extracts a 32-bit integer from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32)
#[inline]
#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi32(a: __m256i, imm8: i32) -> i32 {
    let imm8 = (imm8 & 7) as u32;
    simd_extract(a.as_i32x8(), imm8)
}

/// Returns the first element of the input vector of `[4 x double]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsd_f64)
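///
/// A minimal sketch (not from the original docs):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// assert_eq!(_mm256_cvtsd_f64(a), 1.0);
/// # }
/// # unsafe { worker(); }
/// # }
/// ```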
#[inline]
#[target_feature(enable = "avx2")]
//#[cfg_attr(test, assert_instr(movsd))] FIXME
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    simd_extract(a, 0)
}

/// Returns the first element of the input vector of `[8 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsi256_si32)
#[inline]
#[target_feature(enable = "avx2")]
//#[cfg_attr(test, assert_instr(movd))] FIXME
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    simd_extract(a.as_i32x8(), 0)
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx2.pabs.b"]
    fn pabsb(a: i8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pabs.w"]
    fn pabsw(a: i16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pabs.d"]
    fn pabsd(a: i32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.pavg.b"]
    fn pavgb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pavg.w"]
    fn pavgw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pblendvb"]
    fn pblendvb(a: i8x32, b: i8x32, mask: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.phadd.w"]
    fn phaddw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phadd.d"]
    fn phaddd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.w"]
    fn phsubw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.d"]
    fn phsubd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.maskload.d"]
    fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.maskload.d.256"]
    fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.maskload.q"]
    fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.maskload.q.256"]
    fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.maskstore.d"]
    fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
    #[link_name = "llvm.x86.avx2.maskstore.d.256"]
    fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
    #[link_name = "llvm.x86.avx2.maskstore.q"]
    fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
    #[link_name = "llvm.x86.avx2.maskstore.q.256"]
    fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
    #[link_name = "llvm.x86.avx2.pmaxs.w"]
    fn pmaxsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmaxs.d"]
    fn pmaxsd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmaxs.b"]
    fn pmaxsb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.pmaxu.w"]
    fn pmaxuw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmaxu.d"]
    fn pmaxud(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.pmaxu.b"]
    fn pmaxub(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pmins.w"]
    fn pminsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmins.d"]
    fn pminsd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmins.b"]
    fn pminsb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.pminu.w"]
    fn pminuw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pminu.d"]
    fn pminud(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.pminu.b"]
    fn pminub(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pmovmskb"]
    fn pmovmskb(a: i8x32) -> i32;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmulhu.w"]
    fn pmulhuw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmulh.w"]
    fn pmulhw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmul.dq"]
    fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
    #[link_name = "llvm.x86.avx2.pmulu.dq"]
    fn pmuludq(a: u32x8, b: u32x8) -> u64x4;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.pslli.w"]
    fn pslliw(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.avx2.pslli.d"]
    fn psllid(a: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx2.pslli.q"]
    fn pslliq(a: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx2.psllv.d"]
    fn psllvd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psllv.d.256"]
    fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psllv.q"]
    fn psllvq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.psllv.q.256"]
    fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrai.w"]
    fn psraiw(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrai.d"]
    fn psraid(a: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrav.d"]
    fn psravd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psrav.d.256"]
    fn psravd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrli.w"]
    fn psrliw(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrli.d"]
    fn psrlid(a: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrli.q"]
    fn psrliq(a: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrlv.d"]
    fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psrlv.d.256"]
    fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrlv.q"]
    fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.psrlv.q.256"]
    fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx2.vperm2i128"]
    fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(src: __m128d, slice: *const i8, offsets: i32x4, mask: __m128d, scale: i8) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(src: __m256d, slice: *const i8, offsets: i32x4, mask: __m256d, scale: i8) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(src: __m128d, slice: *const i8, offsets: i64x2, mask: __m128d, scale: i8) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(src: __m256d, slice: *const i8, offsets: i64x4, mask: __m256d, scale: i8) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8) -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8) -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8) -> __m128;
    #[link_name = "llvm.x86.avx2.psll.dq"]
    fn vpslldq(a: i64x4, b: i32) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrl.dq"]
    fn vpsrldq(a: i64x4, b: i32) -> i64x4;
}

#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi32() {
        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let r = _mm256_abs_epi32(a);
        let e = _mm256_setr_epi32(0, 1, 1, i32::MAX, i32::MAX.wrapping_add(1), 100, 100, 32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16_saturate() {
        let a = _mm256_set1_epi16(!0);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_and_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_and_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_andnot_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_andnot_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_avg_epu8() {
        let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
        let r = _mm256_avg_epu8(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_avg_epu16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let r = _mm256_avg_epu16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_blend_epi32() {
        let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
        let e = _mm_setr_epi32(9, 3, 3, 3);
        let r = _mm_blend_epi32(a, b, 0x01 as i32);
        assert_eq_m128i(r, e);

        let r = _mm_blend_epi32(b, a, 0x0E as i32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi32() {
        let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
        let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi32(a, b, 0x01 as i32);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
        let r = _mm256_blend_epi32(a, b, 0x82 as i32);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
        let r = _mm256_blend_epi32(a, b, 0x7C as i32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi16(a, b, 0x01 as i32);
        assert_eq_m256i(r, e);

        let r = _mm256_blend_epi16(b, a, 0xFE as i32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blendv_epi8() {
        let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
        let mask = _mm256_insert_epi8(_mm256_set1_epi8(0), -1, 2);
        let e = _mm256_insert_epi8(_mm256_set1_epi8(4), 2, 2);
        let r = _mm256_blendv_epi8(a, b, mask);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastb_epi8() {
        let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
        let res = _mm_broadcastb_epi8(a);
        assert_eq_m128i(res, _mm_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastb_epi8() {
        let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
        let res = _mm256_broadcastb_epi8(a);
        assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm_broadcastd_epi32(a);
        assert_eq_m128i(res, _mm_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm256_broadcastd_epi32(a);
        assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm_broadcastq_epi64(a);
        assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm256_broadcastq_epi64(a);
        assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.28, 3.14);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.28f64));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.28, 3.14);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.28f64));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastss_ps() {
        let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
        let res = _mm_broadcastss_ps(a);
        assert_eq_m128(res, _mm_set1_ps(6.28f32));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastss_ps() {
        let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
        let res = _mm256_broadcastss_ps(a);
        assert_eq_m256(res, _mm256_set1_ps(6.28f32));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastw_epi16() {
        let a = _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0);
        let res = _mm_broadcastw_epi16(a);
        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastw_epi16() {
        let a = _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0);
        let res = _mm256_broadcastw_epi16(a);
        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32(e, !0, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16(_mm256_set1_epi16(0), 5, 0);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32(_mm256_set1_epi32(0), 5, 0);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi16() {
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi32() {
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi64() {
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi16() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi32() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi64() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256(a, 0b01);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16(a, 0x7fff, 0);
        let a = _mm256_insert_epi16(a, 1, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16(a, 0x7fff, 0);
        let a = _mm256_insert_epi16(a, -1, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16(_mm256_set1_epi16(0), 0x7FFF, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256(a, b, 0b01);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = _mm_maskload_epi32(a, mask);
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = _mm256_maskload_epi32(a, mask);
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = _mm_maskload_epi64(a, mask);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = _mm256_maskload_epi64(a, mask);
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_movemask_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        // All 32 sign bits are set, so the mask is all ones.
        let e = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mpsadbw_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8(a, b, 0);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epi32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epi16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhrs_epi16() {
        // In Q15 fixed point, 0.5 * 0.25 = 0.125:
        // (((0x4000 * 0x2000) >> 14) + 1) >> 1 == 0x1000.
        let a = _mm256_set1_epi16(0x4000);
        let b = _mm256_set1_epi16(0x2000);
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(0x1000);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_or_si256() {
        let a = _mm256_set1_epi8(-1);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_or_si256(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
        assert_eq_m256i(r, e);
    }

5098 #[simd_test(enable = "avx2")]
5099 unsafe fn test_mm256_sad_epu8() {
5100 let a
= _mm256_set1_epi8(2);
5101 let b
= _mm256_set1_epi8(4);
5102 let r
= _mm256_sad_epu8(a
, b
);
5103 let e
= _mm256_set1_epi64x(16);
5104 assert_eq_m256i(r
, e
);
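    // For `_mm256_shufflehi_epi16`/`_mm256_shufflelo_epi16` the immediate holds four
    // 2-bit indices; 0b00_01_01_11 selects elements 3, 1, 1, 0 of the affected half
    // while the other half of each lane passes through unchanged.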
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16(a, 0b00_01_01_11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16(a, 0b00_01_01_11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16(_mm_set1_epi16(0), 4, 0);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64(_mm_set1_epi64x(0), 4, 0);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16(_mm256_set1_epi16(0xFF), 4),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32(_mm256_set1_epi32(0xFFFF), 4),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }
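    // `_mm256_slli_si256` shifts each 128-bit lane left by whole bytes; shifting
    // the pattern 0xFFFFFFFF by 3 bytes gives 0xFFFFFFFF000000 in each 64-bit half.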
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256(a, 3);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }
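    // Arithmetic right shifts (`sra*`) replicate the sign bit, so shifting -1
    // right by any amount still yields -1.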
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32(_mm_set1_epi32(0), 1, 0);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16(_mm256_set1_epi16(-1), 1),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32(_mm256_set1_epi32(-1), 1),
            _mm256_set1_epi32(-1),
        );
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256(a, 3);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16(_mm_set1_epi16(0), 4, 0);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16(_mm256_set1_epi16(0xFF), 4),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32(_mm256_set1_epi32(0xFFFF), 4),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_xor_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }
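    // `_mm256_alignr_epi8` concatenates corresponding 128-bit lanes of `a` and `b`
    // and extracts a byte-aligned window: a shift of 32 or more zeroes the result,
    // a shift of 16 returns `a`, and a shift of 0 returns `b`.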
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        let r = _mm256_alignr_epi8(a, b, 33);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        let r = _mm256_alignr_epi8(a, b, 17);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8(a, b, 4);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        // A shift of exactly one lane width returns `a` unchanged.
        let r = _mm256_alignr_epi8(a, b, 16);
        assert_eq_m256i(r, a);

        let r = _mm256_alignr_epi8(a, b, 15);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8(a, b, 0);
        assert_eq_m256i(r, b);
    }
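    // `_mm256_shuffle_epi8` indexes within each 128-bit lane; an index byte with
    // its high bit set (here 128u8) zeroes the corresponding output byte.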
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_epi64() {
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64(a, 0b00010011);
        assert_eq_m256i(r, expected);
    }
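    // For `_mm256_permute2x128_si256` each nibble of the immediate picks a source
    // half: 0b00_01_00_11 selects `b`'s high half for the low lane and `a`'s high
    // half for the high lane.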
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256(a, b, 0b00_01_00_11);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd(a, 0b00_01_00_11);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }
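    // For the gather intrinsics below, the scale argument is in bytes (4 for
    // i32/f32, 8 for i64/f64). In the masked variants an element is loaded only
    // when the most significant bit of its mask element is set; otherwise the
    // corresponding `src` element is passed through.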
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_i32gather_epi32(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4);
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_mask_i32gather_epi32(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
            4,
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_i32gather_epi32(
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4),
            4,
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_mask_i32gather_epi32(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
            4,
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_i32gather_ps(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4);
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_mask_i32gather_ps(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
            4,
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_i32gather_ps(
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4),
            4,
        );
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_mask_i32gather_ps(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            4,
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8);
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_mask_i32gather_epi64(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
            8,
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8);
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_mask_i32gather_epi64(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
            8,
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8);
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_mask_i32gather_pd(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
            8,
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8);
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_mask_i32gather_pd(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
            8,
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_i64gather_epi32(arr.as_ptr(), _mm_setr_epi64x(0, 16), 4);
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_mask_i64gather_epi32(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
            4,
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_i64gather_epi32(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4);
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_mask_i64gather_epi32(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
            4,
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_i64gather_ps(arr.as_ptr(), _mm_setr_epi64x(0, 16), 4);
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_mask_i64gather_ps(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
            4,
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_i64gather_ps(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4);
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_mask_i64gather_ps(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
            4,
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_i64gather_epi64(arr.as_ptr(), _mm_setr_epi64x(0, 16), 8);
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_mask_i64gather_epi64(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
            8,
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_i64gather_epi64(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8);
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_mask_i64gather_epi64(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
            8,
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_i64gather_pd(arr.as_ptr(), _mm_setr_epi64x(0, 16), 8);
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_mask_i64gather_pd(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
            8,
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_i64gather_pd(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8);
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_mask_i64gather_pd(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
            8,
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }
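    // The extract intrinsics below mask the index to the number of lanes, so an
    // out-of-range index wraps: index 35 of a 32-byte vector reads lane 3.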
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r1 = _mm256_extract_epi8(a, 0);
        let r2 = _mm256_extract_epi8(a, 35);
        assert_eq!(r1, -1);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16(a, 0);
        let r2 = _mm256_extract_epi16(a, 19);
        assert_eq!(r1, -1);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi32() {
        let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm256_extract_epi32(a, 0);
        let r2 = _mm256_extract_epi32(a, 11);
        assert_eq!(r1, -1);
        assert_eq!(r2, 3);
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtsd_f64() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_cvtsd_f64(a);
        assert_eq!(r, 1.);
    }
    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtsi256_si32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtsi256_si32(a);
        assert_eq!(r, 1);
    }
}