//! Advanced Vector Extensions 2 (AVX2)
//!
//! AVX2 expands most AVX commands to 256-bit wide vector registers and
//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
//!
//! The references are:
//!
//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
//!   Instruction Set Reference, A-Z][intel64_ref].
//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
//!   System Instructions][amd64_ref].
//!
//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
//! overview of the instructions available.
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdarch_test::assert_instr;
/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
    transmute(pabsd(a.as_i32x8()))
}
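
// A usage sketch: `sketch_abs_epi32` is a hypothetical helper (not part of
// this module's API) showing the lane-wise behavior of `_mm256_abs_epi32`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_abs_epi32() -> __m256i {
    let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
    // Each 32-bit lane becomes its absolute value: 1, 2, 3, 4, 5, 6, 7, 8.
    _mm256_abs_epi32(a)
}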
/// Computes the absolute values of packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
    transmute(pabsw(a.as_i16x16()))
}

/// Computes the absolute values of packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
    transmute(pabsb(a.as_i8x32()))
}
/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i64x4(), b.as_i64x4()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i32x8(), b.as_i32x8()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_add(a.as_i8x32(), b.as_i8x32()))
}
/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpaddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16()))
}
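
// A saturation sketch: `sketch_adds_epi8` is a hypothetical helper contrasting
// the saturating adds above with the wrapping `_mm256_add_epi8`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_adds_epi8() -> __m256i {
    let a = _mm256_set1_epi8(i8::max_value()); // 127 in every lane
    let b = _mm256_set1_epi8(1);
    // Signed saturating add clamps 127 + 1 to 127 instead of wrapping to -128.
    _mm256_adds_epi8(a, b)
}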
/// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
/// result, shifts the result right by `n` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpalignr, n = 7))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i {
    let n = n as u32;
    // If `palignr` is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm256_set1_epi8(0);
    }
    // If `palignr` is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm256_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };

    let a = a.as_i8x32();
    let b = b.as_i8x32();

    let r: i8x32 = match n {
        0 => simd_shuffle32(b, a, [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
            23, 24, 25, 26, 27, 28, 29, 30, 31,
        ]),
        1 => simd_shuffle32(b, a, [
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 48,
        ]),
        2 => simd_shuffle32(b, a, [
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 48, 49,
        ]),
        3 => simd_shuffle32(b, a, [
            3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
        ]),
        4 => simd_shuffle32(b, a, [
            4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
        ]),
        5 => simd_shuffle32(b, a, [
            5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26,
            27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
        ]),
        6 => simd_shuffle32(b, a, [
            6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
        ]),
        7 => simd_shuffle32(b, a, [
            7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
        ]),
        8 => simd_shuffle32(b, a, [
            8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28,
            29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
        ]),
        9 => simd_shuffle32(b, a, [
            9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29,
            30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
        ]),
        10 => simd_shuffle32(b, a, [
            10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30,
            31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
        ]),
        11 => simd_shuffle32(b, a, [
            11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31,
            48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
        ]),
        12 => simd_shuffle32(b, a, [
            12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48,
            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
        ]),
        13 => simd_shuffle32(b, a, [
            13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49,
            50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
        ]),
        14 => simd_shuffle32(b, a, [
            14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
        ]),
        15 => simd_shuffle32(b, a, [
            15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51,
            52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
        ]),
        _ => b,
    };
    transmute(r)
}
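
// An alignment sketch: `sketch_alignr_epi8` is a hypothetical helper; with
// `n = 4`, each 16-byte lane of the result is the per-lane pair `[b, a]`
// shifted right by four bytes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_alignr_epi8(a: __m256i, b: __m256i) -> __m256i {
    // Low lane:  bytes 4..=15 of `b`'s low lane,  then bytes 0..=3 of `a`'s low lane.
    // High lane: bytes 4..=15 of `b`'s high lane, then bytes 0..=3 of `a`'s high lane.
    _mm256_alignr_epi8(a, b, 4)
}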
/// Computes the bitwise AND of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_and(a.as_i64x4(), b.as_i64x4()))
}
/// Computes the bitwise NOT of 256 bits (representing integer data)
/// in `a` and then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
    let all_ones = _mm256_set1_epi8(-1);
    transmute(simd_and(
        simd_xor(a.as_i64x4(), all_ones.as_i64x4()),
        b.as_i64x4(),
    ))
}
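
// A bitwise sketch: `sketch_andnot_si256` is a hypothetical helper; `andnot`
// computes `(!a) & b`, which is handy for clearing the bits of `b` that are
// set in a mask `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_andnot_si256(mask: __m256i, b: __m256i) -> __m256i {
    // Bits set in `mask` are cleared in `b`; all other bits of `b` pass through.
    _mm256_andnot_si256(mask, b)
}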
/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pavgw(a.as_u16x16(), b.as_u16x16()))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pavgb(a.as_u8x32(), b.as_u8x32()))
}
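
// An averaging sketch: `sketch_avg_epu8` is a hypothetical helper; `vpavgb`
// and `vpavgw` compute the rounded average `(x + y + 1) >> 1` without
// intermediate overflow.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_avg_epu8() -> __m256i {
    let a = _mm256_set1_epi8(250u8 as i8);
    let b = _mm256_set1_epi8(255u8 as i8);
    // Each unsigned lane becomes (250 + 255 + 1) >> 1 = 253.
    _mm256_avg_epu8(a, b)
}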
/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    macro_rules! blend2 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            simd_shuffle4(a, b, [$a, $b, $c, $d])
        };
    }
    macro_rules! blend1 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => blend2!($a, $b, 2, 3),
                0b01 => blend2!($a, $b, 6, 3),
                0b10 => blend2!($a, $b, 2, 7),
                _ => blend2!($a, $b, 6, 7),
            }
        };
    }
    let r: i32x4 = match imm8 & 0b11 {
        0b00 => blend1!(0, 1),
        0b01 => blend1!(4, 1),
        0b10 => blend1!(0, 5),
        _ => blend1!(4, 5),
    };
    transmute(r)
}
/// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x8();
    let b = b.as_i32x8();
    macro_rules! blend4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! blend3 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7),
                0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7),
                0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15),
                _ => blend4!($a, $b, $c, $d, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! blend2 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => blend3!($a, $b, $c, $d, 4, 5),
                0b01 => blend3!($a, $b, $c, $d, 12, 5),
                0b10 => blend3!($a, $b, $c, $d, 4, 13),
                _ => blend3!($a, $b, $c, $d, 12, 13),
            }
        };
    }
    macro_rules! blend1 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => blend2!($a, $b, 2, 3),
                0b01 => blend2!($a, $b, 10, 3),
                0b10 => blend2!($a, $b, 2, 11),
                _ => blend2!($a, $b, 10, 11),
            }
        };
    }
    let r: i32x8 = match imm8 & 0b11 {
        0b00 => blend1!(0, 1),
        0b01 => blend1!(8, 1),
        0b10 => blend1!(0, 9),
        _ => blend1!(8, 9),
    };
    transmute(r)
}
/// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    macro_rules! blend4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr,
         $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [
                $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
            ])
        };
    }
    macro_rules! blend3 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr,
         $a2:expr, $b2:expr, $c2:expr, $d2:expr, $e2:expr, $f2:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2, $f2, 14, 15),
                0b01 => blend4!($a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2, $e2, $f2, 30, 15),
                0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2, $e2, $f2, 14, 31),
                _ => blend4!($a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2, $e2, $f2, 30, 31),
            }
        };
    }
    macro_rules! blend2 {
        ($a:expr, $b:expr, $c:expr, $d:expr,
         $a2:expr, $b2:expr, $c2:expr, $d2:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => blend3!($a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13),
                0b01 => blend3!($a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13),
                0b10 => blend3!($a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29),
                _ => blend3!($a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29),
            }
        };
    }
    macro_rules! blend1 {
        ($a1:expr, $b1:expr, $a2:expr, $b2:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => blend2!($a1, $b1, 2, 3, $a2, $b2, 10, 11),
                0b01 => blend2!($a1, $b1, 18, 3, $a2, $b2, 26, 11),
                0b10 => blend2!($a1, $b1, 2, 19, $a2, $b2, 10, 27),
                _ => blend2!($a1, $b1, 18, 19, $a2, $b2, 26, 27),
            }
        };
    }
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => blend1!(0, 1, 8, 9),
        0b01 => blend1!(16, 1, 24, 9),
        0b10 => blend1!(0, 17, 8, 25),
        _ => blend1!(16, 17, 24, 25),
    };
    transmute(r)
}
/// Blends packed 8-bit integers from `a` and `b` using `mask`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
    transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32()))
}
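
// A variable-blend sketch: `sketch_blendv_epi8` is a hypothetical helper; only
// the high bit of each 8-bit mask lane matters, and a set high bit selects the
// lane from `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_blendv_epi8(a: __m256i, b: __m256i) -> __m256i {
    // A comparison result (all-ones or all-zeros per lane) is a common mask.
    let mask = _mm256_cmpgt_epi8(b, a);
    // Per lane: max(a, b) for signed bytes.
    _mm256_blendv_epi8(a, b, mask)
}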
/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle16(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]);
    transmute::<i8x16, _>(ret)
}

/// Broadcasts the low packed 8-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle32(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]);
    transmute::<i8x32, _>(ret)
}
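
// A broadcast sketch: `sketch_broadcastb_epi8` is a hypothetical helper that
// splats the lowest byte of a 128-bit vector across all 32 bytes of a 256-bit
// vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_broadcastb_epi8() -> __m256i {
    let a = _mm_setr_epi8(7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // Every byte of the result is 7 (the low byte of `a`).
    _mm256_broadcastb_epi8(a)
}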
// N.B., `simd_shuffle4` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle4(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]);
    transmute::<i32x4, _>(ret)
}

// N.B., `simd_shuffle8` with integer data types for `a` and `b` is
// often compiled to `vbroadcastss`.
/// Broadcasts the low packed 32-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle8(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]);
    transmute::<i32x8, _>(ret)
}
/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
// FIXME: https://github.com/rust-lang/stdarch/issues/791
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
    let ret = simd_shuffle2(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
    transmute::<i64x2, _>(ret)
}

/// Broadcasts the low packed 64-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
    let ret = simd_shuffle4(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
    transmute::<i64x4, _>(ret)
}
/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
    simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2])
}

/// Broadcasts the low double-precision (64-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
    simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4])
}
// N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
// `vbroadcastf128`.
/// Broadcasts 128 bits of integer data from `a` to all 128-bit lanes in
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]);
    transmute::<i64x4, _>(ret)
}
/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 {
    simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4])
}

/// Broadcasts the low single-precision (32-bit) floating-point element
/// from `a` to all elements of the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
    simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8])
}
/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 128-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle8(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]);
    transmute::<i16x8, _>(ret)
}

/// Broadcasts the low packed 16-bit integer from `a` to all elements of
/// the 256-bit returned value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpbroadcastw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
    let zero = _mm_setzero_si128();
    let ret = simd_shuffle16(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]);
    transmute::<i16x16, _>(ret)
}
/// Compares packed 64-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
}
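
// A comparison sketch: `sketch_cmpeq_epi32` is a hypothetical helper; equality
// compares return all-ones (-1) in matching lanes and all-zeros elsewhere,
// ready for use as a mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_cmpeq_epi32() -> __m256i {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm256_setr_epi32(0, 9, 2, 9, 4, 9, 6, 9);
    // Lanes 0, 2, 4 and 6 become 0xFFFF_FFFF; the rest become 0.
    _mm256_cmpeq_epi32(a, b)
}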
/// Compares packed 64-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
}
/// Sign-extends 16-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_i16x8()))
}

/// Sign-extends 16-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
    let a = a.as_i16x8();
    let v64: i16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Sign-extends 32-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_i32x4()))
}

/// Sign-extends 8-bit integers to 16-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_i8x16()))
}

/// Sign-extends 8-bit integers to 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
    let a = a.as_i8x16();
    let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Sign-extends 8-bit integers to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
    let a = a.as_i8x16();
    let v32: i8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}
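
// A widening sketch: `sketch_cvtepi8_epi16` is a hypothetical helper; sign
// extension preserves negative values, so the byte -1 (0xFF) widens to the
// 16-bit value -1 (0xFFFF).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_cvtepi8_epi16() -> __m256i {
    let a = _mm_set1_epi8(-1);
    // All sixteen 16-bit lanes become -1.
    _mm256_cvtepi8_epi16(a)
}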
/// Zero-extends packed unsigned 16-bit integers in `a` to packed 32-bit
/// integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
    transmute::<i32x8, _>(simd_cast(a.as_u16x8()))
}

/// Zero-extends the lower four unsigned 16-bit integers in `a` to 64-bit
/// integers. The upper four elements of `a` are unused.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
    let a = a.as_u16x8();
    let v64: u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v64))
}

/// Zero-extends unsigned 32-bit integers in `a` to 64-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
    transmute::<i64x4, _>(simd_cast(a.as_u32x4()))
}

/// Zero-extends unsigned 8-bit integers in `a` to 16-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
    transmute::<i16x16, _>(simd_cast(a.as_u8x16()))
}

/// Zero-extends the lower eight unsigned 8-bit integers in `a` to 32-bit
/// integers. The upper eight elements of `a` are unused.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
    let a = a.as_u8x16();
    let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i32x8, _>(simd_cast(v64))
}

/// Zero-extends the lower four unsigned 8-bit integers in `a` to 64-bit
/// integers. The upper twelve elements of `a` are unused.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
    let a = a.as_u8x16();
    let v32: u8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]);
    transmute::<i64x4, _>(simd_cast(v32))
}
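
// A zero-extension sketch: `sketch_cvtepu8_epi16` is a hypothetical helper
// contrasting with the sign-extending forms above; the byte 0xFF widens to
// 255, not -1.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_cvtepu8_epi16() -> __m256i {
    let a = _mm_set1_epi8(-1); // every byte is 0xFF
    // All sixteen 16-bit lanes become 255.
    _mm256_cvtepu8_epi16(a)
}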
/// Extracts 128 bits (of integer data) from `a` selected with `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf128, imm8 = 1)
)]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i {
    let a = a.as_i64x4();
    let b = _mm256_undefined_si256().as_i64x4();
    let dst: i64x2 = match imm8 & 0b01 {
        0 => simd_shuffle2(a, b, [0, 1]),
        _ => simd_shuffle2(a, b, [2, 3]),
    };
    transmute(dst)
}
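
// An extraction sketch: `sketch_extracti128` is a hypothetical helper;
// `imm8 & 1` selects which 128-bit half of the 256-bit register is returned.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_extracti128(a: __m256i) -> __m128i {
    // 0 returns the low half; 1 returns the high half.
    _mm256_extracti128_si256(a, 1)
}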
/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phaddsw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubw(a.as_i16x16(), b.as_i16x16()))
}

/// Horizontally subtracts adjacent pairs of 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubd(a.as_i32x8(), b.as_i32x8()))
}

/// Horizontally subtracts adjacent pairs of 16-bit integers in `a` and `b`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vphsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(phsubsw(a.as_i16x16(), b.as_i16x16()))
}
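
// A horizontal-add sketch: `sketch_hadd_epi32` is a hypothetical helper; pairs
// are summed within each 128-bit lane, interleaving results from `a` and `b`
// per lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_hadd_epi32() -> __m256i {
    let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm256_setr_epi32(10, 20, 30, 40, 50, 60, 70, 80);
    // Result lanes: 1+2, 3+4, 10+20, 30+40, 5+6, 7+8, 50+60, 70+80.
    _mm256_hadd_epi32(a, b)
}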
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i32x4();
    let neg_one = _mm_set1_epi32(-1).as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
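
// A gather sketch: `sketch_i32gather_epi32` is a hypothetical helper; each
// 32-bit offset is multiplied by `scale` (a byte count) to address into
// `slice`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_i32gather_epi32(data: &[i32]) -> __m128i {
    let offsets = _mm_setr_epi32(0, 2, 4, 6);
    // With scale = 4 (the size of an i32), this loads data[0], data[2],
    // data[4] and data[6]. The caller must ensure those indices are in bounds.
    _mm_i32gather_epi32(data.as_ptr(), offsets, 4)
}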
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi32(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
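
// A masked-gather sketch: `sketch_mask_i32gather_epi32` is a hypothetical
// helper; lanes whose mask high bit is clear skip the load and take the
// corresponding lane of `src` instead.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
unsafe fn sketch_mask_i32gather_epi32(data: &[i32]) -> __m128i {
    let src = _mm_set1_epi32(-1); // fallback value for unselected lanes
    let offsets = _mm_setr_epi32(0, 1, 2, 3);
    // Gather only lanes 0 and 2; lanes 1 and 3 keep -1 from `src`.
    let mask = _mm_setr_epi32(-1, 0, -1, 0);
    _mm_mask_i32gather_epi32(src, data.as_ptr(), offsets, mask, 4)
}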
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: __m256i, scale: i32) -> __m256i {
    let zero = _mm256_setzero_si256().as_i32x8();
    let neg_one = _mm256_set1_epi32(-1).as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi32(
    src: __m256i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m256i,
    scale: i32,
) -> __m256i {
    let src = src.as_i32x8();
    let mask = mask.as_i32x8();
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: __m128i, scale: i32) -> __m128 {
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_ps(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
    scale: i32,
) -> __m128 {
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: __m256i, scale: i32) -> __m256 {
    let zero = _mm256_setzero_ps();
    let neg_one = _mm256_set1_ps(-1.0);
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_ps(
    src: __m256,
    slice: *const f32,
    offsets: __m256i,
    mask: __m256,
    scale: i32,
) -> __m256 {
    let offsets = offsets.as_i32x8();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i64x2();
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_epi64(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m256i {
    let zero = _mm256_setzero_si256().as_i64x4();
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_epi64(
    src: __m256i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m256i,
    scale: i32,
) -> __m256i {
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m128d {
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i32gather_pd(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
    scale: i32,
) -> __m128d {
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherdpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m256d {
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i32gather_pd(
    src: __m256d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m256d,
    scale: i32,
) -> __m256d {
    let offsets = offsets.as_i32x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i32x4();
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi32(
    src: __m128i,
    slice: *const i32,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: __m256i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i32x4();
    let neg_one = _mm_set1_epi64x(-1).as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi32(
    src: __m128i,
    slice: *const i32,
    offsets: __m256i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i32x4();
    let mask = mask.as_i32x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: __m128i, scale: i32) -> __m128 {
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_ps(
    src: __m128,
    slice: *const f32,
    offsets: __m128i,
    mask: __m128,
    scale: i32,
) -> __m128 {
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: __m256i, scale: i32) -> __m128 {
    let zero = _mm_setzero_ps();
    let neg_one = _mm_set1_ps(-1.0);
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` should be 1, 2, 4 or 8. If the highest bit of a `mask`
/// element is not set, the corresponding value from `src` is returned in
/// that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_ps(
    src: __m128,
    slice: *const f32,
    offsets: __m256i,
    mask: __m128,
    scale: i32,
) -> __m128 {
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqps(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m128i {
    let zero = _mm_setzero_si128().as_i64x2();
    let neg_one = _mm_set1_epi64x(-1).as_i64x2();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. Where the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_epi64(
    src: __m128i,
    slice: *const i64,
    offsets: __m128i,
    mask: __m128i,
    scale: i32,
) -> __m128i {
    let src = src.as_i64x2();
    let mask = mask.as_i64x2();
    let offsets = offsets.as_i64x2();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: __m256i, scale: i32) -> __m256i {
    let zero = _mm256_setzero_si256().as_i64x4();
    let neg_one = _mm256_set1_epi64x(-1).as_i64x4();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. Where the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
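///
/// A small usage sketch, added here for illustration; the data, mask, and
/// offsets are hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let arr: [i64; 4] = [10, 20, 30, 40];
/// let src = _mm256_set1_epi64x(-7);
/// // gather elements 0 and 2; take elements 1 and 3 from `src`
/// let mask = _mm256_setr_epi64x(-1, 0, -1, 0);
/// let offsets = _mm256_setr_epi64x(0, 1, 2, 3);
/// let r = _mm256_mask_i64gather_epi64(src, arr.as_ptr(), offsets, mask, 8);
/// let expected = _mm256_setr_epi64x(10, -7, 30, -7);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```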
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_epi64(
    src: __m256i,
    slice: *const i64,
    offsets: __m256i,
    mask: __m256i,
    scale: i32,
) -> __m256i {
    let src = src.as_i64x4();
    let mask = mask.as_i64x4();
    let offsets = offsets.as_i64x4();
    let slice = slice as *const i8;
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8!(scale, call);
    transmute(r)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m128d {
    let zero = _mm_setzero_pd();
    let neg_one = _mm_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. Where the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mask_i64gather_pd(
    src: __m128d,
    slice: *const f64,
    offsets: __m128i,
    mask: __m128d,
    scale: i32,
) -> __m128d {
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            pgatherqpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: __m256i, scale: i32) -> __m256d {
    let zero = _mm256_setzero_pd();
    let neg_one = _mm256_set1_pd(-1.0);
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    constify_imm8!(scale, call)
}

/// Returns values from `slice` at offsets determined by `offsets * scale`,
/// where `scale` is between 1 and 8. Where the highest bit of the
/// corresponding element in `mask` is not set, the value from `src` is used
/// in that position instead.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mask_i64gather_pd(
    src: __m256d,
    slice: *const f64,
    offsets: __m256i,
    mask: __m256d,
    scale: i32,
) -> __m256d {
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqpd(src, slice, offsets, mask, $imm8)
        };
    }
    constify_imm8!(scale, call)
}
/// Copies `a` to `dst`, then inserts 128 bits (of integer data) from `b` at
/// the location specified by `imm8`.
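///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm_setr_epi32(-1, -2, -3, -4);
/// // replace the high 128 bits of `a` with `b`
/// let r = _mm256_inserti128_si256(a, b, 1);
/// let expected = _mm256_setr_epi32(0, 1, 2, 3, -1, -2, -3, -4);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```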
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vinsertf128, imm8 = 1)
)]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    let b = _mm256_castsi128_si256(b).as_i64x4();
    let dst: i64x4 = match imm8 & 0b01 {
        0 => simd_shuffle4(a, b, [4, 5, 2, 3]),
        _ => simd_shuffle4(a, b, [0, 1, 4, 5]),
    };
    transmute(dst)
}
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs
/// of intermediate 32-bit integers.
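///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(2);
/// let b = _mm256_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
/// // each 32-bit result is 2*x + 2*y for an adjacent pair (x, y) of `b`
/// let r = _mm256_madd_epi16(a, b);
/// let expected = _mm256_setr_epi32(6, 14, 22, 30, 38, 46, 54, 62);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```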
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaddwd(a.as_i16x16(), b.as_i16x16()))
}
/// Vertically multiplies each unsigned 8-bit integer from `a` with the
/// corresponding signed 8-bit integer from `b`, producing intermediate
/// signed 16-bit integers. Horizontally adds adjacent pairs of intermediate
/// signed 16-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32()))
}
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
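///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let vals: [i32; 4] = [1, 2, 3, 4];
/// // only elements whose mask element has the highest bit set are loaded
/// let mask = _mm_setr_epi32(-1, 0, -1, 0);
/// let r = _mm_maskload_epi32(vals.as_ptr(), mask);
/// let expected = _mm_setr_epi32(1, 0, 3, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```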
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
}
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
}

/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
}

/// Loads packed 64-bit integers from memory pointed to by `mem_addr` using
/// `mask` (elements are zeroed out when the highest bit is not set in the
/// corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
}
/// Stores packed 32-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
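///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let mut out: [i32; 4] = [9, 9, 9, 9];
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// // only elements whose mask element has the highest bit set are written
/// let mask = _mm_setr_epi32(-1, 0, -1, 0);
/// _mm_maskstore_epi32(out.as_mut_ptr(), mask, a);
/// assert_eq!(out, [1, 9, 3, 9]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```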
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
}
/// Stores packed 32-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
}

/// Stores packed 64-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
}

/// Stores packed 64-bit integers from `a` into memory pointed to by
/// `mem_addr` using `mask` (elements are not stored when the highest bit is
/// not set in the corresponding element).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
}
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxsw(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxsd(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxsb(a.as_i8x32(), b.as_i8x32()))
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
/// the packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxuw(a.as_u16x16(), b.as_u16x16()))
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
/// the packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxud(a.as_u32x8(), b.as_u32x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
/// the packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmaxub(a.as_u8x32(), b.as_u8x32()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminsw(a.as_i16x16(), b.as_i16x16()))
}

/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminsd(a.as_i32x8(), b.as_i32x8()))
}

/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminsb(a.as_i8x32(), b.as_i8x32()))
}

/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
/// the packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminuw(a.as_u16x16(), b.as_u16x16()))
}

/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
/// the packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminud(a.as_u32x8(), b.as_u32x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
/// the packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pminub(a.as_u8x32(), b.as_u8x32()))
}
/// Creates a mask from the most significant bit of each 8-bit element in
/// `a` and returns the result.
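///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
///     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1,
/// );
/// // bit i of the mask is the sign bit of byte i
/// assert_eq!(_mm256_movemask_epi8(a) as u32, 0x8000_0001);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```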
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
    pmovmskb(a.as_i8x32())
}
/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
/// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
/// results in dst. Eight SADs are performed for each 128-bit lane using one
/// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
/// selected from `b` starting at the offset specified in `imm8`. Eight
/// quadruplets are formed from sequential 8-bit integers selected from `a`
/// starting at the offset specified in `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    macro_rules! call {
        ($imm8:expr) => {
            mpsadbw(a, b, $imm8)
        };
    }
    let r = constify_imm8!(imm8, call);
    transmute(r)
}
/// Multiplies the low 32-bit integers from each packed 64-bit element in
/// `a` and `b`.
///
/// Returns the 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmuldq(a.as_i32x8(), b.as_i32x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit
/// element in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmuludq(a.as_u32x8(), b.as_u32x8()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhw(a.as_i16x16(), b.as_i16x16()))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers and returning the high 16 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epu16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhuw(a.as_u16x16(), b.as_u16x16()))
}
/// Multiplies the packed 16-bit integers in `a` and `b`, producing
/// intermediate 32-bit integers, and returns the low 16 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_mul(a.as_i16x16(), b.as_i16x16()))
}

/// Multiplies the packed 32-bit integers in `a` and `b`, producing
/// intermediate 64-bit integers, and returns the low 32 bits of the
/// intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_mul(a.as_i32x8(), b.as_i32x8()))
}
/// Multiplies packed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Truncates each intermediate
/// integer to the 18 most significant bits, rounds by adding 1, and
/// returns bits `[16:1]`.
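///
/// A small worked example in Q15 fixed point, added here for illustration
/// (the values are hypothetical, not from Intel's docs): 0.5 * 0.25 = 0.125.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(0x4000); // 0.5 in Q15
/// let b = _mm256_set1_epi16(0x2000); // 0.25 in Q15
/// let r = _mm256_mulhrs_epi16(a, b);
/// let expected = _mm256_set1_epi16(0x1000); // 0.125 in Q15
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```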
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhrs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16()))
}
/// Computes the bitwise OR of 256 bits (representing integer data) in `a`
/// and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
}
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
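///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(300); // saturates to 127 as i8
/// let b = _mm256_set1_epi16(-300); // saturates to -128 as i8
/// let r = _mm256_packs_epi16(a, b);
/// // within each 128-bit lane: 8 bytes from `a`, then 8 bytes from `b`
/// let expected = _mm256_setr_epi8(
///     127, 127, 127, 127, 127, 127, 127, 127,
///     -128, -128, -128, -128, -128, -128, -128, -128,
///     127, 127, 127, 127, 127, 127, 127, 127,
///     -128, -128, -128, -128, -128, -128, -128, -128,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```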
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpacksswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(packsswb(a.as_i16x16(), b.as_i16x16()))
}
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(packssdw(a.as_i32x8(), b.as_i32x8()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(packuswb(a.as_i16x16(), b.as_i16x16()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpackusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(packusdw(a.as_i32x8(), b.as_i32x8()))
}
/// Permutes packed 32-bit integers from `a` according to the content of `b`.
///
/// The last 3 bits of each integer of `b` are used as addresses into the 8
/// integers of `a`.
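///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
/// // reverse the eight 32-bit elements
/// let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
/// let r = _mm256_permutevar8x32_epi32(a, idx);
/// let expected = _mm256_setr_epi32(17, 16, 15, 14, 13, 12, 11, 10);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```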
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(permd(a.as_u32x8(), b.as_u32x8()))
}
/// Permutes 64-bit integers from `a` using control mask `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let zero = _mm256_setzero_si256().as_i64x4();
    let a = a.as_i64x4();
    macro_rules! permute4 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            simd_shuffle4(a, zero, [$a, $b, $c, $d])
        };
    }
    macro_rules! permute3 {
        ($a:expr, $b:expr, $c:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => permute4!($a, $b, $c, 0),
                0b01 => permute4!($a, $b, $c, 1),
                0b10 => permute4!($a, $b, $c, 2),
                _ => permute4!($a, $b, $c, 3),
            }
        };
    }
    macro_rules! permute2 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => permute3!($a, $b, 0),
                0b01 => permute3!($a, $b, 1),
                0b10 => permute3!($a, $b, 2),
                _ => permute3!($a, $b, 3),
            }
        };
    }
    macro_rules! permute1 {
        ($a:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => permute2!($a, 0),
                0b01 => permute2!($a, 1),
                0b10 => permute2!($a, 2),
                _ => permute2!($a, 3),
            }
        };
    }
    let r: i64x4 = match imm8 & 0b11 {
        0b00 => permute1!(0),
        0b01 => permute1!(1),
        0b10 => permute1!(2),
        _ => permute1!(3),
    };
    transmute(r)
}
/// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    let b = b.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vperm2i128(a, b, $imm8)
        };
    }
    transmute(constify_imm8!(imm8, call))
}
/// Shuffles 64-bit floating-point elements in `a` across lanes using the
/// control in `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
    let imm8 = (imm8 & 0xFF) as u8;
    let undef = _mm256_undefined_pd();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle4(a, undef, [$x01, $x23, $x45, $x67])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    }
}
/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
/// the corresponding 32-bit integer index in `idx`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 {
    permps(a, idx.as_i32x8())
}
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to
/// produce four unsigned 16-bit integers, and packs these unsigned 16-bit
/// integers in the low 16 bits of the 64-bit elements of the return value.
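///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(3);
/// let b = _mm256_set1_epi8(1);
/// // each 64-bit result is the sum of eight |3 - 1| differences, i.e. 16
/// let r = _mm256_sad_epu8(a, b);
/// let expected = _mm256_set1_epi64x(16);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi64(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```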
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sad_epu8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(psadbw(a.as_u8x32(), b.as_u8x32()))
}
/// Shuffles bytes from `a` according to the content of `b`.
///
/// The last 4 bits of each byte of `b` are used as addresses into the 32
/// bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b` is set, the
/// respective destination byte is set to 0.
///
/// The low and high halves of the vectors are shuffled separately.
///
/// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
/// equivalent to:
///
/// ```
/// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
///     let mut r = [0; 32];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///         if b[i + 16] & 0x80 == 0u8 {
///             r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(pshufb(a.as_u8x32(), b.as_u8x32()))
}
/// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
/// `imm8`.
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///
/// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
/// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
///
/// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
/// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
///
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
    // simd_shuffleX requires that its selector parameter be made up of
    // constant values, but we can't enforce that here. In spirit, we need
    // to write a `match` on all possible values of a byte, and for each value,
    // hard-code the correct `simd_shuffleX` call using only constants. We
    // then hope for LLVM to do the rest.
    //
    // Of course, that's... awful. So we try to use macros to do it for us.
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x8();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle8(a, a, [
                $x01, $x23, $x45, $x67,
                4 + $x01, 4 + $x23, 4 + $x45, 4 + $x67,
            ])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let r: i32x8 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}
/// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
/// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
/// to the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflehi_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle16(a, a, [
                0, 1, 2, 3, 4 + $x01, 4 + $x23, 4 + $x45, 4 + $x67,
                8, 9, 10, 11, 12 + $x01, 12 + $x23, 12 + $x45, 12 + $x67,
            ])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}
/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
/// to the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle16(a, a, [
                0 + $x01, 0 + $x23, 0 + $x45, 0 + $x67, 4, 5, 6, 7,
                8 + $x01, 8 + $x23, 8 + $x45, 8 + $x67, 12, 13, 14, 15,
            ])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}
/// Negates packed 16-bit integers in `a` when the corresponding signed
/// 16-bit integer in `b` is negative, and returns the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
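///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(5);
/// let b = _mm256_setr_epi16(-1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1);
/// // negative in `b` negates, zero in `b` zeroes, positive passes through
/// let r = _mm256_sign_epi16(a, b);
/// let expected = _mm256_setr_epi16(-5, 0, 5, -5, 0, 5, -5, 0, 5, -5, 0, 5, -5, 0, 5, -5);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi16(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```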
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignw(a.as_i16x16(), b.as_i16x16()))
}
/// Negates packed 32-bit integers in `a` when the corresponding signed
/// 32-bit integer in `b` is negative, and returns the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignd(a.as_i32x8(), b.as_i32x8()))
}

/// Negates packed 8-bit integers in `a` when the corresponding signed
/// 8-bit integer in `b` is negative, and returns the results.
/// Results are zeroed out when the corresponding element in `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(psignb(a.as_i8x32(), b.as_i8x32()))
}
/// Shifts packed 16-bit integers in `a` left by `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psllw(a.as_i16x16(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(pslld(a.as_i32x8(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(psllq(a.as_i64x4(), count.as_i64x2()))
}
/// Shifts packed 16-bit integers in `a` left by `imm8` while
/// shifting in zeros, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_epi16(a: __m256i, imm8: i32) -> __m256i {
    transmute(pslliw(a.as_i16x16(), imm8))
}

/// Shifts packed 32-bit integers in `a` left by `imm8` while
/// shifting in zeros, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_epi32(a: __m256i, imm8: i32) -> __m256i {
    transmute(psllid(a.as_i32x8(), imm8))
}

/// Shifts packed 64-bit integers in `a` left by `imm8` while
/// shifting in zeros, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i {
    transmute(pslliq(a.as_i64x4(), imm8))
}
/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpslldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}

/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpslldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}
/// Shifts packed 32-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllvd(a.as_i32x4(), count.as_i32x4()))
}
/// Shifts packed 32-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
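///
/// A small usage sketch, added here for illustration; the values are
/// hypothetical and not from Intel's docs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi32(1);
/// let counts = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // each element is shifted by its own count, producing 1 << i
/// let r = _mm256_sllv_epi32(a, counts);
/// let expected = _mm256_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi32(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```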
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvd256(a.as_i32x8(), count.as_i32x8()))
}
/// Shifts packed 64-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllvq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 64-bit integers in `a` left by the amount
/// specified by the corresponding element in `count` while
/// shifting in zeros, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
    transmute(psllvq256(a.as_i64x4(), count.as_i64x4()))
}
/// Shifts packed 16-bit integers in `a` right by `count` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psraw(a.as_i16x16(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `count` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrad(a.as_i32x8(), count.as_i32x4()))
}

/// Shifts packed 16-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srai_epi16(a: __m256i, imm8: i32) -> __m256i {
    transmute(psraiw(a.as_i16x16(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by `imm8` while
/// shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srai_epi32(a: __m256i, imm8: i32) -> __m256i {
    transmute(psraid(a.as_i32x8(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by the
/// corresponding element in `count` while shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psravd(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 32-bit integers in `a` right by the amount specified by the
/// corresponding element in `count` while shifting in sign bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
    transmute(psravd256(a.as_i32x8(), count.as_i32x8()))
}
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}

/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrlw(a.as_i16x16(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrld(a.as_i32x8(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
    transmute(psrlq(a.as_i64x4(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_epi16(a: __m256i, imm8: i32) -> __m256i {
    transmute(psrliw(a.as_i16x16(), imm8))
}

/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_epi32(a: __m256i, imm8: i32) -> __m256i {
    transmute(psrlid(a.as_i32x8(), imm8))
}

/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_epi64(a: __m256i, imm8: i32) -> __m256i {
    transmute(psrliq(a.as_i64x4(), imm8))
}
3231 /// Shifts packed 32-bit integers in `a` right by the amount specified by
3232 /// the corresponding element in `count` while shifting in zeros,
3234 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32)
3236 #[target_feature(enable = "avx2")]
3237 #[cfg_attr(test, assert_instr(vpsrlvd))]
3238 #[stable(feature = "simd_x86", since = "1.27.0")]
3239 pub unsafe fn _mm_srlv_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
3240 transmute(psrlvd(a
.as_i32x4(), count
.as_i32x4()))
3243 /// Shifts packed 32-bit integers in `a` right by the amount specified by
3244 /// the corresponding element in `count` while shifting in zeros,
3246 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32)
3248 #[target_feature(enable = "avx2")]
3249 #[cfg_attr(test, assert_instr(vpsrlvd))]
3250 #[stable(feature = "simd_x86", since = "1.27.0")]
3251 pub unsafe fn _mm256_srlv_epi32(a
: __m256i
, count
: __m256i
) -> __m256i
{
3252 transmute(psrlvd256(a
.as_i32x8(), count
.as_i32x8()))
3255 /// Shifts packed 64-bit integers in `a` right by the amount specified by
3256 /// the corresponding element in `count` while shifting in zeros,
3258 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64)
3260 #[target_feature(enable = "avx2")]
3261 #[cfg_attr(test, assert_instr(vpsrlvq))]
3262 #[stable(feature = "simd_x86", since = "1.27.0")]
3263 pub unsafe fn _mm_srlv_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
3264 transmute(psrlvq(a
.as_i64x2(), count
.as_i64x2()))
/// Shifts packed 64-bit integers in `a` right by the amount specified by
/// the corresponding element in `count` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
}

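// As with the 32-bit variant, a per-lane count of 64 or more zeroes that
// lane entirely; hedged sketch with illustrative values:
//
//     let a = _mm256_set1_epi64x(-1);
//     let count = _mm256_setr_epi64x(0, 8, 63, 64);
//     let r = _mm256_srlv_epi64(a, count);
//     // lanes of `r` (in setr order): !0, !0 >> 8, 1, 0
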
// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
/// `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i16x16(), b.as_i16x16()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in
/// `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi32)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i32x8(), b.as_i32x8()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in
/// `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi64)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i64x4(), b.as_i64x4()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
/// `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_sub(a.as_i8x32(), b.as_i8x32()))
}

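// The plain `sub` family wraps on overflow, like `wrapping_sub` on the lane
// type; illustrative sketch:
//
//     let a = _mm256_set1_epi8(i8::MIN);
//     let b = _mm256_set1_epi8(1);
//     let r = _mm256_sub_epi8(a, b); // every lane wraps around to i8::MAX
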
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in
/// `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in
/// `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32()))
}

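// In contrast to `_mm256_sub_epi8`, the saturating form clamps at the lane
// type's bounds instead of wrapping; illustrative sketch:
//
//     let a = _mm256_set1_epi8(i8::MIN);
//     let b = _mm256_set1_epi8(1);
//     let r = _mm256_subs_epi8(a, b); // every lane stays at i8::MIN
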
/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned
/// 8-bit integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32()))
}

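// The unsigned saturating forms floor at zero rather than wrapping;
// illustrative sketch:
//
//     let a = _mm256_set1_epi8(1);
//     let b = _mm256_set1_epi8(3);
//     let r = _mm256_subs_epu8(a, b); // every lane is 0, not 254
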
/// Unpacks and interleaves 8-bit integers from the high half of each
/// 128-bit lane in `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
///     -30, -31,
/// );
///
/// let c = _mm256_unpackhi_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(
///     8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
///     24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
///     -31,
/// );
///
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
        8, 40, 9, 41, 10, 42, 11, 43,
        12, 44, 13, 45, 14, 46, 15, 47,
        24, 56, 25, 57, 26, 58, 27, 59,
        28, 60, 29, 61, 30, 62, 31, 63,
    ]);
    transmute(r)
}

/// Unpacks and interleaves 8-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
///     -30, -31,
/// );
///
/// let c = _mm256_unpacklo_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(
///     0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
///     -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
        0, 32, 1, 33, 2, 34, 3, 35,
        4, 36, 5, 37, 6, 38, 7, 39,
        16, 48, 17, 49, 18, 50, 19, 51,
        20, 52, 21, 53, 22, 54, 23, 55,
    ]);
    transmute(r)
}

/// Unpacks and interleaves 16-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpackhi_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(
///     4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    let r: i16x16 = simd_shuffle16(
        a.as_i16x16(),
        b.as_i16x16(),
        [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
    );
    transmute(r)
}

/// Unpacks and interleaves 16-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpacklo_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(
///     0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    let r: i16x16 = simd_shuffle16(
        a.as_i16x16(),
        b.as_i16x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
    );
    transmute(r)
}

/// Unpacks and interleaves 32-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
///
/// let c = _mm256_unpackhi_epi32(a, b);
///
/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi32)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
    transmute(r)
}

/// Unpacks and interleaves 32-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
///
/// let c = _mm256_unpacklo_epi32(a, b);
///
/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi32)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
    transmute(r)
}

/// Unpacks and interleaves 64-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
///
/// let c = _mm256_unpackhi_epi64(a, b);
///
/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi64)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
    transmute(r)
}

/// Unpacks and interleaves 64-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
///
/// let c = _mm256_unpacklo_epi64(a, b);
///
/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi64)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
    transmute(r)
}

/// Computes the bitwise XOR of 256 bits (representing integer data)
/// in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
    transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
}

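// A common idiom (sketch): XOR-ing a register with itself yields an
// all-zero vector without touching memory.
//
//     let a = _mm256_set1_epi64x(0x1234);
//     let zero = _mm256_xor_si256(a, a); // all 256 bits are now zero
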
/// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi8)
#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i32 {
    let a = a.as_u8x32();
    macro_rules! call {
        ($imm5:expr) => {
            simd_extract::<_, u8>(a, $imm5) as i32
        };
    }
    constify_imm5!(imm8, call)
}

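// Illustrative sketch of the zero-extension: a lane holding -1 comes back
// as its unsigned bit pattern, not as -1.
//
//     let a = _mm256_set1_epi8(-1);
//     let v = _mm256_extract_epi8(a, 0); // v == 0xFF, i.e. 255
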
/// Extracts a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi16)
#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i32 {
    let a = a.as_u16x16();
    macro_rules! call {
        ($imm4:expr) => {
            simd_extract::<_, u16>(a, $imm4) as i32
        };
    }
    constify_imm4!((imm8 & 15), call)
}

/// Extracts a 32-bit integer from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32)
#[target_feature(enable = "avx2")]
// This intrinsic has no corresponding instruction.
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi32(a: __m256i, imm8: i32) -> i32 {
    let a = a.as_i32x8();
    macro_rules! call {
        ($imm3:expr) => {
            simd_extract(a, $imm3)
        };
    }
    constify_imm3!((imm8 & 7), call)
}

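// Unlike the 8- and 16-bit extracts above, the 32-bit lane already fills the
// `i32` result, so the sign is preserved; illustrative sketch:
//
//     let a = _mm256_setr_epi32(-1, 2, 3, 4, 5, 6, 7, 8);
//     let v = _mm256_extract_epi32(a, 0); // v == -1
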
/// Returns the first element of the input vector of `[4 x double]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsd_f64)
#[target_feature(enable = "avx2")]
//#[cfg_attr(test, assert_instr(movsd))] FIXME
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
    simd_extract(a.as_f64x4(), 0)
}

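// Illustrative sketch: only the lowest double is returned; the other three
// lanes are ignored.
//
//     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
//     let x = _mm256_cvtsd_f64(a); // x == 1.0
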
/// Returns the first element of the input vector of `[8 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsi256_si32)
#[target_feature(enable = "avx2")]
//#[cfg_attr(test, assert_instr(movd))] FIXME
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
    simd_extract(a.as_i32x8(), 0)
}

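// Illustrative sketch, mirroring `_mm256_cvtsd_f64` above for integer data:
//
//     let a = _mm256_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8);
//     let x = _mm256_cvtsi256_si32(a); // x == 7
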
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx2.pabs.b"]
    fn pabsb(a: i8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pabs.w"]
    fn pabsw(a: i16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pabs.d"]
    fn pabsd(a: i32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.pavg.b"]
    fn pavgb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pavg.w"]
    fn pavgw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pblendvb"]
    fn pblendvb(a: i8x32, b: i8x32, mask: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.phadd.w"]
    fn phaddw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phadd.d"]
    fn phaddd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.phadd.sw"]
    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.w"]
    fn phsubw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.phsub.d"]
    fn phsubd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.phsub.sw"]
    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmadd.wd"]
    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
    fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
    #[link_name = "llvm.x86.avx2.maskload.d"]
    fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.maskload.d.256"]
    fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.maskload.q"]
    fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.maskload.q.256"]
    fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.maskstore.d"]
    fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
    #[link_name = "llvm.x86.avx2.maskstore.d.256"]
    fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
    #[link_name = "llvm.x86.avx2.maskstore.q"]
    fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
    #[link_name = "llvm.x86.avx2.maskstore.q.256"]
    fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
    #[link_name = "llvm.x86.avx2.pmaxs.w"]
    fn pmaxsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmaxs.d"]
    fn pmaxsd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmaxs.b"]
    fn pmaxsb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.pmaxu.w"]
    fn pmaxuw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmaxu.d"]
    fn pmaxud(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.pmaxu.b"]
    fn pmaxub(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pmins.w"]
    fn pminsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmins.d"]
    fn pminsd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.pmins.b"]
    fn pminsb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.pminu.w"]
    fn pminuw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pminu.d"]
    fn pminud(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.pminu.b"]
    fn pminub(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.pmovmskb"]
    fn pmovmskb(a: i8x32) -> i32;
    #[link_name = "llvm.x86.avx2.mpsadbw"]
    fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmulhu.w"]
    fn pmulhuw(a: u16x16, b: u16x16) -> u16x16;
    #[link_name = "llvm.x86.avx2.pmulh.w"]
    fn pmulhw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.pmul.dq"]
    fn pmuldq(a: i32x8, b: i32x8) -> i64x4;
    #[link_name = "llvm.x86.avx2.pmulu.dq"]
    fn pmuludq(a: u32x8, b: u32x8) -> u64x4;
    #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
    fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.packsswb"]
    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
    #[link_name = "llvm.x86.avx2.packssdw"]
    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.packuswb"]
    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
    #[link_name = "llvm.x86.avx2.packusdw"]
    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
    #[link_name = "llvm.x86.avx2.psad.bw"]
    fn psadbw(a: u8x32, b: u8x32) -> u64x4;
    #[link_name = "llvm.x86.avx2.psign.b"]
    fn psignb(a: i8x32, b: i8x32) -> i8x32;
    #[link_name = "llvm.x86.avx2.psign.w"]
    fn psignw(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx2.psign.d"]
    fn psignd(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.w"]
    fn psllw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psll.d"]
    fn pslld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psll.q"]
    fn psllq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.pslli.w"]
    fn pslliw(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.avx2.pslli.d"]
    fn psllid(a: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx2.pslli.q"]
    fn pslliq(a: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx2.psllv.d"]
    fn psllvd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psllv.d.256"]
    fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psllv.q"]
    fn psllvq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.psllv.q.256"]
    fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.psra.w"]
    fn psraw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psra.d"]
    fn psrad(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrai.w"]
    fn psraiw(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrai.d"]
    fn psraid(a: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrav.d"]
    fn psravd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psrav.d.256"]
    fn psravd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.w"]
    fn psrlw(a: i16x16, count: i16x8) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrl.d"]
    fn psrld(a: i32x8, count: i32x4) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrl.q"]
    fn psrlq(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrli.w"]
    fn psrliw(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.avx2.psrli.d"]
    fn psrlid(a: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrli.q"]
    fn psrliq(a: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrlv.d"]
    fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.avx2.psrlv.d.256"]
    fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx2.psrlv.q"]
    fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.avx2.psrlv.q.256"]
    fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx2.pshuf.b"]
    fn pshufb(a: u8x32, b: u8x32) -> u8x32;
    #[link_name = "llvm.x86.avx2.permd"]
    fn permd(a: u32x8, b: u32x8) -> u32x8;
    #[link_name = "llvm.x86.avx2.permps"]
    fn permps(a: __m256, b: i32x8) -> __m256;
    #[link_name = "llvm.x86.avx2.vperm2i128"]
    fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.d"]
    fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.d.d.256"]
    fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8;
    #[link_name = "llvm.x86.avx2.gather.d.q"]
    fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.d.q.256"]
    fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.q.d"]
    fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.d.256"]
    fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4;
    #[link_name = "llvm.x86.avx2.gather.q.q"]
    fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2;
    #[link_name = "llvm.x86.avx2.gather.q.q.256"]
    fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4;
    #[link_name = "llvm.x86.avx2.gather.d.pd"]
    fn pgatherdpd(src: __m128d, slice: *const i8, offsets: i32x4, mask: __m128d, scale: i8) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
    fn vpgatherdpd(src: __m256d, slice: *const i8, offsets: i32x4, mask: __m256d, scale: i8) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.q.pd"]
    fn pgatherqpd(src: __m128d, slice: *const i8, offsets: i64x2, mask: __m128d, scale: i8) -> __m128d;
    #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
    fn vpgatherqpd(src: __m256d, slice: *const i8, offsets: i64x4, mask: __m256d, scale: i8) -> __m256d;
    #[link_name = "llvm.x86.avx2.gather.d.ps"]
    fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8) -> __m128;
    #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
    fn vpgatherdps(src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8) -> __m256;
    #[link_name = "llvm.x86.avx2.gather.q.ps"]
    fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8) -> __m128;
    #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
    fn vpgatherqps(src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8) -> __m128;
    #[link_name = "llvm.x86.avx2.psll.dq"]
    fn vpslldq(a: i64x4, b: i32) -> i64x4;
    #[link_name = "llvm.x86.avx2.psrl.dq"]
    fn vpsrldq(a: i64x4, b: i32) -> i64x4;
}

#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi32() {
        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let r = _mm256_abs_epi32(a);
        let e = _mm256_setr_epi32(0, 1, 1, i32::MAX, i32::MAX.wrapping_add(1), 100, 100, 32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i16::MAX, i16::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi16(a);
        let e = _mm256_setr_epi16(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i16::MAX, i16::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_abs_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
            0, 1, -1, 2, -2, 3, -3, 4,
            -4, 5, -5, i8::MAX, i8::MIN, 100, -100, -32,
        );
        let r = _mm256_abs_epi8(a);
        let e = _mm256_setr_epi8(
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, 2, 2, 3, 3, 4,
            4, 5, 5, i8::MAX, i8::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi64() {
        let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
        let b = _mm256_setr_epi64x(-1, 0, 1, 2);
        let r = _mm256_add_epi64(a, b);
        let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi32() {
        let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_add_epi32(a, b);
        let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_add_epi16(a, b);
        let e = _mm256_setr_epi16(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_add_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm256_add_epi8(a, b);
        let e = _mm256_setr_epi8(
            0, 2, 4, 6, 8, 10, 12, 14,
            16, 18, 20, 22, 24, 26, 28, 30,
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epi8(a, b);
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_positive() {
        let a = _mm256_set1_epi8(0x7F);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi8_saturate_negative() {
        let a = _mm256_set1_epi8(-0x80);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_adds_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epi16(a, b);
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_positive() {
        let a = _mm256_set1_epi16(0x7FFF);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epi16_saturate_negative() {
        let a = _mm256_set1_epi16(-0x8000);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_adds_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
            48, 49, 50, 51, 52, 53, 54, 55,
            56, 57, 58, 59, 60, 61, 62, 63,
        );
        let r = _mm256_adds_epu8(a, b);
        let e = _mm256_setr_epi8(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
            64, 66, 68, 70, 72, 74, 76, 78,
            80, 82, 84, 86, 88, 90, 92, 94,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu8_saturate() {
        let a = _mm256_set1_epi8(!0);
        let b = _mm256_set1_epi8(1);
        let r = _mm256_adds_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            32, 33, 34, 35, 36, 37, 38, 39,
            40, 41, 42, 43, 44, 45, 46, 47,
        );
        let r = _mm256_adds_epu16(a, b);
        let e = _mm256_setr_epi16(
            32, 34, 36, 38, 40, 42, 44, 46,
            48, 50, 52, 54, 56, 58, 60, 62,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_adds_epu16_saturate() {
        let a = _mm256_set1_epi16(!0);
        let b = _mm256_set1_epi16(1);
        let r = _mm256_adds_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_and_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_and_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_andnot_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let got = _mm256_andnot_si256(a, b);
        assert_eq_m256i(got, _mm256_set1_epi8(2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_avg_epu8() {
        let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
        let r = _mm256_avg_epu8(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_avg_epu16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let r = _mm256_avg_epu16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_blend_epi32() {
        let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
        let e = _mm_setr_epi32(9, 3, 3, 3);
        let r = _mm_blend_epi32(a, b, 0x01 as i32);
        assert_eq_m128i(r, e);

        let r = _mm_blend_epi32(b, a, 0x0E as i32);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi32() {
        let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
        let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi32(a, b, 0x01 as i32);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
        let r = _mm256_blend_epi32(a, b, 0x82 as i32);
        assert_eq_m256i(r, e);

        let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
        let r = _mm256_blend_epi32(a, b, 0x7C as i32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blend_epi16() {
        let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
        let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
        let r = _mm256_blend_epi16(a, b, 0x01 as i32);
        assert_eq_m256i(r, e);

        let r = _mm256_blend_epi16(b, a, 0xFE as i32);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_blendv_epi8() {
        let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
        let mask = _mm256_insert_epi8(_mm256_set1_epi8(0), -1, 2);
        let e = _mm256_insert_epi8(_mm256_set1_epi8(4), 2, 2);
        let r = _mm256_blendv_epi8(a, b, mask);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastb_epi8() {
        let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
        let res = _mm_broadcastb_epi8(a);
        assert_eq_m128i(res, _mm_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastb_epi8() {
        let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
        let res = _mm256_broadcastb_epi8(a);
        assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm_broadcastd_epi32(a);
        assert_eq_m128i(res, _mm_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastd_epi32() {
        let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
        let res = _mm256_broadcastd_epi32(a);
        assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm_broadcastq_epi64(a);
        assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastq_epi64() {
        let a = _mm_setr_epi64x(0x1ffffffff, 0);
        let res = _mm256_broadcastq_epi64(a);
        assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastsd_pd() {
        let a = _mm_setr_pd(6.28, 3.14);
        let res = _mm_broadcastsd_pd(a);
        assert_eq_m128d(res, _mm_set1_pd(6.28f64));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsd_pd() {
        let a = _mm_setr_pd(6.28, 3.14);
        let res = _mm256_broadcastsd_pd(a);
        assert_eq_m256d(res, _mm256_set1_pd(6.28f64));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastsi128_si256() {
        let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
        let res = _mm256_broadcastsi128_si256(a);
        let retval = _mm256_setr_epi64x(
            0x0987654321012334,
            0x5678909876543210,
            0x0987654321012334,
            0x5678909876543210,
        );
        assert_eq_m256i(res, retval);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastss_ps() {
        let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
        let res = _mm_broadcastss_ps(a);
        assert_eq_m128(res, _mm_set1_ps(6.28f32));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastss_ps() {
        let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
        let res = _mm256_broadcastss_ps(a);
        assert_eq_m256(res, _mm256_set1_ps(6.28f32));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_broadcastw_epi16() {
        let a = _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0);
        let res = _mm_broadcastw_epi16(a);
        assert_eq_m128i(res, _mm_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_broadcastw_epi16() {
        let a = _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0);
        let res = _mm256_broadcastw_epi16(a);
        assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi8() {
        let a = _mm256_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let b = _mm256_setr_epi8(
            31, 30, 2, 28, 27, 26, 25, 24,
            23, 22, 21, 20, 19, 18, 17, 16,
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi16() {
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let b = _mm256_setr_epi16(
            15, 14, 2, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        let r = _mm256_cmpeq_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi32() {
        let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm256_cmpeq_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        let e = _mm256_insert_epi32(e, !0, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpeq_epi64() {
        let a = _mm256_setr_epi64x(0, 1, 2, 3);
        let b = _mm256_setr_epi64x(3, 2, 2, 0);
        let r = _mm256_cmpeq_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi8() {
        let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0);
        let b = _mm256_set1_epi8(0);
        let r = _mm256_cmpgt_epi8(a, b);
        assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi16() {
        let a = _mm256_insert_epi16(_mm256_set1_epi16(0), 5, 0);
        let b = _mm256_set1_epi16(0);
        let r = _mm256_cmpgt_epi16(a, b);
        assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi32() {
        let a = _mm256_insert_epi32(_mm256_set1_epi32(0), 5, 0);
        let b = _mm256_set1_epi32(0);
        let r = _mm256_cmpgt_epi32(a, b);
        assert_eq_m256i(r, _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cmpgt_epi64() {
        let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0);
        let b = _mm256_set1_epi64x(0);
        let r = _mm256_cmpgt_epi64(a, b);
        assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi16() {
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi16(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        assert_eq_m256i(r, _mm256_cvtepi8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi32() {
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi8_epi64() {
        let a = _mm_setr_epi8(
            0, 0, -1, 1, -2, 2, -3, 3,
            -4, 4, -5, 5, -6, 6, -7, 7,
        );
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi32() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
        assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi16_epi64() {
        let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepi32_epi64() {
        let a = _mm_setr_epi32(0, 0, -1, 1);
        let r = _mm256_setr_epi64x(0, 0, -1, 1);
        assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi32() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu16_epi64() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu32_epi64() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi16() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi16(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m256i(r, _mm256_cvtepu8_epi16(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi32() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, _mm256_cvtepu8_epi32(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtepu8_epi64() {
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r = _mm256_setr_epi64x(0, 1, 2, 3);
        assert_eq_m256i(r, _mm256_cvtepu8_epi64(a));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extracti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let r = _mm256_extracti128_si256(a, 0b01);
        let e = _mm_setr_epi64x(3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadd_epi16(a, b);
        let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadd_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hadd_epi32(a, b);
        let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hadds_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16(a, 0x7fff, 0);
        let a = _mm256_insert_epi16(a, 1, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hadds_epi16(a, b);
        let e = _mm256_setr_epi16(
            0x7FFF, 4, 4, 4, 8, 8, 8, 8,
            4, 4, 4, 4, 8, 8, 8, 8,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsub_epi16(a, b);
        let e = _mm256_set1_epi16(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsub_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_hsub_epi32(a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_hsubs_epi16() {
        let a = _mm256_set1_epi16(2);
        let a = _mm256_insert_epi16(a, 0x7fff, 0);
        let a = _mm256_insert_epi16(a, -1, 1);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_hsubs_epi16(a, b);
        let e = _mm256_insert_epi16(_mm256_set1_epi16(0), 0x7FFF, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_madd_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_madd_epi16(a, b);
        let e = _mm256_set1_epi32(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_inserti128_si256() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let b = _mm_setr_epi64x(7, 8);
        let r = _mm256_inserti128_si256(a, b, 0b01);
        let e = _mm256_setr_epi64x(1, 2, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maddubs_epi16() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_maddubs_epi16(a, b);
        let e = _mm256_set1_epi16(16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi32() {
        let nums = [1, 2, 3, 4];
        let a = &nums as *const i32;
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        let r = _mm_maskload_epi32(a, mask);
        let e = _mm_setr_epi32(1, 0, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi32() {
        let nums = [1, 2, 3, 4, 5, 6, 7, 8];
        let a = &nums as *const i32;
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        let r = _mm256_maskload_epi32(a, mask);
        let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskload_epi64() {
        let nums = [1_i64, 2_i64];
        let a = &nums as *const i64;
        let mask = _mm_setr_epi64x(0, -1);
        let r = _mm_maskload_epi64(a, mask);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskload_epi64() {
        let nums = [1_i64, 2_i64, 3_i64, 4_i64];
        let a = &nums as *const i64;
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        let r = _mm256_maskload_epi64(a, mask);
        let e = _mm256_setr_epi64x(0, 2, 3, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut arr = [-1, -1, -1, -1];
        let mask = _mm_setr_epi32(-1, 0, 0, -1);
        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 4];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi32() {
        let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
        let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
        let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
        let e = [1, -1, -1, 42, -1, 6, 7, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_maskstore_epi64() {
        let a = _mm_setr_epi64x(1_i64, 2_i64);
        let mut arr = [-1_i64, -1_i64];
        let mask = _mm_setr_epi64x(0, -1);
        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_maskstore_epi64() {
        let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
        let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
        let mask = _mm256_setr_epi64x(0, -1, -1, 0);
        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
        let e = [-1, 2, 3, -1];
        assert_eq!(arr, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_max_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_max_epu32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_max_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_max_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epi16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epi32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epi8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_min_epu16(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_min_epu32(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_min_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_min_epu8(a, b);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_movemask_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_movemask_epi8(a);
        let e = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mpsadbw_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_mpsadbw_epu8(a, b, 0);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epi32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epi32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mul_epu32() {
        let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
        let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_mul_epu32(a, b);
        let e = _mm256_setr_epi64x(0, 0, 10, 14);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epi16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epi16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhi_epu16() {
        let a = _mm256_set1_epi16(6535);
        let b = _mm256_set1_epi16(6535);
        let r = _mm256_mulhi_epu16(a, b);
        let e = _mm256_set1_epi16(651);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_mullo_epi16(a, b);
        let e = _mm256_set1_epi16(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mullo_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_mullo_epi32(a, b);
        let e = _mm256_set1_epi32(8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mulhrs_epi16() {
        // 0.5 * 0.25 = 0.125 in Q15 fixed point, with rounding:
        // ((0x4000 * 0x2000) >> 14 + 1) >> 1 == 0x1000.
        let a = _mm256_set1_epi16(0x4000);
        let b = _mm256_set1_epi16(0x2000);
        let r = _mm256_mulhrs_epi16(a, b);
        let e = _mm256_set1_epi16(0x1000);
        assert_eq_m256i(r, e);
    }

5053 #[simd_test(enable = "avx2")]
5054 unsafe fn test_mm256_or_si256() {
5055 let a
= _mm256_set1_epi8(-1);
5056 let b
= _mm256_set1_epi8(0);
5057 let r
= _mm256_or_si256(a
, b
);
5058 assert_eq_m256i(r
, a
);

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packs_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packs_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packs_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(4);
        let r = _mm256_packus_epi16(a, b);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
            2, 2, 2, 2, 2, 2, 2, 2,
            4, 4, 4, 4, 4, 4, 4, 4,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_packus_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(4);
        let r = _mm256_packus_epi32(a, b);
        let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
        assert_eq_m256i(r, e);
    }
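
    // The interleaved runs of 2s and 4s in the pack tests come from the
    // instructions working within 128-bit lanes: each output lane holds the
    // saturated elements of the `a` lane followed by those of the `b` lane.
    // Sketch of one lane of `packs_epi16` (illustrative only):
    #[allow(dead_code)]
    fn packs_lane(a_lane: [i16; 8], b_lane: [i16; 8]) -> [i8; 16] {
        let sat = |x: i16| x.clamp(-128, 127) as i8;
        let mut out = [0i8; 16];
        for i in 0..8 {
            out[i] = sat(a_lane[i]);
            out[i + 8] = sat(b_lane[i]);
        }
        out
    }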

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sad_epu8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(4);
        let r = _mm256_sad_epu8(a, b);
        let e = _mm256_set1_epi64x(16);
        assert_eq_m256i(r, e);
    }
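
    // `_mm256_sad_epu8` sums absolute byte differences over groups of eight
    // into each 64-bit lane: 8 * |2 - 4| == 16 above. Scalar sketch of one
    // group (illustrative only):
    #[allow(dead_code)]
    fn sad_group(a: [u8; 8], b: [u8; 8]) -> u64 {
        a.iter()
            .zip(b.iter())
            .map(|(&x, &y)| (x as i16 - y as i16).abs() as u64)
            .sum()
    }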

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflehi_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            0, 1, 2, 3, 11, 22, 33, 44,
            4, 5, 6, 7, 55, 66, 77, 88,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            0, 1, 2, 3, 44, 22, 22, 11,
            4, 5, 6, 7, 88, 66, 66, 55,
        );
        let r = _mm256_shufflehi_epi16(a, 0b00_01_01_11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shufflelo_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            11, 22, 33, 44, 0, 1, 2, 3,
            55, 66, 77, 88, 4, 5, 6, 7,
        );
        #[rustfmt::skip]
        let e = _mm256_setr_epi16(
            44, 22, 22, 11, 0, 1, 2, 3,
            88, 66, 66, 55, 4, 5, 6, 7,
        );
        let r = _mm256_shufflelo_epi16(a, 0b00_01_01_11);
        assert_eq_m256i(r, e);
    }
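
    // The shuffle immediate packs four 2-bit source indices, lowest field
    // first: 0b00_01_01_11 selects words 3, 1, 1, 0, turning
    // (11, 22, 33, 44) into (44, 22, 22, 11). Illustrative decoder:
    #[allow(dead_code)]
    fn shuffle4(src: [u16; 4], imm8: u8) -> [u16; 4] {
        let mut out = [0u16; 4];
        for i in 0..4 {
            out[i] = src[usize::from((imm8 >> (2 * i)) & 0b11)];
        }
        out
    }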

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi16() {
        let a = _mm256_set1_epi16(2);
        let b = _mm256_set1_epi16(-1);
        let r = _mm256_sign_epi16(a, b);
        let e = _mm256_set1_epi16(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(-1);
        let r = _mm256_sign_epi32(a, b);
        let e = _mm256_set1_epi32(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sign_epi8() {
        let a = _mm256_set1_epi8(2);
        let b = _mm256_set1_epi8(-1);
        let r = _mm256_sign_epi8(a, b);
        let e = _mm256_set1_epi8(-2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16(_mm_set1_epi16(0), 4, 0);
        let r = _mm256_sll_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
        let r = _mm256_sll_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sll_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_insert_epi64(_mm_set1_epi64x(0), 4, 0);
        let r = _mm256_sll_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi16() {
        assert_eq_m256i(
            _mm256_slli_epi16(_mm256_set1_epi16(0xFF), 4),
            _mm256_set1_epi16(0xFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi32() {
        assert_eq_m256i(
            _mm256_slli_epi32(_mm256_set1_epi32(0xFFFF), 4),
            _mm256_set1_epi32(0xFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_epi64() {
        assert_eq_m256i(
            _mm256_slli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4),
            _mm256_set1_epi64x(0xFFFFFFFF0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_slli_si256() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let r = _mm256_slli_si256(a, 3);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
    }
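
    // `_mm256_slli_si256` shifts whole bytes within each 128-bit lane, so a
    // 3-byte shift multiplies each in-lane value by 2^24:
    // 0xFFFFFFFF << 24 == 0xFFFFFFFF000000. Illustrative one-liner:
    #[allow(dead_code)]
    fn shift_bytes(x: u64, bytes: u32) -> u64 {
        x << (8 * bytes)
    }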

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi32() {
        let a = _mm_set1_epi32(2);
        let b = _mm_set1_epi32(1);
        let r = _mm_sllv_epi32(a, b);
        let e = _mm_set1_epi32(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi32() {
        let a = _mm256_set1_epi32(2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_sllv_epi32(a, b);
        let e = _mm256_set1_epi32(4);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_sllv_epi64() {
        let a = _mm_set1_epi64x(2);
        let b = _mm_set1_epi64x(1);
        let r = _mm_sllv_epi64(a, b);
        let e = _mm_set1_epi64x(4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sllv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let b = _mm256_set1_epi64x(1);
        let r = _mm256_sllv_epi64(a, b);
        let e = _mm256_set1_epi64x(4);
        assert_eq_m256i(r, e);
    }
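
    // `sll`/`srl` shift every element by the single count in the low 64
    // bits of their second operand, while the `sllv`/`srlv` variants take a
    // per-element count. Scalar sketch of the variable form (illustrative
    // only; the hardware yields 0 for counts of 32 or more):
    #[allow(dead_code)]
    fn sllv(a: [u32; 8], count: [u32; 8]) -> [u32; 8] {
        let mut out = [0u32; 8];
        for i in 0..8 {
            out[i] = if count[i] < 32 { a[i] << count[i] } else { 0 };
        }
        out
    }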

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi16() {
        let a = _mm256_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_sra_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sra_epi32() {
        let a = _mm256_set1_epi32(-1);
        let b = _mm_insert_epi32(_mm_set1_epi32(0), 1, 0);
        let r = _mm256_sra_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(-1));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi16() {
        assert_eq_m256i(
            _mm256_srai_epi16(_mm256_set1_epi16(-1), 1),
            _mm256_set1_epi16(-1),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srai_epi32() {
        assert_eq_m256i(
            _mm256_srai_epi32(_mm256_set1_epi32(-1), 1),
            _mm256_set1_epi32(-1),
        );
    }
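
    // Arithmetic right shifts replicate the sign bit, so -1 stays -1 for
    // any count; the logical `srl`/`srli` tests below shift in zeroes
    // instead. Illustrative contrast:
    #[allow(dead_code)]
    fn sra_vs_srl(x: i32) -> (i32, u32) {
        // For x == -1: (-1, 0x7FFF_FFFF)
        (x >> 1, (x as u32) >> 1)
    }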

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srav_epi32() {
        let a = _mm_set1_epi32(4);
        let count = _mm_set1_epi32(1);
        let r = _mm_srav_epi32(a, count);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srav_epi32() {
        let a = _mm256_set1_epi32(4);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srav_epi32(a, count);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_si256() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm256_srli_si256(a, 3);
        #[rustfmt::skip]
        let e = _mm256_setr_epi8(
            4, 5, 6, 7, 8, 9, 10, 11,
            12, 13, 14, 15, 16, 0, 0, 0,
            20, 21, 22, 23, 24, 25, 26, 27,
            28, 29, 30, 31, 32, 0, 0, 0,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi16() {
        let a = _mm256_set1_epi16(0xFF);
        let b = _mm_insert_epi16(_mm_set1_epi16(0), 4, 0);
        let r = _mm256_srl_epi16(a, b);
        assert_eq_m256i(r, _mm256_set1_epi16(0xF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi32() {
        let a = _mm256_set1_epi32(0xFFFF);
        let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
        let r = _mm256_srl_epi32(a, b);
        assert_eq_m256i(r, _mm256_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srl_epi64() {
        let a = _mm256_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm256_srl_epi64(a, b);
        assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi16() {
        assert_eq_m256i(
            _mm256_srli_epi16(_mm256_set1_epi16(0xFF), 4),
            _mm256_set1_epi16(0xF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi32() {
        assert_eq_m256i(
            _mm256_srli_epi32(_mm256_set1_epi32(0xFFFF), 4),
            _mm256_set1_epi32(0xFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srli_epi64() {
        assert_eq_m256i(
            _mm256_srli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4),
            _mm256_set1_epi64x(0xFFFFFFF),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi32() {
        let a = _mm_set1_epi32(2);
        let count = _mm_set1_epi32(1);
        let r = _mm_srlv_epi32(a, count);
        let e = _mm_set1_epi32(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi32() {
        let a = _mm256_set1_epi32(2);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_srlv_epi32(a, count);
        let e = _mm256_set1_epi32(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_srlv_epi64() {
        let a = _mm_set1_epi64x(2);
        let count = _mm_set1_epi64x(1);
        let r = _mm_srlv_epi64(a, count);
        let e = _mm_set1_epi64x(1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_srlv_epi64() {
        let a = _mm256_set1_epi64x(2);
        let count = _mm256_set1_epi64x(1);
        let r = _mm256_srlv_epi64(a, count);
        let e = _mm256_set1_epi64x(1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_sub_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi32() {
        let a = _mm256_set1_epi32(4);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_sub_epi32(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi64() {
        let a = _mm256_set1_epi64x(4);
        let b = _mm256_set1_epi64x(2);
        let r = _mm256_sub_epi64(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_sub_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_sub_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epi16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epi8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epi8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu16() {
        let a = _mm256_set1_epi16(4);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_subs_epu16(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_subs_epu8() {
        let a = _mm256_set1_epi8(4);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_subs_epu8(a, b);
        assert_eq_m256i(r, b);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_xor_si256() {
        let a = _mm256_set1_epi8(5);
        let b = _mm256_set1_epi8(3);
        let r = _mm256_xor_si256(a, b);
        assert_eq_m256i(r, _mm256_set1_epi8(6));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            -1, -2, -3, -4, -5, -6, -7, -8,
            -9, -10, -11, -12, -13, -14, -15, -16,
            -17, -18, -19, -20, -21, -22, -23, -24,
            -25, -26, -27, -28, -29, -30, -31, -32,
        );
        let r = _mm256_alignr_epi8(a, b, 33);
        assert_eq_m256i(r, _mm256_set1_epi8(0));

        let r = _mm256_alignr_epi8(a, b, 17);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
            18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 0,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8(a, b, 4);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -5, -6, -7, -8, -9, -10, -11, -12,
            -13, -14, -15, -16, 1, 2, 3, 4,
            -21, -22, -23, -24, -25, -26, -27, -28,
            -29, -30, -31, -32, 17, 18, 19, 20,
        );
        assert_eq_m256i(r, expected);

        // Shifting by exactly one lane (16 bytes) drops all of `b` and
        // leaves `a` in each result lane.
        let r = _mm256_alignr_epi8(a, b, 16);
        assert_eq_m256i(r, a);

        let r = _mm256_alignr_epi8(a, b, 15);
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            -16, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            -32, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        assert_eq_m256i(r, expected);

        let r = _mm256_alignr_epi8(a, b, 0);
        assert_eq_m256i(r, b);
    }
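
    // `_mm256_alignr_epi8` operates per 128-bit lane: the lane of `a` is
    // concatenated above the lane of `b`, and the 32-byte value is shifted
    // right by the immediate byte count, with zeroes entering past byte 31.
    // Sketch of one lane (illustrative only):
    #[allow(dead_code)]
    fn alignr_lane(a: [u8; 16], b: [u8; 16], shift: usize) -> [u8; 16] {
        let mut concat = [0u8; 32];
        concat[..16].copy_from_slice(&b);
        concat[16..].copy_from_slice(&a);
        let mut out = [0u8; 16];
        for i in 0..16 {
            out[i] = if shift + i < 32 { concat[shift + i] } else { 0 };
        }
        out
    }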

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24,
            25, 26, 27, 28, 29, 30, 31, 32,
        );
        #[rustfmt::skip]
        let b = _mm256_setr_epi8(
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
            4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
            12, 5, 5, 10, 4, 1, 8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm256_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
            21, 0, 21, 20, 25, 29, 23, 20,
            29, 22, 22, 27, 21, 18, 25, 17,
        );
        let r = _mm256_shuffle_epi8(a, b);
        assert_eq_m256i(r, expected);
    }
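
    // In `_mm256_shuffle_epi8`, a mask byte with its high bit set (the
    // `128u8 as i8` entries above) zeroes that output byte; otherwise the
    // low four bits index a byte within the same 128-bit lane. Sketch of
    // one lane (illustrative only):
    #[allow(dead_code)]
    fn pshufb_lane(a: [u8; 16], mask: [u8; 16]) -> [u8; 16] {
        let mut out = [0u8; 16];
        for i in 0..16 {
            out[i] = if mask[i] & 0x80 != 0 {
                0
            } else {
                a[usize::from(mask[i] & 0x0F)]
            };
        }
        out
    }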

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_epi32() {
        let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
        let r = _mm256_permutevar8x32_epi32(a, b);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_epi64() {
        let a = _mm256_setr_epi64x(100, 200, 300, 400);
        let expected = _mm256_setr_epi64x(400, 100, 200, 100);
        let r = _mm256_permute4x64_epi64(a, 0b00010011);
        assert_eq_m256i(r, expected);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute2x128_si256() {
        let a = _mm256_setr_epi64x(100, 200, 500, 600);
        let b = _mm256_setr_epi64x(300, 400, 700, 800);
        let r = _mm256_permute2x128_si256(a, b, 0b00_01_00_11);
        let e = _mm256_setr_epi64x(700, 800, 500, 600);
        assert_eq_m256i(r, e);
    }
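
    // Each nibble of the `permute2x128` immediate picks a 128-bit source
    // for one result half: 0/1 select the low/high lane of `a`, 2/3 the
    // low/high lane of `b` (setting bit 3 of a nibble zeroes that half).
    // 0b00_01_00_11 therefore yields b.hi (700, 800) then a.hi (500, 600).
    // Illustrative decoder for one half:
    #[allow(dead_code)]
    fn permute2x128_half(a: [i64; 4], b: [i64; 4], nibble: u8) -> [i64; 2] {
        if nibble & 0b1000 != 0 {
            return [0, 0];
        }
        match nibble & 0b11 {
            0 => [a[0], a[1]],
            1 => [a[2], a[3]],
            2 => [b[0], b[1]],
            _ => [b[2], b[3]],
        }
    }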

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permute4x64_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_permute4x64_pd(a, 0b00_01_00_11);
        let e = _mm256_setr_pd(4., 1., 2., 1.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_permutevar8x32_ps() {
        let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
        let r = _mm256_permutevar8x32_ps(a, b);
        let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_i32gather_epi32(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4);
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }
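
    // All gathers compute element addresses as `base + index * scale`, with
    // scale restricted to 1, 2, 4, or 8. Since `arr[i] == i` and the scale
    // matches the element size, the result mirrors the index vector. Scalar
    // model (illustrative only):
    #[allow(dead_code)]
    unsafe fn gather_i32(base: *const i32, indices: [i32; 4], scale: isize) -> [i32; 4] {
        let mut out = [0i32; 4];
        for (o, &i) in out.iter_mut().zip(indices.iter()) {
            // byte offset = index * scale, matching the hardware addressing
            *o = *base.cast::<u8>().offset(i as isize * scale).cast::<i32>();
        }
        out
    }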

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_mask_i32gather_epi32(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
            4,
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }
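
    // The masked gathers load an element only where the corresponding mask
    // element has its sign bit set, keeping the `src` element (256 here)
    // elsewhere; hence the trailing 256s in the expected vectors. Scalar
    // model (illustrative only):
    #[allow(dead_code)]
    fn mask_gather_i32(src: [i32; 4], arr: &[i32], idx: [i32; 4], mask: [i32; 4]) -> [i32; 4] {
        let mut out = src;
        for i in 0..4 {
            if mask[i] < 0 {
                out[i] = arr[idx[i] as usize];
            }
        }
        out
    }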

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_i32gather_epi32(
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4),
            4,
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_mask_i32gather_epi32(
            _mm256_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
            4,
        );
        assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_i32gather_ps(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4);
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_mask_i32gather_ps(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
            4,
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_i32gather_ps(
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4),
            4,
        );
        assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_mask_i32gather_ps(
            _mm256_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
            4,
        );
        assert_eq_m256(
            r,
            _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
        );
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8);
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_mask_i32gather_epi64(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_epi64x(-1, 0),
            8,
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8);
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_mask_i32gather_epi64(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
            8,
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8);
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_mask_i32gather_pd(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(16, 16, 16, 16),
            _mm_setr_pd(-1.0, 0.0),
            8,
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8);
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i32gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_mask_i32gather_pd(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi32(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
            8,
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_i64gather_epi32(arr.as_ptr(), _mm_setr_epi64x(0, 16), 4);
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm_mask_i64gather_epi32(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_epi32(-1, 0, -1, 0),
            4,
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_i64gather_epi32(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4);
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi32() {
        let mut arr = [0i32; 128];
        for i in 0..128i32 {
            arr[i as usize] = i;
        }
        // A multiplier of 4 is word-addressing
        let r = _mm256_mask_i64gather_epi32(
            _mm_set1_epi32(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_epi32(-1, -1, -1, 0),
            4,
        );
        assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_i64gather_ps(arr.as_ptr(), _mm_setr_epi64x(0, 16), 4);
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm_mask_i64gather_ps(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(0, 16),
            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
            4,
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_i64gather_ps(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4);
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_ps() {
        let mut arr = [0.0f32; 128];
        for i in 0..128usize {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing for f32s
        let r = _mm256_mask_i64gather_ps(
            _mm_set1_ps(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
            4,
        );
        assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_i64gather_epi64(arr.as_ptr(), _mm_setr_epi64x(0, 16), 8);
        assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm_mask_i64gather_epi64(
            _mm_set1_epi64x(256),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_epi64x(-1, 0),
            8,
        );
        assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_i64gather_epi64(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8);
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_epi64() {
        let mut arr = [0i64; 128];
        for i in 0..128i64 {
            arr[i as usize] = i;
        }
        // A multiplier of 8 is word-addressing for i64s
        let r = _mm256_mask_i64gather_epi64(
            _mm256_set1_epi64x(256),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_epi64x(-1, -1, -1, 0),
            8,
        );
        assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_i64gather_pd(arr.as_ptr(), _mm_setr_epi64x(0, 16), 8);
        assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm_mask_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm_mask_i64gather_pd(
            _mm_set1_pd(256.0),
            arr.as_ptr(),
            _mm_setr_epi64x(16, 16),
            _mm_setr_pd(-1.0, 0.0),
            8,
        );
        assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_i64gather_pd(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8);
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_mask_i64gather_pd() {
        let mut arr = [0.0f64; 128];
        for i in 0..128usize {
            arr[i] = i as f64;
        }
        // A multiplier of 8 is word-addressing for f64s
        let r = _mm256_mask_i64gather_pd(
            _mm256_set1_pd(256.0),
            arr.as_ptr(),
            _mm256_setr_epi64x(0, 16, 64, 96),
            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
            8,
        );
        assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi8() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
            16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r1 = _mm256_extract_epi8(a, 0);
        // The index is taken modulo 32, so 35 selects element 3.
        let r2 = _mm256_extract_epi8(a, 35);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi16() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi16(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        let r1 = _mm256_extract_epi16(a, 0);
        // The index is taken modulo 16, so 19 selects element 3.
        let r2 = _mm256_extract_epi16(a, 19);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_extract_epi32() {
        let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm256_extract_epi32(a, 0);
        // The index is taken modulo 8, so 11 selects element 3.
        let r2 = _mm256_extract_epi32(a, 11);
        assert_eq!(r1, -1);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtsd_f64() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let r = _mm256_cvtsd_f64(a);
        assert_eq!(r, 1.);
    }

    #[simd_test(enable = "avx2")]
    unsafe fn test_mm256_cvtsi256_si32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtsi256_si32(a);
        assert_eq!(r, 1);
    }
}