//! Advanced Vector Extensions 2 (AVX2)
3 //! AVX2 expands most AVX commands to 256-bit wide vector registers and
4 //! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
6 //! The references are:
8 //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
9 //! Instruction Set Reference, A-Z][intel64_ref].
10 //! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
11 //! System Instructions][amd64_ref].
13 //! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
14 //! overview of the instructions available.
16 //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
17 //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
18 //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
19 //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
22 core_arch
::{simd::*, simd_llvm::*, x86::*}
,
27 use stdarch_test
::assert_instr
;
29 /// Computes the absolute values of packed 32-bit integers in `a`.
31 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi32)
33 #[target_feature(enable = "avx2")]
34 #[cfg_attr(test, assert_instr(vpabsd))]
35 #[stable(feature = "simd_x86", since = "1.27.0")]
36 pub unsafe fn _mm256_abs_epi32(a
: __m256i
) -> __m256i
{
37 transmute(pabsd(a
.as_i32x8()))
40 /// Computes the absolute values of packed 16-bit integers in `a`.
42 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi16)
44 #[target_feature(enable = "avx2")]
45 #[cfg_attr(test, assert_instr(vpabsw))]
46 #[stable(feature = "simd_x86", since = "1.27.0")]
47 pub unsafe fn _mm256_abs_epi16(a
: __m256i
) -> __m256i
{
48 transmute(pabsw(a
.as_i16x16()))
51 /// Computes the absolute values of packed 8-bit integers in `a`.
53 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi8)
55 #[target_feature(enable = "avx2")]
56 #[cfg_attr(test, assert_instr(vpabsb))]
57 #[stable(feature = "simd_x86", since = "1.27.0")]
58 pub unsafe fn _mm256_abs_epi8(a
: __m256i
) -> __m256i
{
59 transmute(pabsb(a
.as_i8x32()))
62 /// Adds packed 64-bit integers in `a` and `b`.
64 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64)
66 #[target_feature(enable = "avx2")]
67 #[cfg_attr(test, assert_instr(vpaddq))]
68 #[stable(feature = "simd_x86", since = "1.27.0")]
69 pub unsafe fn _mm256_add_epi64(a
: __m256i
, b
: __m256i
) -> __m256i
{
70 transmute(simd_add(a
.as_i64x4(), b
.as_i64x4()))
73 /// Adds packed 32-bit integers in `a` and `b`.
75 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32)
77 #[target_feature(enable = "avx2")]
78 #[cfg_attr(test, assert_instr(vpaddd))]
79 #[stable(feature = "simd_x86", since = "1.27.0")]
80 pub unsafe fn _mm256_add_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
81 transmute(simd_add(a
.as_i32x8(), b
.as_i32x8()))
84 /// Adds packed 16-bit integers in `a` and `b`.
86 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16)
88 #[target_feature(enable = "avx2")]
89 #[cfg_attr(test, assert_instr(vpaddw))]
90 #[stable(feature = "simd_x86", since = "1.27.0")]
91 pub unsafe fn _mm256_add_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
92 transmute(simd_add(a
.as_i16x16(), b
.as_i16x16()))
95 /// Adds packed 8-bit integers in `a` and `b`.
97 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8)
99 #[target_feature(enable = "avx2")]
100 #[cfg_attr(test, assert_instr(vpaddb))]
101 #[stable(feature = "simd_x86", since = "1.27.0")]
102 pub unsafe fn _mm256_add_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
103 transmute(simd_add(a
.as_i8x32(), b
.as_i8x32()))
106 /// Adds packed 8-bit integers in `a` and `b` using saturation.
108 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8)
110 #[target_feature(enable = "avx2")]
111 #[cfg_attr(test, assert_instr(vpaddsb))]
112 #[stable(feature = "simd_x86", since = "1.27.0")]
113 pub unsafe fn _mm256_adds_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
114 transmute(paddsb(a
.as_i8x32(), b
.as_i8x32()))
117 /// Adds packed 16-bit integers in `a` and `b` using saturation.
119 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16)
121 #[target_feature(enable = "avx2")]
122 #[cfg_attr(test, assert_instr(vpaddsw))]
123 #[stable(feature = "simd_x86", since = "1.27.0")]
124 pub unsafe fn _mm256_adds_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
125 transmute(paddsw(a
.as_i16x16(), b
.as_i16x16()))
128 /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
130 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8)
132 #[target_feature(enable = "avx2")]
133 #[cfg_attr(test, assert_instr(vpaddusb))]
134 #[stable(feature = "simd_x86", since = "1.27.0")]
135 pub unsafe fn _mm256_adds_epu8(a
: __m256i
, b
: __m256i
) -> __m256i
{
136 transmute(paddusb(a
.as_u8x32(), b
.as_u8x32()))
139 /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
141 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16)
143 #[target_feature(enable = "avx2")]
144 #[cfg_attr(test, assert_instr(vpaddusw))]
145 #[stable(feature = "simd_x86", since = "1.27.0")]
146 pub unsafe fn _mm256_adds_epu16(a
: __m256i
, b
: __m256i
) -> __m256i
{
147 transmute(paddusw(a
.as_u16x16(), b
.as_u16x16()))
150 /// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary
151 /// result, shifts the result right by `n` bytes, and returns the low 16 bytes.
153 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8)
155 #[target_feature(enable = "avx2")]
156 #[cfg_attr(test, assert_instr(vpalignr, n = 7))]
157 #[rustc_args_required_const(2)]
158 #[stable(feature = "simd_x86", since = "1.27.0")]
159 pub unsafe fn _mm256_alignr_epi8(a
: __m256i
, b
: __m256i
, n
: i32) -> __m256i
{
161 // If `palignr` is shifting the pair of vectors more than the size of two
164 return _mm256_set1_epi8(0);
166 // If `palignr` is shifting the pair of input vectors more than one lane,
167 // but less than two lanes, convert to shifting in zeroes.
168 let (a
, b
, n
) = if n
> 16 {
169 (_mm256_set1_epi8(0), a
, n
- 16)
174 let a
= a
.as_i8x32();
175 let b
= b
.as_i8x32();
177 let r
: i8x32
= match n
{
182 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
183 23, 24, 25, 26, 27, 28, 29, 30, 31,
190 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23,
191 24, 25, 26, 27, 28, 29, 30, 31, 48,
198 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24,
199 25, 26, 27, 28, 29, 30, 31, 48, 49,
206 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24,
207 25, 26, 27, 28, 29, 30, 31, 48, 49, 50,
214 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25,
215 26, 27, 28, 29, 30, 31, 48, 49, 50, 51,
222 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26,
223 27, 28, 29, 30, 31, 48, 49, 50, 51, 52,
230 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27,
231 28, 29, 30, 31, 48, 49, 50, 51, 52, 53,
238 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27,
239 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54,
246 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28,
247 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55,
254 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29,
255 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56,
258 10 => simd_shuffle32(
262 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30,
263 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
266 11 => simd_shuffle32(
270 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31,
271 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
274 12 => simd_shuffle32(
278 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48,
279 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
282 13 => simd_shuffle32(
286 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49,
287 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
290 14 => simd_shuffle32(
294 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50,
295 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
298 15 => simd_shuffle32(
302 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51,
303 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
311 /// Computes the bitwise AND of 256 bits (representing integer data)
314 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256)
316 #[target_feature(enable = "avx2")]
317 #[cfg_attr(test, assert_instr(vandps))]
318 #[stable(feature = "simd_x86", since = "1.27.0")]
319 pub unsafe fn _mm256_and_si256(a
: __m256i
, b
: __m256i
) -> __m256i
{
320 transmute(simd_and(a
.as_i64x4(), b
.as_i64x4()))
323 /// Computes the bitwise NOT of 256 bits (representing integer data)
324 /// in `a` and then AND with `b`.
326 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256)
328 #[target_feature(enable = "avx2")]
329 #[cfg_attr(test, assert_instr(vandnps))]
330 #[stable(feature = "simd_x86", since = "1.27.0")]
331 pub unsafe fn _mm256_andnot_si256(a
: __m256i
, b
: __m256i
) -> __m256i
{
332 let all_ones
= _mm256_set1_epi8(-1);
334 simd_xor(a
.as_i64x4(), all_ones
.as_i64x4()),
339 /// Averages packed unsigned 16-bit integers in `a` and `b`.
341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16)
343 #[target_feature(enable = "avx2")]
344 #[cfg_attr(test, assert_instr(vpavgw))]
345 #[stable(feature = "simd_x86", since = "1.27.0")]
346 pub unsafe fn _mm256_avg_epu16(a
: __m256i
, b
: __m256i
) -> __m256i
{
347 transmute(pavgw(a
.as_u16x16(), b
.as_u16x16()))
350 /// Averages packed unsigned 8-bit integers in `a` and `b`.
352 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8)
354 #[target_feature(enable = "avx2")]
355 #[cfg_attr(test, assert_instr(vpavgb))]
356 #[stable(feature = "simd_x86", since = "1.27.0")]
357 pub unsafe fn _mm256_avg_epu8(a
: __m256i
, b
: __m256i
) -> __m256i
{
358 transmute(pavgb(a
.as_u8x32(), b
.as_u8x32()))
361 /// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
363 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32)
365 #[target_feature(enable = "avx2")]
366 #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
367 #[rustc_args_required_const(2)]
368 #[stable(feature = "simd_x86", since = "1.27.0")]
369 pub unsafe fn _mm_blend_epi32(a
: __m128i
, b
: __m128i
, imm8
: i32) -> __m128i
{
370 let imm8
= (imm8
& 0xFF) as u8;
371 let a
= a
.as_i32x4();
372 let b
= b
.as_i32x4();
373 macro_rules
! blend2
{
374 ($a
:expr
, $b
:expr
, $c
:expr
, $d
:expr
) => {
375 simd_shuffle4(a
, b
, [$a
, $b
, $c
, $d
]);
378 macro_rules
! blend1
{
379 ($a
:expr
, $b
:expr
) => {
380 match (imm8
>> 2) & 0b11 {
381 0b00 => blend2
!($a
, $b
, 2, 3),
382 0b01 => blend2
!($a
, $b
, 6, 3),
383 0b10 => blend2
!($a
, $b
, 2, 7),
384 _
=> blend2
!($a
, $b
, 6, 7),
388 let r
: i32x4
= match imm8
& 0b11 {
389 0b00 => blend1
!(0, 1),
390 0b01 => blend1
!(4, 1),
391 0b10 => blend1
!(0, 5),
397 /// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`.
399 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32)
401 #[target_feature(enable = "avx2")]
402 #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))]
403 #[rustc_args_required_const(2)]
404 #[stable(feature = "simd_x86", since = "1.27.0")]
405 pub unsafe fn _mm256_blend_epi32(a
: __m256i
, b
: __m256i
, imm8
: i32) -> __m256i
{
406 let imm8
= (imm8
& 0xFF) as u8;
407 let a
= a
.as_i32x8();
408 let b
= b
.as_i32x8();
409 macro_rules
! blend4
{
420 simd_shuffle8(a
, b
, [$a
, $b
, $c
, $d
, $e
, $f
, $g
, $h
]);
423 macro_rules
! blend3
{
424 ($a
:expr
, $b
:expr
, $c
:expr
, $d
:expr
, $e
:expr
, $f
:expr
) => {
425 match (imm8
>> 6) & 0b11 {
426 0b00 => blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 6, 7),
427 0b01 => blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 14, 7),
428 0b10 => blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 6, 15),
429 _
=> blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 14, 15),
433 macro_rules
! blend2
{
434 ($a
:expr
, $b
:expr
, $c
:expr
, $d
:expr
) => {
435 match (imm8
>> 4) & 0b11 {
436 0b00 => blend3
!($a
, $b
, $c
, $d
, 4, 5),
437 0b01 => blend3
!($a
, $b
, $c
, $d
, 12, 5),
438 0b10 => blend3
!($a
, $b
, $c
, $d
, 4, 13),
439 _
=> blend3
!($a
, $b
, $c
, $d
, 12, 13),
443 macro_rules
! blend1
{
444 ($a
:expr
, $b
:expr
) => {
445 match (imm8
>> 2) & 0b11 {
446 0b00 => blend2
!($a
, $b
, 2, 3),
447 0b01 => blend2
!($a
, $b
, 10, 3),
448 0b10 => blend2
!($a
, $b
, 2, 11),
449 _
=> blend2
!($a
, $b
, 10, 11),
453 let r
: i32x8
= match imm8
& 0b11 {
454 0b00 => blend1
!(0, 1),
455 0b01 => blend1
!(8, 1),
456 0b10 => blend1
!(0, 9),
462 /// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`.
464 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16)
466 #[target_feature(enable = "avx2")]
467 #[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))]
468 #[rustc_args_required_const(2)]
469 #[stable(feature = "simd_x86", since = "1.27.0")]
470 pub unsafe fn _mm256_blend_epi16(a
: __m256i
, b
: __m256i
, imm8
: i32) -> __m256i
{
471 let imm8
= (imm8
& 0xFF) as u8;
472 let a
= a
.as_i16x16();
473 let b
= b
.as_i16x16();
474 macro_rules
! blend4
{
497 $a
, $b
, $c
, $d
, $e
, $f
, $g
, $h
, $i
, $j
, $k
, $l
, $m
, $n
, $o
, $p
,
502 macro_rules
! blend3
{
517 match (imm8
>> 6) & 0b11 {
518 0b00 => blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 6, 7, $a2
, $b2
, $c2
, $d2
, $e2
, $f2
, 14, 15),
520 blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 22, 7, $a2
, $b2
, $c2
, $d2
, $e2
, $f2
, 30, 15)
523 blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 6, 23, $a2
, $b2
, $c2
, $d2
, $e2
, $f2
, 14, 31)
525 _
=> blend4
!($a
, $b
, $c
, $d
, $e
, $f
, 22, 23, $a2
, $b2
, $c2
, $d2
, $e2
, $f2
, 30, 31),
529 macro_rules
! blend2
{
540 match (imm8
>> 4) & 0b11 {
541 0b00 => blend3
!($a
, $b
, $c
, $d
, 4, 5, $a2
, $b2
, $c2
, $d2
, 12, 13),
542 0b01 => blend3
!($a
, $b
, $c
, $d
, 20, 5, $a2
, $b2
, $c2
, $d2
, 28, 13),
543 0b10 => blend3
!($a
, $b
, $c
, $d
, 4, 21, $a2
, $b2
, $c2
, $d2
, 12, 29),
544 _
=> blend3
!($a
, $b
, $c
, $d
, 20, 21, $a2
, $b2
, $c2
, $d2
, 28, 29),
548 macro_rules
! blend1
{
549 ($a1
:expr
, $b1
:expr
, $a2
:expr
, $b2
:expr
) => {
550 match (imm8
>> 2) & 0b11 {
551 0b00 => blend2
!($a1
, $b1
, 2, 3, $a2
, $b2
, 10, 11),
552 0b01 => blend2
!($a1
, $b1
, 18, 3, $a2
, $b2
, 26, 11),
553 0b10 => blend2
!($a1
, $b1
, 2, 19, $a2
, $b2
, 10, 27),
554 _
=> blend2
!($a1
, $b1
, 18, 19, $a2
, $b2
, 26, 27),
558 let r
: i16x16
= match imm8
& 0b11 {
559 0b00 => blend1
!(0, 1, 8, 9),
560 0b01 => blend1
!(16, 1, 24, 9),
561 0b10 => blend1
!(0, 17, 8, 25),
562 _
=> blend1
!(16, 17, 24, 25),
567 /// Blends packed 8-bit integers from `a` and `b` using `mask`.
569 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8)
571 #[target_feature(enable = "avx2")]
572 #[cfg_attr(test, assert_instr(vpblendvb))]
573 #[stable(feature = "simd_x86", since = "1.27.0")]
574 pub unsafe fn _mm256_blendv_epi8(a
: __m256i
, b
: __m256i
, mask
: __m256i
) -> __m256i
{
575 transmute(pblendvb(a
.as_i8x32(), b
.as_i8x32(), mask
.as_i8x32()))
578 /// Broadcasts the low packed 8-bit integer from `a` to all elements of
579 /// the 128-bit returned value.
581 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8)
583 #[target_feature(enable = "avx2")]
584 #[cfg_attr(test, assert_instr(vpbroadcastb))]
585 #[stable(feature = "simd_x86", since = "1.27.0")]
586 pub unsafe fn _mm_broadcastb_epi8(a
: __m128i
) -> __m128i
{
587 let zero
= _mm_setzero_si128();
588 let ret
= simd_shuffle16(a
.as_i8x16(), zero
.as_i8x16(), [0_u32; 16]);
589 transmute
::<i8x16
, _
>(ret
)
592 /// Broadcasts the low packed 8-bit integer from `a` to all elements of
593 /// the 256-bit returned value.
595 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8)
597 #[target_feature(enable = "avx2")]
598 #[cfg_attr(test, assert_instr(vpbroadcastb))]
599 #[stable(feature = "simd_x86", since = "1.27.0")]
600 pub unsafe fn _mm256_broadcastb_epi8(a
: __m128i
) -> __m256i
{
601 let zero
= _mm_setzero_si128();
602 let ret
= simd_shuffle32(a
.as_i8x16(), zero
.as_i8x16(), [0_u32; 32]);
603 transmute
::<i8x32
, _
>(ret
)
606 // N.B., `simd_shuffle4` with integer data types for `a` and `b` is
607 // often compiled to `vbroadcastss`.
608 /// Broadcasts the low packed 32-bit integer from `a` to all elements of
609 /// the 128-bit returned value.
611 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32)
613 #[target_feature(enable = "avx2")]
614 #[cfg_attr(test, assert_instr(vbroadcastss))]
615 #[stable(feature = "simd_x86", since = "1.27.0")]
616 pub unsafe fn _mm_broadcastd_epi32(a
: __m128i
) -> __m128i
{
617 let zero
= _mm_setzero_si128();
618 let ret
= simd_shuffle4(a
.as_i32x4(), zero
.as_i32x4(), [0_u32; 4]);
619 transmute
::<i32x4
, _
>(ret
)
622 // N.B., `simd_shuffle4`` with integer data types for `a` and `b` is
623 // often compiled to `vbroadcastss`.
624 /// Broadcasts the low packed 32-bit integer from `a` to all elements of
625 /// the 256-bit returned value.
627 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32)
629 #[target_feature(enable = "avx2")]
630 #[cfg_attr(test, assert_instr(vbroadcastss))]
631 #[stable(feature = "simd_x86", since = "1.27.0")]
632 pub unsafe fn _mm256_broadcastd_epi32(a
: __m128i
) -> __m256i
{
633 let zero
= _mm_setzero_si128();
634 let ret
= simd_shuffle8(a
.as_i32x4(), zero
.as_i32x4(), [0_u32; 8]);
635 transmute
::<i32x8
, _
>(ret
)
638 /// Broadcasts the low packed 64-bit integer from `a` to all elements of
639 /// the 128-bit returned value.
641 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64)
643 #[target_feature(enable = "avx2")]
644 #[cfg_attr(test, assert_instr(vpbroadcastq))]
645 #[stable(feature = "simd_x86", since = "1.27.0")]
646 pub unsafe fn _mm_broadcastq_epi64(a
: __m128i
) -> __m128i
{
647 let zero
= _mm_setzero_si128().as_i64x2();
648 let ret
= simd_shuffle2(a
.as_i64x2(), zero
, [0_u32; 2]);
649 transmute
::<i64x2
, _
>(ret
)
652 // N.B. `simd_shuffle4` with integer data types for `a` and `b` is
653 // often compiled to `vbroadcastsd`.
654 /// Broadcasts the low packed 64-bit integer from `a` to all elements of
655 /// the 256-bit returned value.
657 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64)
659 #[target_feature(enable = "avx2")]
660 #[cfg_attr(test, assert_instr(vbroadcastsd))]
661 #[stable(feature = "simd_x86", since = "1.27.0")]
662 pub unsafe fn _mm256_broadcastq_epi64(a
: __m128i
) -> __m256i
{
663 let zero
= _mm_setzero_si128();
664 let ret
= simd_shuffle4(a
.as_i64x2(), zero
.as_i64x2(), [0_u32; 4]);
665 transmute
::<i64x4
, _
>(ret
)
668 /// Broadcasts the low double-precision (64-bit) floating-point element
669 /// from `a` to all elements of the 128-bit returned value.
671 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd)
673 #[target_feature(enable = "avx2")]
674 #[cfg_attr(test, assert_instr(vmovddup))]
675 #[stable(feature = "simd_x86", since = "1.27.0")]
676 pub unsafe fn _mm_broadcastsd_pd(a
: __m128d
) -> __m128d
{
677 simd_shuffle2(a
, _mm_setzero_pd(), [0_u32; 2])
680 /// Broadcasts the low double-precision (64-bit) floating-point element
681 /// from `a` to all elements of the 256-bit returned value.
683 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd)
685 #[target_feature(enable = "avx2")]
686 #[cfg_attr(test, assert_instr(vbroadcastsd))]
687 #[stable(feature = "simd_x86", since = "1.27.0")]
688 pub unsafe fn _mm256_broadcastsd_pd(a
: __m128d
) -> __m256d
{
689 simd_shuffle4(a
, _mm_setzero_pd(), [0_u32; 4])
692 // N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or
694 /// Broadcasts 128 bits of integer data from a to all 128-bit lanes in
695 /// the 256-bit returned value.
697 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256)
699 #[target_feature(enable = "avx2")]
700 #[stable(feature = "simd_x86", since = "1.27.0")]
701 pub unsafe fn _mm256_broadcastsi128_si256(a
: __m128i
) -> __m256i
{
702 let zero
= _mm_setzero_si128();
703 let ret
= simd_shuffle4(a
.as_i64x2(), zero
.as_i64x2(), [0, 1, 0, 1]);
704 transmute
::<i64x4
, _
>(ret
)
707 /// Broadcasts the low single-precision (32-bit) floating-point element
708 /// from `a` to all elements of the 128-bit returned value.
710 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps)
712 #[target_feature(enable = "avx2")]
713 #[cfg_attr(test, assert_instr(vbroadcastss))]
714 #[stable(feature = "simd_x86", since = "1.27.0")]
715 pub unsafe fn _mm_broadcastss_ps(a
: __m128
) -> __m128
{
716 simd_shuffle4(a
, _mm_setzero_ps(), [0_u32; 4])
719 /// Broadcasts the low single-precision (32-bit) floating-point element
720 /// from `a` to all elements of the 256-bit returned value.
722 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps)
724 #[target_feature(enable = "avx2")]
725 #[cfg_attr(test, assert_instr(vbroadcastss))]
726 #[stable(feature = "simd_x86", since = "1.27.0")]
727 pub unsafe fn _mm256_broadcastss_ps(a
: __m128
) -> __m256
{
728 simd_shuffle8(a
, _mm_setzero_ps(), [0_u32; 8])
731 /// Broadcasts the low packed 16-bit integer from a to all elements of
732 /// the 128-bit returned value
734 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16)
736 #[target_feature(enable = "avx2")]
737 #[cfg_attr(test, assert_instr(vpbroadcastw))]
738 #[stable(feature = "simd_x86", since = "1.27.0")]
739 pub unsafe fn _mm_broadcastw_epi16(a
: __m128i
) -> __m128i
{
740 let zero
= _mm_setzero_si128();
741 let ret
= simd_shuffle8(a
.as_i16x8(), zero
.as_i16x8(), [0_u32; 8]);
742 transmute
::<i16x8
, _
>(ret
)
745 /// Broadcasts the low packed 16-bit integer from a to all elements of
746 /// the 256-bit returned value
748 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16)
750 #[target_feature(enable = "avx2")]
751 #[cfg_attr(test, assert_instr(vpbroadcastw))]
752 #[stable(feature = "simd_x86", since = "1.27.0")]
753 pub unsafe fn _mm256_broadcastw_epi16(a
: __m128i
) -> __m256i
{
754 let zero
= _mm_setzero_si128();
755 let ret
= simd_shuffle16(a
.as_i16x8(), zero
.as_i16x8(), [0_u32; 16]);
756 transmute
::<i16x16
, _
>(ret
)
759 /// Compares packed 64-bit integers in `a` and `b` for equality.
761 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64)
763 #[target_feature(enable = "avx2")]
764 #[cfg_attr(test, assert_instr(vpcmpeqq))]
765 #[stable(feature = "simd_x86", since = "1.27.0")]
766 pub unsafe fn _mm256_cmpeq_epi64(a
: __m256i
, b
: __m256i
) -> __m256i
{
767 transmute
::<i64x4
, _
>(simd_eq(a
.as_i64x4(), b
.as_i64x4()))
770 /// Compares packed 32-bit integers in `a` and `b` for equality.
772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32)
774 #[target_feature(enable = "avx2")]
775 #[cfg_attr(test, assert_instr(vpcmpeqd))]
776 #[stable(feature = "simd_x86", since = "1.27.0")]
777 pub unsafe fn _mm256_cmpeq_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
778 transmute
::<i32x8
, _
>(simd_eq(a
.as_i32x8(), b
.as_i32x8()))
781 /// Compares packed 16-bit integers in `a` and `b` for equality.
783 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16)
785 #[target_feature(enable = "avx2")]
786 #[cfg_attr(test, assert_instr(vpcmpeqw))]
787 #[stable(feature = "simd_x86", since = "1.27.0")]
788 pub unsafe fn _mm256_cmpeq_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
789 transmute
::<i16x16
, _
>(simd_eq(a
.as_i16x16(), b
.as_i16x16()))
792 /// Compares packed 8-bit integers in `a` and `b` for equality.
794 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8)
796 #[target_feature(enable = "avx2")]
797 #[cfg_attr(test, assert_instr(vpcmpeqb))]
798 #[stable(feature = "simd_x86", since = "1.27.0")]
799 pub unsafe fn _mm256_cmpeq_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
800 transmute
::<i8x32
, _
>(simd_eq(a
.as_i8x32(), b
.as_i8x32()))
803 /// Compares packed 64-bit integers in `a` and `b` for greater-than.
805 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64)
807 #[target_feature(enable = "avx2")]
808 #[cfg_attr(test, assert_instr(vpcmpgtq))]
809 #[stable(feature = "simd_x86", since = "1.27.0")]
810 pub unsafe fn _mm256_cmpgt_epi64(a
: __m256i
, b
: __m256i
) -> __m256i
{
811 transmute
::<i64x4
, _
>(simd_gt(a
.as_i64x4(), b
.as_i64x4()))
814 /// Compares packed 32-bit integers in `a` and `b` for greater-than.
816 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32)
818 #[target_feature(enable = "avx2")]
819 #[cfg_attr(test, assert_instr(vpcmpgtd))]
820 #[stable(feature = "simd_x86", since = "1.27.0")]
821 pub unsafe fn _mm256_cmpgt_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
822 transmute
::<i32x8
, _
>(simd_gt(a
.as_i32x8(), b
.as_i32x8()))
825 /// Compares packed 16-bit integers in `a` and `b` for greater-than.
827 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16)
829 #[target_feature(enable = "avx2")]
830 #[cfg_attr(test, assert_instr(vpcmpgtw))]
831 #[stable(feature = "simd_x86", since = "1.27.0")]
832 pub unsafe fn _mm256_cmpgt_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
833 transmute
::<i16x16
, _
>(simd_gt(a
.as_i16x16(), b
.as_i16x16()))
836 /// Compares packed 8-bit integers in `a` and `b` for greater-than.
838 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8)
840 #[target_feature(enable = "avx2")]
841 #[cfg_attr(test, assert_instr(vpcmpgtb))]
842 #[stable(feature = "simd_x86", since = "1.27.0")]
843 pub unsafe fn _mm256_cmpgt_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
844 transmute
::<i8x32
, _
>(simd_gt(a
.as_i8x32(), b
.as_i8x32()))
847 /// Sign-extend 16-bit integers to 32-bit integers.
849 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi32)
851 #[target_feature(enable = "avx2")]
852 #[cfg_attr(test, assert_instr(vpmovsxwd))]
853 #[stable(feature = "simd_x86", since = "1.27.0")]
854 pub unsafe fn _mm256_cvtepi16_epi32(a
: __m128i
) -> __m256i
{
855 transmute
::<i32x8
, _
>(simd_cast(a
.as_i16x8()))
858 /// Sign-extend 16-bit integers to 64-bit integers.
860 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi64)
862 #[target_feature(enable = "avx2")]
863 #[cfg_attr(test, assert_instr(vpmovsxwq))]
864 #[stable(feature = "simd_x86", since = "1.27.0")]
865 pub unsafe fn _mm256_cvtepi16_epi64(a
: __m128i
) -> __m256i
{
866 let a
= a
.as_i16x8();
867 let v64
: i16x4
= simd_shuffle4(a
, a
, [0, 1, 2, 3]);
868 transmute
::<i64x4
, _
>(simd_cast(v64
))
871 /// Sign-extend 32-bit integers to 64-bit integers.
873 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_epi64)
875 #[target_feature(enable = "avx2")]
876 #[cfg_attr(test, assert_instr(vpmovsxdq))]
877 #[stable(feature = "simd_x86", since = "1.27.0")]
878 pub unsafe fn _mm256_cvtepi32_epi64(a
: __m128i
) -> __m256i
{
879 transmute
::<i64x4
, _
>(simd_cast(a
.as_i32x4()))
882 /// Sign-extend 8-bit integers to 16-bit integers.
884 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi16)
886 #[target_feature(enable = "avx2")]
887 #[cfg_attr(test, assert_instr(vpmovsxbw))]
888 #[stable(feature = "simd_x86", since = "1.27.0")]
889 pub unsafe fn _mm256_cvtepi8_epi16(a
: __m128i
) -> __m256i
{
890 transmute
::<i16x16
, _
>(simd_cast(a
.as_i8x16()))
893 /// Sign-extend 8-bit integers to 32-bit integers.
895 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi32)
897 #[target_feature(enable = "avx2")]
898 #[cfg_attr(test, assert_instr(vpmovsxbd))]
899 #[stable(feature = "simd_x86", since = "1.27.0")]
900 pub unsafe fn _mm256_cvtepi8_epi32(a
: __m128i
) -> __m256i
{
901 let a
= a
.as_i8x16();
902 let v64
: i8x8
= simd_shuffle8(a
, a
, [0, 1, 2, 3, 4, 5, 6, 7]);
903 transmute
::<i32x8
, _
>(simd_cast(v64
))
906 /// Sign-extend 8-bit integers to 64-bit integers.
908 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi64)
910 #[target_feature(enable = "avx2")]
911 #[cfg_attr(test, assert_instr(vpmovsxbq))]
912 #[stable(feature = "simd_x86", since = "1.27.0")]
913 pub unsafe fn _mm256_cvtepi8_epi64(a
: __m128i
) -> __m256i
{
914 let a
= a
.as_i8x16();
915 let v32
: i8x4
= simd_shuffle4(a
, a
, [0, 1, 2, 3]);
916 transmute
::<i64x4
, _
>(simd_cast(v32
))
919 /// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit
920 /// integers, and stores the results in `dst`.
922 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32)
924 #[target_feature(enable = "avx2")]
925 #[cfg_attr(test, assert_instr(vpmovzxwd))]
926 #[stable(feature = "simd_x86", since = "1.27.0")]
927 pub unsafe fn _mm256_cvtepu16_epi32(a
: __m128i
) -> __m256i
{
928 transmute
::<i32x8
, _
>(simd_cast(a
.as_u16x8()))
931 /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit
932 /// integers. The upper four elements of `a` are unused.
934 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi64)
936 #[target_feature(enable = "avx2")]
937 #[cfg_attr(test, assert_instr(vpmovzxwq))]
938 #[stable(feature = "simd_x86", since = "1.27.0")]
939 pub unsafe fn _mm256_cvtepu16_epi64(a
: __m128i
) -> __m256i
{
940 let a
= a
.as_u16x8();
941 let v64
: u16x4
= simd_shuffle4(a
, a
, [0, 1, 2, 3]);
942 transmute
::<i64x4
, _
>(simd_cast(v64
))
945 /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers.
947 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_epi64)
949 #[target_feature(enable = "avx2")]
950 #[cfg_attr(test, assert_instr(vpmovzxdq))]
951 #[stable(feature = "simd_x86", since = "1.27.0")]
952 pub unsafe fn _mm256_cvtepu32_epi64(a
: __m128i
) -> __m256i
{
953 transmute
::<i64x4
, _
>(simd_cast(a
.as_u32x4()))
956 /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers.
958 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi16)
960 #[target_feature(enable = "avx2")]
961 #[cfg_attr(test, assert_instr(vpmovzxbw))]
962 #[stable(feature = "simd_x86", since = "1.27.0")]
963 pub unsafe fn _mm256_cvtepu8_epi16(a
: __m128i
) -> __m256i
{
964 transmute
::<i16x16
, _
>(simd_cast(a
.as_u8x16()))
967 /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit
968 /// integers. The upper eight elements of `a` are unused.
970 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi32)
972 #[target_feature(enable = "avx2")]
973 #[cfg_attr(test, assert_instr(vpmovzxbd))]
974 #[stable(feature = "simd_x86", since = "1.27.0")]
975 pub unsafe fn _mm256_cvtepu8_epi32(a
: __m128i
) -> __m256i
{
976 let a
= a
.as_u8x16();
977 let v64
: u8x8
= simd_shuffle8(a
, a
, [0, 1, 2, 3, 4, 5, 6, 7]);
978 transmute
::<i32x8
, _
>(simd_cast(v64
))
981 /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit
982 /// integers. The upper twelve elements of `a` are unused.
984 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi64)
986 #[target_feature(enable = "avx2")]
987 #[cfg_attr(test, assert_instr(vpmovzxbq))]
988 #[stable(feature = "simd_x86", since = "1.27.0")]
989 pub unsafe fn _mm256_cvtepu8_epi64(a
: __m128i
) -> __m256i
{
990 let a
= a
.as_u8x16();
991 let v32
: u8x4
= simd_shuffle4(a
, a
, [0, 1, 2, 3]);
992 transmute
::<i64x4
, _
>(simd_cast(v32
))
995 /// Extracts 128 bits (of integer data) from `a` selected with `imm8`.
997 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256)
999 #[target_feature(enable = "avx2")]
1001 all(test
, not(target_os
= "windows")),
1002 assert_instr(vextractf128
, imm8
= 1)
1004 #[rustc_args_required_const(1)]
1005 #[stable(feature = "simd_x86", since = "1.27.0")]
1006 pub unsafe fn _mm256_extracti128_si256(a
: __m256i
, imm8
: i32) -> __m128i
{
1007 let a
= a
.as_i64x4();
1008 let b
= _mm256_undefined_si256().as_i64x4();
1009 let dst
: i64x2
= match imm8
& 0b01 {
1010 0 => simd_shuffle2(a
, b
, [0, 1]),
1011 _
=> simd_shuffle2(a
, b
, [2, 3]),
1016 /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`.
1018 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16)
1020 #[target_feature(enable = "avx2")]
1021 #[cfg_attr(test, assert_instr(vphaddw))]
1022 #[stable(feature = "simd_x86", since = "1.27.0")]
1023 pub unsafe fn _mm256_hadd_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
1024 transmute(phaddw(a
.as_i16x16(), b
.as_i16x16()))
1027 /// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`.
1029 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32)
1031 #[target_feature(enable = "avx2")]
1032 #[cfg_attr(test, assert_instr(vphaddd))]
1033 #[stable(feature = "simd_x86", since = "1.27.0")]
1034 pub unsafe fn _mm256_hadd_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
1035 transmute(phaddd(a
.as_i32x8(), b
.as_i32x8()))
1038 /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`
1039 /// using saturation.
1041 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16)
1043 #[target_feature(enable = "avx2")]
1044 #[cfg_attr(test, assert_instr(vphaddsw))]
1045 #[stable(feature = "simd_x86", since = "1.27.0")]
1046 pub unsafe fn _mm256_hadds_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
1047 transmute(phaddsw(a
.as_i16x16(), b
.as_i16x16()))
1050 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
1052 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi16)
1054 #[target_feature(enable = "avx2")]
1055 #[cfg_attr(test, assert_instr(vphsubw))]
1056 #[stable(feature = "simd_x86", since = "1.27.0")]
1057 pub unsafe fn _mm256_hsub_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
1058 transmute(phsubw(a
.as_i16x16(), b
.as_i16x16()))
1061 /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`.
1063 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi32)
1065 #[target_feature(enable = "avx2")]
1066 #[cfg_attr(test, assert_instr(vphsubd))]
1067 #[stable(feature = "simd_x86", since = "1.27.0")]
1068 pub unsafe fn _mm256_hsub_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
1069 transmute(phsubd(a
.as_i32x8(), b
.as_i32x8()))
1072 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`
1073 /// using saturation.
1075 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsubs_epi16)
1077 #[target_feature(enable = "avx2")]
1078 #[cfg_attr(test, assert_instr(vphsubsw))]
1079 #[stable(feature = "simd_x86", since = "1.27.0")]
1080 pub unsafe fn _mm256_hsubs_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
1081 transmute(phsubsw(a
.as_i16x16(), b
.as_i16x16()))
1084 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1086 /// `scale` is between 1 and 8.
1088 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi32)
1090 #[target_feature(enable = "avx2")]
1091 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
1092 #[rustc_args_required_const(2)]
1093 #[stable(feature = "simd_x86", since = "1.27.0")]
1094 pub unsafe fn _mm_i32gather_epi32(slice
: *const i32, offsets
: __m128i
, scale
: i32) -> __m128i
{
1095 let zero
= _mm_setzero_si128().as_i32x4();
1096 let neg_one
= _mm_set1_epi32(-1).as_i32x4();
1097 let offsets
= offsets
.as_i32x4();
1098 let slice
= slice
as *const i8;
1101 pgatherdd(zero
, slice
, offsets
, neg_one
, $imm8
)
1104 let r
= constify_imm8
!(scale
, call
);
1108 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1110 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1111 /// that position instead.
1113 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi32)
1115 #[target_feature(enable = "avx2")]
1116 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
1117 #[rustc_args_required_const(4)]
1118 #[stable(feature = "simd_x86", since = "1.27.0")]
1119 pub unsafe fn _mm_mask_i32gather_epi32(
1126 let src
= src
.as_i32x4();
1127 let mask
= mask
.as_i32x4();
1128 let offsets
= offsets
.as_i32x4();
1129 let slice
= slice
as *const i8;
1132 pgatherdd(src
, slice
, offsets
, mask
, $imm8
)
1135 let r
= constify_imm8
!(scale
, call
);
1139 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1141 /// `scale` is between 1 and 8.
1143 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi32)
1145 #[target_feature(enable = "avx2")]
1146 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
1147 #[rustc_args_required_const(2)]
1148 #[stable(feature = "simd_x86", since = "1.27.0")]
1149 pub unsafe fn _mm256_i32gather_epi32(slice
: *const i32, offsets
: __m256i
, scale
: i32) -> __m256i
{
1150 let zero
= _mm256_setzero_si256().as_i32x8();
1151 let neg_one
= _mm256_set1_epi32(-1).as_i32x8();
1152 let offsets
= offsets
.as_i32x8();
1153 let slice
= slice
as *const i8;
1156 vpgatherdd(zero
, slice
, offsets
, neg_one
, $imm8
)
1159 let r
= constify_imm8
!(scale
, call
);
1163 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1165 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1166 /// that position instead.
1168 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi32)
1170 #[target_feature(enable = "avx2")]
1171 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
1172 #[rustc_args_required_const(4)]
1173 #[stable(feature = "simd_x86", since = "1.27.0")]
1174 pub unsafe fn _mm256_mask_i32gather_epi32(
1181 let src
= src
.as_i32x8();
1182 let mask
= mask
.as_i32x8();
1183 let offsets
= offsets
.as_i32x8();
1184 let slice
= slice
as *const i8;
1187 vpgatherdd(src
, slice
, offsets
, mask
, $imm8
)
1190 let r
= constify_imm8
!(scale
, call
);
1194 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1196 /// `scale` is between 1 and 8.
1198 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_ps)
1200 #[target_feature(enable = "avx2")]
1201 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
1202 #[rustc_args_required_const(2)]
1203 #[stable(feature = "simd_x86", since = "1.27.0")]
1204 pub unsafe fn _mm_i32gather_ps(slice
: *const f32, offsets
: __m128i
, scale
: i32) -> __m128
{
1205 let zero
= _mm_setzero_ps();
1206 let neg_one
= _mm_set1_ps(-1.0);
1207 let offsets
= offsets
.as_i32x4();
1208 let slice
= slice
as *const i8;
1211 pgatherdps(zero
, slice
, offsets
, neg_one
, $imm8
)
1214 constify_imm8
!(scale
, call
)
1217 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1219 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1220 /// that position instead.
1222 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_ps)
1224 #[target_feature(enable = "avx2")]
1225 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
1226 #[rustc_args_required_const(4)]
1227 #[stable(feature = "simd_x86", since = "1.27.0")]
1228 pub unsafe fn _mm_mask_i32gather_ps(
1235 let offsets
= offsets
.as_i32x4();
1236 let slice
= slice
as *const i8;
1239 pgatherdps(src
, slice
, offsets
, mask
, $imm8
)
1242 constify_imm8
!(scale
, call
)
1245 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1247 /// `scale` is between 1 and 8.
1249 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_ps)
1251 #[target_feature(enable = "avx2")]
1252 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
1253 #[rustc_args_required_const(2)]
1254 #[stable(feature = "simd_x86", since = "1.27.0")]
1255 pub unsafe fn _mm256_i32gather_ps(slice
: *const f32, offsets
: __m256i
, scale
: i32) -> __m256
{
1256 let zero
= _mm256_setzero_ps();
1257 let neg_one
= _mm256_set1_ps(-1.0);
1258 let offsets
= offsets
.as_i32x8();
1259 let slice
= slice
as *const i8;
1262 vpgatherdps(zero
, slice
, offsets
, neg_one
, $imm8
)
1265 constify_imm8
!(scale
, call
)
1268 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1270 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1271 /// that position instead.
1273 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_ps)
1275 #[target_feature(enable = "avx2")]
1276 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
1277 #[rustc_args_required_const(4)]
1278 #[stable(feature = "simd_x86", since = "1.27.0")]
1279 pub unsafe fn _mm256_mask_i32gather_ps(
1286 let offsets
= offsets
.as_i32x8();
1287 let slice
= slice
as *const i8;
1290 vpgatherdps(src
, slice
, offsets
, mask
, $imm8
)
1293 constify_imm8
!(scale
, call
)
1296 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1298 /// `scale` is between 1 and 8.
1300 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi64)
1302 #[target_feature(enable = "avx2")]
1303 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
1304 #[rustc_args_required_const(2)]
1305 #[stable(feature = "simd_x86", since = "1.27.0")]
1306 pub unsafe fn _mm_i32gather_epi64(slice
: *const i64, offsets
: __m128i
, scale
: i32) -> __m128i
{
1307 let zero
= _mm_setzero_si128().as_i64x2();
1308 let neg_one
= _mm_set1_epi64x(-1).as_i64x2();
1309 let offsets
= offsets
.as_i32x4();
1310 let slice
= slice
as *const i8;
1313 pgatherdq(zero
, slice
, offsets
, neg_one
, $imm8
)
1316 let r
= constify_imm8
!(scale
, call
);
1320 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1322 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1323 /// that position instead.
1325 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi64)
1327 #[target_feature(enable = "avx2")]
1328 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
1329 #[rustc_args_required_const(4)]
1330 #[stable(feature = "simd_x86", since = "1.27.0")]
1331 pub unsafe fn _mm_mask_i32gather_epi64(
1338 let src
= src
.as_i64x2();
1339 let mask
= mask
.as_i64x2();
1340 let offsets
= offsets
.as_i32x4();
1341 let slice
= slice
as *const i8;
1344 pgatherdq(src
, slice
, offsets
, mask
, $imm8
)
1347 let r
= constify_imm8
!(scale
, call
);
1351 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1353 /// `scale` is between 1 and 8.
1355 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64)
1357 #[target_feature(enable = "avx2")]
1358 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
1359 #[rustc_args_required_const(2)]
1360 #[stable(feature = "simd_x86", since = "1.27.0")]
1361 pub unsafe fn _mm256_i32gather_epi64(slice
: *const i64, offsets
: __m128i
, scale
: i32) -> __m256i
{
1362 let zero
= _mm256_setzero_si256().as_i64x4();
1363 let neg_one
= _mm256_set1_epi64x(-1).as_i64x4();
1364 let offsets
= offsets
.as_i32x4();
1365 let slice
= slice
as *const i8;
1368 vpgatherdq(zero
, slice
, offsets
, neg_one
, $imm8
)
1371 let r
= constify_imm8
!(scale
, call
);
1375 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1377 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1378 /// that position instead.
1380 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi64)
1382 #[target_feature(enable = "avx2")]
1383 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
1384 #[rustc_args_required_const(4)]
1385 #[stable(feature = "simd_x86", since = "1.27.0")]
1386 pub unsafe fn _mm256_mask_i32gather_epi64(
1393 let src
= src
.as_i64x4();
1394 let mask
= mask
.as_i64x4();
1395 let offsets
= offsets
.as_i32x4();
1396 let slice
= slice
as *const i8;
1399 vpgatherdq(src
, slice
, offsets
, mask
, $imm8
)
1402 let r
= constify_imm8
!(scale
, call
);
1406 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1408 /// `scale` is between 1 and 8.
1410 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_pd)
1412 #[target_feature(enable = "avx2")]
1413 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
1414 #[rustc_args_required_const(2)]
1415 #[stable(feature = "simd_x86", since = "1.27.0")]
1416 pub unsafe fn _mm_i32gather_pd(slice
: *const f64, offsets
: __m128i
, scale
: i32) -> __m128d
{
1417 let zero
= _mm_setzero_pd();
1418 let neg_one
= _mm_set1_pd(-1.0);
1419 let offsets
= offsets
.as_i32x4();
1420 let slice
= slice
as *const i8;
1423 pgatherdpd(zero
, slice
, offsets
, neg_one
, $imm8
)
1426 constify_imm8
!(scale
, call
)
1429 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1431 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1432 /// that position instead.
1434 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_pd)
1436 #[target_feature(enable = "avx2")]
1437 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
1438 #[rustc_args_required_const(4)]
1439 #[stable(feature = "simd_x86", since = "1.27.0")]
1440 pub unsafe fn _mm_mask_i32gather_pd(
1447 let offsets
= offsets
.as_i32x4();
1448 let slice
= slice
as *const i8;
1451 pgatherdpd(src
, slice
, offsets
, mask
, $imm8
)
1454 constify_imm8
!(scale
, call
)
1457 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1459 /// `scale` is between 1 and 8.
1461 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_pd)
1463 #[target_feature(enable = "avx2")]
1464 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
1465 #[rustc_args_required_const(2)]
1466 #[stable(feature = "simd_x86", since = "1.27.0")]
1467 pub unsafe fn _mm256_i32gather_pd(slice
: *const f64, offsets
: __m128i
, scale
: i32) -> __m256d
{
1468 let zero
= _mm256_setzero_pd();
1469 let neg_one
= _mm256_set1_pd(-1.0);
1470 let offsets
= offsets
.as_i32x4();
1471 let slice
= slice
as *const i8;
1474 vpgatherdpd(zero
, slice
, offsets
, neg_one
, $imm8
)
1477 constify_imm8
!(scale
, call
)
1480 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1482 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1483 /// that position instead.
1485 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_pd)
1487 #[target_feature(enable = "avx2")]
1488 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
1489 #[rustc_args_required_const(4)]
1490 #[stable(feature = "simd_x86", since = "1.27.0")]
1491 pub unsafe fn _mm256_mask_i32gather_pd(
1498 let offsets
= offsets
.as_i32x4();
1499 let slice
= slice
as *const i8;
1502 vpgatherdpd(src
, slice
, offsets
, mask
, $imm8
)
1505 constify_imm8
!(scale
, call
)
1508 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1510 /// `scale` is between 1 and 8.
1512 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi32)
1514 #[target_feature(enable = "avx2")]
1515 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
1516 #[rustc_args_required_const(2)]
1517 #[stable(feature = "simd_x86", since = "1.27.0")]
1518 pub unsafe fn _mm_i64gather_epi32(slice
: *const i32, offsets
: __m128i
, scale
: i32) -> __m128i
{
1519 let zero
= _mm_setzero_si128().as_i32x4();
1520 let neg_one
= _mm_set1_epi64x(-1).as_i32x4();
1521 let offsets
= offsets
.as_i64x2();
1522 let slice
= slice
as *const i8;
1525 pgatherqd(zero
, slice
, offsets
, neg_one
, $imm8
)
1528 let r
= constify_imm8
!(scale
, call
);
1532 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1534 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1535 /// that position instead.
1537 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi32)
1539 #[target_feature(enable = "avx2")]
1540 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
1541 #[rustc_args_required_const(4)]
1542 #[stable(feature = "simd_x86", since = "1.27.0")]
1543 pub unsafe fn _mm_mask_i64gather_epi32(
1550 let src
= src
.as_i32x4();
1551 let mask
= mask
.as_i32x4();
1552 let offsets
= offsets
.as_i64x2();
1553 let slice
= slice
as *const i8;
1556 pgatherqd(src
, slice
, offsets
, mask
, $imm8
)
1559 let r
= constify_imm8
!(scale
, call
);
1563 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1565 /// `scale` is between 1 and 8.
1567 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi32)
1569 #[target_feature(enable = "avx2")]
1570 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
1571 #[rustc_args_required_const(2)]
1572 #[stable(feature = "simd_x86", since = "1.27.0")]
1573 pub unsafe fn _mm256_i64gather_epi32(slice
: *const i32, offsets
: __m256i
, scale
: i32) -> __m128i
{
1574 let zero
= _mm_setzero_si128().as_i32x4();
1575 let neg_one
= _mm_set1_epi64x(-1).as_i32x4();
1576 let offsets
= offsets
.as_i64x4();
1577 let slice
= slice
as *const i8;
1580 vpgatherqd(zero
, slice
, offsets
, neg_one
, $imm8
)
1583 let r
= constify_imm8
!(scale
, call
);
1587 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1589 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1590 /// that position instead.
1592 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi32)
1594 #[target_feature(enable = "avx2")]
1595 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
1596 #[rustc_args_required_const(4)]
1597 #[stable(feature = "simd_x86", since = "1.27.0")]
1598 pub unsafe fn _mm256_mask_i64gather_epi32(
1605 let src
= src
.as_i32x4();
1606 let mask
= mask
.as_i32x4();
1607 let offsets
= offsets
.as_i64x4();
1608 let slice
= slice
as *const i8;
1611 vpgatherqd(src
, slice
, offsets
, mask
, $imm8
)
1614 let r
= constify_imm8
!(scale
, call
);
1618 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1620 /// `scale` is between 1 and 8.
1622 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_ps)
1624 #[target_feature(enable = "avx2")]
1625 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
1626 #[rustc_args_required_const(2)]
1627 #[stable(feature = "simd_x86", since = "1.27.0")]
1628 pub unsafe fn _mm_i64gather_ps(slice
: *const f32, offsets
: __m128i
, scale
: i32) -> __m128
{
1629 let zero
= _mm_setzero_ps();
1630 let neg_one
= _mm_set1_ps(-1.0);
1631 let offsets
= offsets
.as_i64x2();
1632 let slice
= slice
as *const i8;
1635 pgatherqps(zero
, slice
, offsets
, neg_one
, $imm8
)
1638 constify_imm8
!(scale
, call
)
1641 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1643 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1644 /// that position instead.
1646 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_ps)
1648 #[target_feature(enable = "avx2")]
1649 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
1650 #[rustc_args_required_const(4)]
1651 #[stable(feature = "simd_x86", since = "1.27.0")]
1652 pub unsafe fn _mm_mask_i64gather_ps(
1659 let offsets
= offsets
.as_i64x2();
1660 let slice
= slice
as *const i8;
1663 pgatherqps(src
, slice
, offsets
, mask
, $imm8
)
1666 constify_imm8
!(scale
, call
)
1669 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1671 /// `scale` is between 1 and 8.
1673 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_ps)
1675 #[target_feature(enable = "avx2")]
1676 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
1677 #[rustc_args_required_const(2)]
1678 #[stable(feature = "simd_x86", since = "1.27.0")]
1679 pub unsafe fn _mm256_i64gather_ps(slice
: *const f32, offsets
: __m256i
, scale
: i32) -> __m128
{
1680 let zero
= _mm_setzero_ps();
1681 let neg_one
= _mm_set1_ps(-1.0);
1682 let offsets
= offsets
.as_i64x4();
1683 let slice
= slice
as *const i8;
1686 vpgatherqps(zero
, slice
, offsets
, neg_one
, $imm8
)
1689 constify_imm8
!(scale
, call
)
1692 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1694 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1695 /// that position instead.
1697 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_ps)
1699 #[target_feature(enable = "avx2")]
1700 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
1701 #[rustc_args_required_const(4)]
1702 #[stable(feature = "simd_x86", since = "1.27.0")]
1703 pub unsafe fn _mm256_mask_i64gather_ps(
1710 let offsets
= offsets
.as_i64x4();
1711 let slice
= slice
as *const i8;
1714 vpgatherqps(src
, slice
, offsets
, mask
, $imm8
)
1717 constify_imm8
!(scale
, call
)
1720 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1722 /// `scale` is between 1 and 8.
1724 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi64)
1726 #[target_feature(enable = "avx2")]
1727 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
1728 #[rustc_args_required_const(2)]
1729 #[stable(feature = "simd_x86", since = "1.27.0")]
1730 pub unsafe fn _mm_i64gather_epi64(slice
: *const i64, offsets
: __m128i
, scale
: i32) -> __m128i
{
1731 let zero
= _mm_setzero_si128().as_i64x2();
1732 let neg_one
= _mm_set1_epi64x(-1).as_i64x2();
1733 let slice
= slice
as *const i8;
1734 let offsets
= offsets
.as_i64x2();
1737 pgatherqq(zero
, slice
, offsets
, neg_one
, $imm8
)
1740 let r
= constify_imm8
!(scale
, call
);
1744 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1746 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1747 /// that position instead.
1749 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi64)
1751 #[target_feature(enable = "avx2")]
1752 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
1753 #[rustc_args_required_const(4)]
1754 #[stable(feature = "simd_x86", since = "1.27.0")]
1755 pub unsafe fn _mm_mask_i64gather_epi64(
1762 let src
= src
.as_i64x2();
1763 let mask
= mask
.as_i64x2();
1764 let offsets
= offsets
.as_i64x2();
1765 let slice
= slice
as *const i8;
1768 pgatherqq(src
, slice
, offsets
, mask
, $imm8
)
1771 let r
= constify_imm8
!(scale
, call
);
1775 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1777 /// `scale` is between 1 and 8.
1779 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi64)
1781 #[target_feature(enable = "avx2")]
1782 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
1783 #[rustc_args_required_const(2)]
1784 #[stable(feature = "simd_x86", since = "1.27.0")]
1785 pub unsafe fn _mm256_i64gather_epi64(slice
: *const i64, offsets
: __m256i
, scale
: i32) -> __m256i
{
1786 let zero
= _mm256_setzero_si256().as_i64x4();
1787 let neg_one
= _mm256_set1_epi64x(-1).as_i64x4();
1788 let slice
= slice
as *const i8;
1789 let offsets
= offsets
.as_i64x4();
1792 vpgatherqq(zero
, slice
, offsets
, neg_one
, $imm8
)
1795 let r
= constify_imm8
!(scale
, call
);
1799 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1801 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1802 /// that position instead.
1804 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi64)
1806 #[target_feature(enable = "avx2")]
1807 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
1808 #[rustc_args_required_const(4)]
1809 #[stable(feature = "simd_x86", since = "1.27.0")]
1810 pub unsafe fn _mm256_mask_i64gather_epi64(
1817 let src
= src
.as_i64x4();
1818 let mask
= mask
.as_i64x4();
1819 let offsets
= offsets
.as_i64x4();
1820 let slice
= slice
as *const i8;
1823 vpgatherqq(src
, slice
, offsets
, mask
, $imm8
)
1826 let r
= constify_imm8
!(scale
, call
);
1830 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1832 /// `scale` is between 1 and 8.
1834 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_pd)
1836 #[target_feature(enable = "avx2")]
1837 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
1838 #[rustc_args_required_const(2)]
1839 #[stable(feature = "simd_x86", since = "1.27.0")]
1840 pub unsafe fn _mm_i64gather_pd(slice
: *const f64, offsets
: __m128i
, scale
: i32) -> __m128d
{
1841 let zero
= _mm_setzero_pd();
1842 let neg_one
= _mm_set1_pd(-1.0);
1843 let slice
= slice
as *const i8;
1844 let offsets
= offsets
.as_i64x2();
1847 pgatherqpd(zero
, slice
, offsets
, neg_one
, $imm8
)
1850 constify_imm8
!(scale
, call
)
1853 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1855 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1856 /// that position instead.
1858 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_pd)
1860 #[target_feature(enable = "avx2")]
1861 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
1862 #[rustc_args_required_const(4)]
1863 #[stable(feature = "simd_x86", since = "1.27.0")]
1864 pub unsafe fn _mm_mask_i64gather_pd(
1871 let slice
= slice
as *const i8;
1872 let offsets
= offsets
.as_i64x2();
1875 pgatherqpd(src
, slice
, offsets
, mask
, $imm8
)
1878 constify_imm8
!(scale
, call
)
1881 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1883 /// `scale` is between 1 and 8.
1885 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_pd)
1887 #[target_feature(enable = "avx2")]
1888 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
1889 #[rustc_args_required_const(2)]
1890 #[stable(feature = "simd_x86", since = "1.27.0")]
1891 pub unsafe fn _mm256_i64gather_pd(slice
: *const f64, offsets
: __m256i
, scale
: i32) -> __m256d
{
1892 let zero
= _mm256_setzero_pd();
1893 let neg_one
= _mm256_set1_pd(-1.0);
1894 let slice
= slice
as *const i8;
1895 let offsets
= offsets
.as_i64x4();
1898 vpgatherqpd(zero
, slice
, offsets
, neg_one
, $imm8
)
1901 constify_imm8
!(scale
, call
)
1904 /// Returns values from `slice` at offsets determined by `offsets * scale`,
1906 /// `scale` is between 1 and 8. If mask is set, load the value from `src` in
1907 /// that position instead.
1909 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_pd)
1911 #[target_feature(enable = "avx2")]
1912 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
1913 #[rustc_args_required_const(4)]
1914 #[stable(feature = "simd_x86", since = "1.27.0")]
1915 pub unsafe fn _mm256_mask_i64gather_pd(
1922 let slice
= slice
as *const i8;
1923 let offsets
= offsets
.as_i64x4();
1926 vpgatherqpd(src
, slice
, offsets
, mask
, $imm8
)
1929 constify_imm8
!(scale
, call
)
1932 /// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the
1933 /// location specified by `imm8`.
1935 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256)
1937 #[target_feature(enable = "avx2")]
1939 all(test
, not(target_os
= "windows")),
1940 assert_instr(vinsertf128
, imm8
= 1)
1942 #[rustc_args_required_const(2)]
1943 #[stable(feature = "simd_x86", since = "1.27.0")]
1944 pub unsafe fn _mm256_inserti128_si256(a
: __m256i
, b
: __m128i
, imm8
: i32) -> __m256i
{
1945 let a
= a
.as_i64x4();
1946 let b
= _mm256_castsi128_si256(b
).as_i64x4();
1947 let dst
: i64x4
= match imm8
& 0b01 {
1948 0 => simd_shuffle4(a
, b
, [4, 5, 2, 3]),
1949 _
=> simd_shuffle4(a
, b
, [0, 1, 4, 5]),
1954 /// Multiplies packed signed 16-bit integers in `a` and `b`, producing
1955 /// intermediate signed 32-bit integers. Horizontally add adjacent pairs
1956 /// of intermediate 32-bit integers.
1958 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd_epi16)
1960 #[target_feature(enable = "avx2")]
1961 #[cfg_attr(test, assert_instr(vpmaddwd))]
1962 #[stable(feature = "simd_x86", since = "1.27.0")]
1963 pub unsafe fn _mm256_madd_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
1964 transmute(pmaddwd(a
.as_i16x16(), b
.as_i16x16()))
1967 /// Vertically multiplies each unsigned 8-bit integer from `a` with the
1968 /// corresponding signed 8-bit integer from `b`, producing intermediate
1969 /// signed 16-bit integers. Horizontally add adjacent pairs of intermediate
1970 /// signed 16-bit integers
1972 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16)
1974 #[target_feature(enable = "avx2")]
1975 #[cfg_attr(test, assert_instr(vpmaddubsw))]
1976 #[stable(feature = "simd_x86", since = "1.27.0")]
1977 pub unsafe fn _mm256_maddubs_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
1978 transmute(pmaddubsw(a
.as_u8x32(), b
.as_u8x32()))
1981 /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1982 /// (elements are zeroed out when the highest bit is not set in the
1983 /// corresponding element).
1985 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi32)
1987 #[target_feature(enable = "avx2")]
1988 #[cfg_attr(test, assert_instr(vpmaskmovd))]
1989 #[stable(feature = "simd_x86", since = "1.27.0")]
1990 pub unsafe fn _mm_maskload_epi32(mem_addr
: *const i32, mask
: __m128i
) -> __m128i
{
1991 transmute(maskloadd(mem_addr
as *const i8, mask
.as_i32x4()))
1994 /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
1995 /// (elements are zeroed out when the highest bit is not set in the
1996 /// corresponding element).
1998 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi32)
2000 #[target_feature(enable = "avx2")]
2001 #[cfg_attr(test, assert_instr(vpmaskmovd))]
2002 #[stable(feature = "simd_x86", since = "1.27.0")]
2003 pub unsafe fn _mm256_maskload_epi32(mem_addr
: *const i32, mask
: __m256i
) -> __m256i
{
2004 transmute(maskloadd256(mem_addr
as *const i8, mask
.as_i32x8()))
2007 /// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
2008 /// (elements are zeroed out when the highest bit is not set in the
2009 /// corresponding element).
2011 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi64)
2013 #[target_feature(enable = "avx2")]
2014 #[cfg_attr(test, assert_instr(vpmaskmovq))]
2015 #[stable(feature = "simd_x86", since = "1.27.0")]
2016 pub unsafe fn _mm_maskload_epi64(mem_addr
: *const i64, mask
: __m128i
) -> __m128i
{
2017 transmute(maskloadq(mem_addr
as *const i8, mask
.as_i64x2()))
2020 /// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
2021 /// (elements are zeroed out when the highest bit is not set in the
2022 /// corresponding element).
2024 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi64)
2026 #[target_feature(enable = "avx2")]
2027 #[cfg_attr(test, assert_instr(vpmaskmovq))]
2028 #[stable(feature = "simd_x86", since = "1.27.0")]
2029 pub unsafe fn _mm256_maskload_epi64(mem_addr
: *const i64, mask
: __m256i
) -> __m256i
{
2030 transmute(maskloadq256(mem_addr
as *const i8, mask
.as_i64x4()))
2033 /// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
2034 /// using `mask` (elements are not stored when the highest bit is not set
2035 /// in the corresponding element).
2037 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi32)
2039 #[target_feature(enable = "avx2")]
2040 #[cfg_attr(test, assert_instr(vpmaskmovd))]
2041 #[stable(feature = "simd_x86", since = "1.27.0")]
2042 pub unsafe fn _mm_maskstore_epi32(mem_addr
: *mut i32, mask
: __m128i
, a
: __m128i
) {
2043 maskstored(mem_addr
as *mut i8, mask
.as_i32x4(), a
.as_i32x4())
2046 /// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
2047 /// using `mask` (elements are not stored when the highest bit is not set
2048 /// in the corresponding element).
2050 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi32)
2052 #[target_feature(enable = "avx2")]
2053 #[cfg_attr(test, assert_instr(vpmaskmovd))]
2054 #[stable(feature = "simd_x86", since = "1.27.0")]
2055 pub unsafe fn _mm256_maskstore_epi32(mem_addr
: *mut i32, mask
: __m256i
, a
: __m256i
) {
2056 maskstored256(mem_addr
as *mut i8, mask
.as_i32x8(), a
.as_i32x8())
2059 /// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
2060 /// using `mask` (elements are not stored when the highest bit is not set
2061 /// in the corresponding element).
2063 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi64)
2065 #[target_feature(enable = "avx2")]
2066 #[cfg_attr(test, assert_instr(vpmaskmovq))]
2067 #[stable(feature = "simd_x86", since = "1.27.0")]
2068 pub unsafe fn _mm_maskstore_epi64(mem_addr
: *mut i64, mask
: __m128i
, a
: __m128i
) {
2069 maskstoreq(mem_addr
as *mut i8, mask
.as_i64x2(), a
.as_i64x2())
2072 /// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
2073 /// using `mask` (elements are not stored when the highest bit is not set
2074 /// in the corresponding element).
2076 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi64)
2078 #[target_feature(enable = "avx2")]
2079 #[cfg_attr(test, assert_instr(vpmaskmovq))]
2080 #[stable(feature = "simd_x86", since = "1.27.0")]
2081 pub unsafe fn _mm256_maskstore_epi64(mem_addr
: *mut i64, mask
: __m256i
, a
: __m256i
) {
2082 maskstoreq256(mem_addr
as *mut i8, mask
.as_i64x4(), a
.as_i64x4())
2085 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
2088 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16)
2090 #[target_feature(enable = "avx2")]
2091 #[cfg_attr(test, assert_instr(vpmaxsw))]
2092 #[stable(feature = "simd_x86", since = "1.27.0")]
2093 pub unsafe fn _mm256_max_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2094 transmute(pmaxsw(a
.as_i16x16(), b
.as_i16x16()))
2097 /// Compares packed 32-bit integers in `a` and `b`, and returns the packed
2100 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32)
2102 #[target_feature(enable = "avx2")]
2103 #[cfg_attr(test, assert_instr(vpmaxsd))]
2104 #[stable(feature = "simd_x86", since = "1.27.0")]
2105 pub unsafe fn _mm256_max_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2106 transmute(pmaxsd(a
.as_i32x8(), b
.as_i32x8()))
2109 /// Compares packed 8-bit integers in `a` and `b`, and returns the packed
2112 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8)
2114 #[target_feature(enable = "avx2")]
2115 #[cfg_attr(test, assert_instr(vpmaxsb))]
2116 #[stable(feature = "simd_x86", since = "1.27.0")]
2117 pub unsafe fn _mm256_max_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2118 transmute(pmaxsb(a
.as_i8x32(), b
.as_i8x32()))
2121 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
2122 /// the packed maximum values.
2124 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16)
2126 #[target_feature(enable = "avx2")]
2127 #[cfg_attr(test, assert_instr(vpmaxuw))]
2128 #[stable(feature = "simd_x86", since = "1.27.0")]
2129 pub unsafe fn _mm256_max_epu16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2130 transmute(pmaxuw(a
.as_u16x16(), b
.as_u16x16()))
2133 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2134 /// the packed maximum values.
2136 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32)
2138 #[target_feature(enable = "avx2")]
2139 #[cfg_attr(test, assert_instr(vpmaxud))]
2140 #[stable(feature = "simd_x86", since = "1.27.0")]
2141 pub unsafe fn _mm256_max_epu32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2142 transmute(pmaxud(a
.as_u32x8(), b
.as_u32x8()))
2145 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2146 /// the packed maximum values.
2148 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8)
2150 #[target_feature(enable = "avx2")]
2151 #[cfg_attr(test, assert_instr(vpmaxub))]
2152 #[stable(feature = "simd_x86", since = "1.27.0")]
2153 pub unsafe fn _mm256_max_epu8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2154 transmute(pmaxub(a
.as_u8x32(), b
.as_u8x32()))
2157 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
2160 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16)
2162 #[target_feature(enable = "avx2")]
2163 #[cfg_attr(test, assert_instr(vpminsw))]
2164 #[stable(feature = "simd_x86", since = "1.27.0")]
2165 pub unsafe fn _mm256_min_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2166 transmute(pminsw(a
.as_i16x16(), b
.as_i16x16()))
2169 /// Compares packed 32-bit integers in `a` and `b`, and returns the packed
2172 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32)
2174 #[target_feature(enable = "avx2")]
2175 #[cfg_attr(test, assert_instr(vpminsd))]
2176 #[stable(feature = "simd_x86", since = "1.27.0")]
2177 pub unsafe fn _mm256_min_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2178 transmute(pminsd(a
.as_i32x8(), b
.as_i32x8()))
2181 /// Compares packed 8-bit integers in `a` and `b`, and returns the packed
2184 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8)
2186 #[target_feature(enable = "avx2")]
2187 #[cfg_attr(test, assert_instr(vpminsb))]
2188 #[stable(feature = "simd_x86", since = "1.27.0")]
2189 pub unsafe fn _mm256_min_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2190 transmute(pminsb(a
.as_i8x32(), b
.as_i8x32()))
2193 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
2194 /// the packed minimum values.
2196 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16)
2198 #[target_feature(enable = "avx2")]
2199 #[cfg_attr(test, assert_instr(vpminuw))]
2200 #[stable(feature = "simd_x86", since = "1.27.0")]
2201 pub unsafe fn _mm256_min_epu16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2202 transmute(pminuw(a
.as_u16x16(), b
.as_u16x16()))
2205 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
2206 /// the packed minimum values.
2208 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32)
2210 #[target_feature(enable = "avx2")]
2211 #[cfg_attr(test, assert_instr(vpminud))]
2212 #[stable(feature = "simd_x86", since = "1.27.0")]
2213 pub unsafe fn _mm256_min_epu32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2214 transmute(pminud(a
.as_u32x8(), b
.as_u32x8()))
2217 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
2218 /// the packed minimum values.
2220 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8)
2222 #[target_feature(enable = "avx2")]
2223 #[cfg_attr(test, assert_instr(vpminub))]
2224 #[stable(feature = "simd_x86", since = "1.27.0")]
2225 pub unsafe fn _mm256_min_epu8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2226 transmute(pminub(a
.as_u8x32(), b
.as_u8x32()))
2229 /// Creates mask from the most significant bit of each 8-bit element in `a`,
2230 /// return the result.
2232 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8)
2234 #[target_feature(enable = "avx2")]
2235 #[cfg_attr(test, assert_instr(vpmovmskb))]
2236 #[stable(feature = "simd_x86", since = "1.27.0")]
2237 pub unsafe fn _mm256_movemask_epi8(a
: __m256i
) -> i32 {
2238 pmovmskb(a
.as_i8x32())
2241 /// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
2242 /// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit
2243 /// results in dst. Eight SADs are performed for each 128-bit lane using one
2244 /// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is
2245 /// selected from `b` starting at on the offset specified in `imm8`. Eight
2246 /// quadruplets are formed from sequential 8-bit integers selected from `a`
2247 /// starting at the offset specified in `imm8`.
2249 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mpsadbw_epu8)
2251 #[target_feature(enable = "avx2")]
2252 #[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))]
2253 #[rustc_args_required_const(2)]
2254 #[stable(feature = "simd_x86", since = "1.27.0")]
2255 pub unsafe fn _mm256_mpsadbw_epu8(a
: __m256i
, b
: __m256i
, imm8
: i32) -> __m256i
{
2256 let a
= a
.as_u8x32();
2257 let b
= b
.as_u8x32();
2260 mpsadbw(a
, b
, $imm8
)
2263 let r
= constify_imm8
!(imm8
, call
);
2267 /// Multiplies the low 32-bit integers from each packed 64-bit element in
2270 /// Returns the 64-bit results.
2272 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32)
2274 #[target_feature(enable = "avx2")]
2275 #[cfg_attr(test, assert_instr(vpmuldq))]
2276 #[stable(feature = "simd_x86", since = "1.27.0")]
2277 pub unsafe fn _mm256_mul_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2278 transmute(pmuldq(a
.as_i32x8(), b
.as_i32x8()))
2281 /// Multiplies the low unsigned 32-bit integers from each packed 64-bit
2282 /// element in `a` and `b`
2284 /// Returns the unsigned 64-bit results.
2286 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32)
2288 #[target_feature(enable = "avx2")]
2289 #[cfg_attr(test, assert_instr(vpmuludq))]
2290 #[stable(feature = "simd_x86", since = "1.27.0")]
2291 pub unsafe fn _mm256_mul_epu32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2292 transmute(pmuludq(a
.as_u32x8(), b
.as_u32x8()))
2295 /// Multiplies the packed 16-bit integers in `a` and `b`, producing
2296 /// intermediate 32-bit integers and returning the high 16 bits of the
2297 /// intermediate integers.
2299 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epi16)
2301 #[target_feature(enable = "avx2")]
2302 #[cfg_attr(test, assert_instr(vpmulhw))]
2303 #[stable(feature = "simd_x86", since = "1.27.0")]
2304 pub unsafe fn _mm256_mulhi_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2305 transmute(pmulhw(a
.as_i16x16(), b
.as_i16x16()))
2308 /// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing
2309 /// intermediate 32-bit integers and returning the high 16 bits of the
2310 /// intermediate integers.
2312 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epu16)
2314 #[target_feature(enable = "avx2")]
2315 #[cfg_attr(test, assert_instr(vpmulhuw))]
2316 #[stable(feature = "simd_x86", since = "1.27.0")]
2317 pub unsafe fn _mm256_mulhi_epu16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2318 transmute(pmulhuw(a
.as_u16x16(), b
.as_u16x16()))
2321 /// Multiplies the packed 16-bit integers in `a` and `b`, producing
2322 /// intermediate 32-bit integers, and returns the low 16 bits of the
2323 /// intermediate integers
2325 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16)
2327 #[target_feature(enable = "avx2")]
2328 #[cfg_attr(test, assert_instr(vpmullw))]
2329 #[stable(feature = "simd_x86", since = "1.27.0")]
2330 pub unsafe fn _mm256_mullo_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2331 transmute(simd_mul(a
.as_i16x16(), b
.as_i16x16()))
2334 /// Multiplies the packed 32-bit integers in `a` and `b`, producing
2335 /// intermediate 64-bit integers, and returns the low 32 bits of the
2336 /// intermediate integers
2338 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32)
2340 #[target_feature(enable = "avx2")]
2341 #[cfg_attr(test, assert_instr(vpmulld))]
2342 #[stable(feature = "simd_x86", since = "1.27.0")]
2343 pub unsafe fn _mm256_mullo_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2344 transmute(simd_mul(a
.as_i32x8(), b
.as_i32x8()))
2347 /// Multiplies packed 16-bit integers in `a` and `b`, producing
2348 /// intermediate signed 32-bit integers. Truncate each intermediate
2349 /// integer to the 18 most significant bits, round by adding 1, and
2350 /// return bits `[16:1]`.
2352 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhrs_epi16)
2354 #[target_feature(enable = "avx2")]
2355 #[cfg_attr(test, assert_instr(vpmulhrsw))]
2356 #[stable(feature = "simd_x86", since = "1.27.0")]
2357 pub unsafe fn _mm256_mulhrs_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2358 transmute(pmulhrsw(a
.as_i16x16(), b
.as_i16x16()))
2361 /// Computes the bitwise OR of 256 bits (representing integer data) in `a`
2364 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256)
2366 #[target_feature(enable = "avx2")]
2367 #[cfg_attr(test, assert_instr(vorps))]
2368 #[stable(feature = "simd_x86", since = "1.27.0")]
2369 pub unsafe fn _mm256_or_si256(a
: __m256i
, b
: __m256i
) -> __m256i
{
2370 transmute(simd_or(a
.as_i32x8(), b
.as_i32x8()))
2373 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2374 /// using signed saturation
2376 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16)
2378 #[target_feature(enable = "avx2")]
2379 #[cfg_attr(test, assert_instr(vpacksswb))]
2380 #[stable(feature = "simd_x86", since = "1.27.0")]
2381 pub unsafe fn _mm256_packs_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2382 transmute(packsswb(a
.as_i16x16(), b
.as_i16x16()))
2385 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2386 /// using signed saturation
2388 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32)
2390 #[target_feature(enable = "avx2")]
2391 #[cfg_attr(test, assert_instr(vpackssdw))]
2392 #[stable(feature = "simd_x86", since = "1.27.0")]
2393 pub unsafe fn _mm256_packs_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2394 transmute(packssdw(a
.as_i32x8(), b
.as_i32x8()))
2397 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
2398 /// using unsigned saturation
2400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16)
2402 #[target_feature(enable = "avx2")]
2403 #[cfg_attr(test, assert_instr(vpackuswb))]
2404 #[stable(feature = "simd_x86", since = "1.27.0")]
2405 pub unsafe fn _mm256_packus_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2406 transmute(packuswb(a
.as_i16x16(), b
.as_i16x16()))
2409 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
2410 /// using unsigned saturation
2412 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32)
2414 #[target_feature(enable = "avx2")]
2415 #[cfg_attr(test, assert_instr(vpackusdw))]
2416 #[stable(feature = "simd_x86", since = "1.27.0")]
2417 pub unsafe fn _mm256_packus_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2418 transmute(packusdw(a
.as_i32x8(), b
.as_i32x8()))
2421 /// Permutes packed 32-bit integers from `a` according to the content of `b`.
2423 /// The last 3 bits of each integer of `b` are used as addresses into the 8
2424 /// integers of `a`.
2426 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_epi32)
2428 #[target_feature(enable = "avx2")]
2429 #[cfg_attr(test, assert_instr(vpermps))]
2430 #[stable(feature = "simd_x86", since = "1.27.0")]
2431 pub unsafe fn _mm256_permutevar8x32_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2432 transmute(permd(a
.as_u32x8(), b
.as_u32x8()))
2435 /// Permutes 64-bit integers from `a` using control mask `imm8`.
2437 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_epi64)
2439 #[target_feature(enable = "avx2")]
2440 #[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
2441 #[rustc_args_required_const(1)]
2442 #[stable(feature = "simd_x86", since = "1.27.0")]
2443 pub unsafe fn _mm256_permute4x64_epi64(a
: __m256i
, imm8
: i32) -> __m256i
{
2444 let imm8
= (imm8
& 0xFF) as u8;
2445 let zero
= _mm256_setzero_si256().as_i64x4();
2446 let a
= a
.as_i64x4();
2447 macro_rules
! permute4
{
2448 ($a
:expr
, $b
:expr
, $c
:expr
, $d
:expr
) => {
2449 simd_shuffle4(a
, zero
, [$a
, $b
, $c
, $d
]);
2452 macro_rules
! permute3
{
2453 ($a
:expr
, $b
:expr
, $c
:expr
) => {
2454 match (imm8
>> 6) & 0b11 {
2455 0b00 => permute4
!($a
, $b
, $c
, 0),
2456 0b01 => permute4
!($a
, $b
, $c
, 1),
2457 0b10 => permute4
!($a
, $b
, $c
, 2),
2458 _
=> permute4
!($a
, $b
, $c
, 3),
2462 macro_rules
! permute2
{
2463 ($a
:expr
, $b
:expr
) => {
2464 match (imm8
>> 4) & 0b11 {
2465 0b00 => permute3
!($a
, $b
, 0),
2466 0b01 => permute3
!($a
, $b
, 1),
2467 0b10 => permute3
!($a
, $b
, 2),
2468 _
=> permute3
!($a
, $b
, 3),
2472 macro_rules
! permute1
{
2474 match (imm8
>> 2) & 0b11 {
2475 0b00 => permute2
!($a
, 0),
2476 0b01 => permute2
!($a
, 1),
2477 0b10 => permute2
!($a
, 2),
2478 _
=> permute2
!($a
, 3),
2482 let r
: i64x4
= match imm8
& 0b11 {
2483 0b00 => permute1
!(0),
2484 0b01 => permute1
!(1),
2485 0b10 => permute1
!(2),
2491 /// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`.
2493 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256)
2495 #[target_feature(enable = "avx2")]
2496 #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))]
2497 #[rustc_args_required_const(2)]
2498 #[stable(feature = "simd_x86", since = "1.27.0")]
2499 pub unsafe fn _mm256_permute2x128_si256(a
: __m256i
, b
: __m256i
, imm8
: i32) -> __m256i
{
2500 let a
= a
.as_i64x4();
2501 let b
= b
.as_i64x4();
2504 vperm2i128(a
, b
, $imm8
)
2507 transmute(constify_imm8
!(imm8
, call
))
2510 /// Shuffles 64-bit floating-point elements in `a` across lanes using the
2511 /// control in `imm8`.
2513 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd)
2515 #[target_feature(enable = "avx2")]
2516 #[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
2517 #[rustc_args_required_const(1)]
2518 #[stable(feature = "simd_x86", since = "1.27.0")]
2519 pub unsafe fn _mm256_permute4x64_pd(a
: __m256d
, imm8
: i32) -> __m256d
{
2520 let imm8
= (imm8
& 0xFF) as u8;
2521 let undef
= _mm256_undefined_pd();
2522 macro_rules
! shuffle_done
{
2523 ($x01
:expr
, $x23
:expr
, $x45
:expr
, $x67
:expr
) => {
2524 simd_shuffle4(a
, undef
, [$x01
, $x23
, $x45
, $x67
])
2527 macro_rules
! shuffle_x67
{
2528 ($x01
:expr
, $x23
:expr
, $x45
:expr
) => {
2529 match (imm8
>> 6) & 0b11 {
2530 0b00 => shuffle_done
!($x01
, $x23
, $x45
, 0),
2531 0b01 => shuffle_done
!($x01
, $x23
, $x45
, 1),
2532 0b10 => shuffle_done
!($x01
, $x23
, $x45
, 2),
2533 _
=> shuffle_done
!($x01
, $x23
, $x45
, 3),
2537 macro_rules
! shuffle_x45
{
2538 ($x01
:expr
, $x23
:expr
) => {
2539 match (imm8
>> 4) & 0b11 {
2540 0b00 => shuffle_x67
!($x01
, $x23
, 0),
2541 0b01 => shuffle_x67
!($x01
, $x23
, 1),
2542 0b10 => shuffle_x67
!($x01
, $x23
, 2),
2543 _
=> shuffle_x67
!($x01
, $x23
, 3),
2547 macro_rules
! shuffle_x23
{
2549 match (imm8
>> 2) & 0b11 {
2550 0b00 => shuffle_x45
!($x01
, 0),
2551 0b01 => shuffle_x45
!($x01
, 1),
2552 0b10 => shuffle_x45
!($x01
, 2),
2553 _
=> shuffle_x45
!($x01
, 3),
2558 0b00 => shuffle_x23
!(0),
2559 0b01 => shuffle_x23
!(1),
2560 0b10 => shuffle_x23
!(2),
2561 _
=> shuffle_x23
!(3),
2565 /// Shuffles eight 32-bit foating-point elements in `a` across lanes using
2566 /// the corresponding 32-bit integer index in `idx`.
2568 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps)
2570 #[target_feature(enable = "avx2")]
2571 #[cfg_attr(test, assert_instr(vpermps))]
2572 #[stable(feature = "simd_x86", since = "1.27.0")]
2573 pub unsafe fn _mm256_permutevar8x32_ps(a
: __m256
, idx
: __m256i
) -> __m256
{
2574 permps(a
, idx
.as_i32x8())
2577 /// Computes the absolute differences of packed unsigned 8-bit integers in `a`
2578 /// and `b`, then horizontally sum each consecutive 8 differences to
2579 /// produce four unsigned 16-bit integers, and pack these unsigned 16-bit
2580 /// integers in the low 16 bits of the 64-bit return value
2582 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sad_epu8)
2584 #[target_feature(enable = "avx2")]
2585 #[cfg_attr(test, assert_instr(vpsadbw))]
2586 #[stable(feature = "simd_x86", since = "1.27.0")]
2587 pub unsafe fn _mm256_sad_epu8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2588 transmute(psadbw(a
.as_u8x32(), b
.as_u8x32()))
2591 /// Shuffles bytes from `a` according to the content of `b`.
2593 /// The last 4 bits of each byte of `b` are used as addresses into the 32 bytes
2596 /// In addition, if the highest significant bit of a byte of `b` is set, the
2597 /// respective destination byte is set to 0.
2599 /// The low and high halves of the vectors are shuffled separately.
2601 /// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically
2605 /// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
2606 /// let mut r = [0; 32];
2607 /// for i in 0..16 {
2608 /// // if the most significant bit of b is set,
2609 /// // then the destination byte is set to 0.
2610 /// if b[i] & 0x80 == 0u8 {
2611 /// r[i] = a[(b[i] % 16) as usize];
2613 /// if b[i + 16] & 0x80 == 0u8 {
2614 /// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize];
2621 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi8)
2623 #[target_feature(enable = "avx2")]
2624 #[cfg_attr(test, assert_instr(vpshufb))]
2625 #[stable(feature = "simd_x86", since = "1.27.0")]
2626 pub unsafe fn _mm256_shuffle_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2627 transmute(pshufb(a
.as_u8x32(), b
.as_u8x32()))
2630 /// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in
2634 /// #[cfg(target_arch = "x86")]
2635 /// use std::arch::x86::*;
2636 /// #[cfg(target_arch = "x86_64")]
2637 /// use std::arch::x86_64::*;
2640 /// # if is_x86_feature_detected!("avx2") {
2641 /// # #[target_feature(enable = "avx2")]
2642 /// # unsafe fn worker() {
2643 /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
2645 /// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01);
2646 /// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11);
2648 /// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4);
2649 /// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5);
2651 /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0);
2652 /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0);
2654 /// # unsafe { worker(); }
2659 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
2661 #[target_feature(enable = "avx2")]
2662 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
2663 #[rustc_args_required_const(1)]
2664 #[stable(feature = "simd_x86", since = "1.27.0")]
2665 pub unsafe fn _mm256_shuffle_epi32(a
: __m256i
, imm8
: i32) -> __m256i
{
2666 // simd_shuffleX requires that its selector parameter be made up of
2667 // constant values, but we can't enforce that here. In spirit, we need
2668 // to write a `match` on all possible values of a byte, and for each value,
2669 // hard-code the correct `simd_shuffleX` call using only constants. We
2670 // then hope for LLVM to do the rest.
2672 // Of course, that's... awful. So we try to use macros to do it for us.
2673 let imm8
= (imm8
& 0xFF) as u8;
2675 let a
= a
.as_i32x8();
2676 macro_rules
! shuffle_done
{
2677 ($x01
:expr
, $x23
:expr
, $x45
:expr
, $x67
:expr
) => {
2694 macro_rules
! shuffle_x67
{
2695 ($x01
:expr
, $x23
:expr
, $x45
:expr
) => {
2696 match (imm8
>> 6) & 0b11 {
2697 0b00 => shuffle_done
!($x01
, $x23
, $x45
, 0),
2698 0b01 => shuffle_done
!($x01
, $x23
, $x45
, 1),
2699 0b10 => shuffle_done
!($x01
, $x23
, $x45
, 2),
2700 _
=> shuffle_done
!($x01
, $x23
, $x45
, 3),
2704 macro_rules
! shuffle_x45
{
2705 ($x01
:expr
, $x23
:expr
) => {
2706 match (imm8
>> 4) & 0b11 {
2707 0b00 => shuffle_x67
!($x01
, $x23
, 0),
2708 0b01 => shuffle_x67
!($x01
, $x23
, 1),
2709 0b10 => shuffle_x67
!($x01
, $x23
, 2),
2710 _
=> shuffle_x67
!($x01
, $x23
, 3),
2714 macro_rules
! shuffle_x23
{
2716 match (imm8
>> 2) & 0b11 {
2717 0b00 => shuffle_x45
!($x01
, 0),
2718 0b01 => shuffle_x45
!($x01
, 1),
2719 0b10 => shuffle_x45
!($x01
, 2),
2720 _
=> shuffle_x45
!($x01
, 3),
2724 let r
: i32x8
= match imm8
& 0b11 {
2725 0b00 => shuffle_x23
!(0),
2726 0b01 => shuffle_x23
!(1),
2727 0b10 => shuffle_x23
!(2),
2728 _
=> shuffle_x23
!(3),
2733 /// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using
2734 /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied
2737 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflehi_epi16)
2739 #[target_feature(enable = "avx2")]
2740 #[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
2741 #[rustc_args_required_const(1)]
2742 #[stable(feature = "simd_x86", since = "1.27.0")]
2743 pub unsafe fn _mm256_shufflehi_epi16(a
: __m256i
, imm8
: i32) -> __m256i
{
2744 let imm8
= (imm8
& 0xFF) as u8;
2745 let a
= a
.as_i16x16();
2746 macro_rules
! shuffle_done
{
2747 ($x01
:expr
, $x23
:expr
, $x45
:expr
, $x67
:expr
) => {
2749 simd_shuffle16(a
, a
, [
2750 0, 1, 2, 3, 4+$x01
, 4+$x23
, 4+$x45
, 4+$x67
,
2751 8, 9, 10, 11, 12+$x01
, 12+$x23
, 12+$x45
, 12+$x67
2755 macro_rules
! shuffle_x67
{
2756 ($x01
:expr
, $x23
:expr
, $x45
:expr
) => {
2757 match (imm8
>> 6) & 0b11 {
2758 0b00 => shuffle_done
!($x01
, $x23
, $x45
, 0),
2759 0b01 => shuffle_done
!($x01
, $x23
, $x45
, 1),
2760 0b10 => shuffle_done
!($x01
, $x23
, $x45
, 2),
2761 _
=> shuffle_done
!($x01
, $x23
, $x45
, 3),
2765 macro_rules
! shuffle_x45
{
2766 ($x01
:expr
, $x23
:expr
) => {
2767 match (imm8
>> 4) & 0b11 {
2768 0b00 => shuffle_x67
!($x01
, $x23
, 0),
2769 0b01 => shuffle_x67
!($x01
, $x23
, 1),
2770 0b10 => shuffle_x67
!($x01
, $x23
, 2),
2771 _
=> shuffle_x67
!($x01
, $x23
, 3),
2775 macro_rules
! shuffle_x23
{
2777 match (imm8
>> 2) & 0b11 {
2778 0b00 => shuffle_x45
!($x01
, 0),
2779 0b01 => shuffle_x45
!($x01
, 1),
2780 0b10 => shuffle_x45
!($x01
, 2),
2781 _
=> shuffle_x45
!($x01
, 3),
2785 let r
: i16x16
= match imm8
& 0b11 {
2786 0b00 => shuffle_x23
!(0),
2787 0b01 => shuffle_x23
!(1),
2788 0b10 => shuffle_x23
!(2),
2789 _
=> shuffle_x23
!(3),
/// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using
/// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied
/// to the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i {
    // Only the low 8 bits of the control word are meaningful.
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x16();
    // `simd_shuffle16` requires compile-time-constant indices, so each 2-bit
    // field of `imm8` is decoded through a cascade of macros ending in a
    // shuffle with fully constant indices.
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            // Permute the low 4 words of each 128-bit lane; the high 4 words
            // (indices 4..=7 and 12..=15) pass through unchanged.
            simd_shuffle16(a, a, [
                0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7,
                8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15,
            ])
        };
    }
    // Decode bits 7:6 of `imm8` (source of destination word 3 in each lane).
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    // Decode bits 5:4 (destination word 2).
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    // Decode bits 3:2 (destination word 1).
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    // Decode bits 1:0 (destination word 0) and kick off the cascade.
    let r: i16x16 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    transmute(r)
}
2855 /// Negates packed 16-bit integers in `a` when the corresponding signed
2856 /// 16-bit integer in `b` is negative, and returns the results.
2857 /// Results are zeroed out when the corresponding element in `b` is zero.
2859 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16)
2861 #[target_feature(enable = "avx2")]
2862 #[cfg_attr(test, assert_instr(vpsignw))]
2863 #[stable(feature = "simd_x86", since = "1.27.0")]
2864 pub unsafe fn _mm256_sign_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
2865 transmute(psignw(a
.as_i16x16(), b
.as_i16x16()))
2868 /// Negates packed 32-bit integers in `a` when the corresponding signed
2869 /// 32-bit integer in `b` is negative, and returns the results.
2870 /// Results are zeroed out when the corresponding element in `b` is zero.
2872 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32)
2874 #[target_feature(enable = "avx2")]
2875 #[cfg_attr(test, assert_instr(vpsignd))]
2876 #[stable(feature = "simd_x86", since = "1.27.0")]
2877 pub unsafe fn _mm256_sign_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
2878 transmute(psignd(a
.as_i32x8(), b
.as_i32x8()))
2881 /// Negates packed 8-bit integers in `a` when the corresponding signed
2882 /// 8-bit integer in `b` is negative, and returns the results.
2883 /// Results are zeroed out when the corresponding element in `b` is zero.
2885 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8)
2887 #[target_feature(enable = "avx2")]
2888 #[cfg_attr(test, assert_instr(vpsignb))]
2889 #[stable(feature = "simd_x86", since = "1.27.0")]
2890 pub unsafe fn _mm256_sign_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
2891 transmute(psignb(a
.as_i8x32(), b
.as_i8x32()))
2894 /// Shifts packed 16-bit integers in `a` left by `count` while
2895 /// shifting in zeros, and returns the result
2897 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16)
2899 #[target_feature(enable = "avx2")]
2900 #[cfg_attr(test, assert_instr(vpsllw))]
2901 #[stable(feature = "simd_x86", since = "1.27.0")]
2902 pub unsafe fn _mm256_sll_epi16(a
: __m256i
, count
: __m128i
) -> __m256i
{
2903 transmute(psllw(a
.as_i16x16(), count
.as_i16x8()))
2906 /// Shifts packed 32-bit integers in `a` left by `count` while
2907 /// shifting in zeros, and returns the result
2909 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32)
2911 #[target_feature(enable = "avx2")]
2912 #[cfg_attr(test, assert_instr(vpslld))]
2913 #[stable(feature = "simd_x86", since = "1.27.0")]
2914 pub unsafe fn _mm256_sll_epi32(a
: __m256i
, count
: __m128i
) -> __m256i
{
2915 transmute(pslld(a
.as_i32x8(), count
.as_i32x4()))
2918 /// Shifts packed 64-bit integers in `a` left by `count` while
2919 /// shifting in zeros, and returns the result
2921 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64)
2923 #[target_feature(enable = "avx2")]
2924 #[cfg_attr(test, assert_instr(vpsllq))]
2925 #[stable(feature = "simd_x86", since = "1.27.0")]
2926 pub unsafe fn _mm256_sll_epi64(a
: __m256i
, count
: __m128i
) -> __m256i
{
2927 transmute(psllq(a
.as_i64x4(), count
.as_i64x2()))
2930 /// Shifts packed 16-bit integers in `a` left by `imm8` while
2931 /// shifting in zeros, return the results;
2933 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16)
2935 #[target_feature(enable = "avx2")]
2936 #[cfg_attr(test, assert_instr(vpsllw))]
2937 #[stable(feature = "simd_x86", since = "1.27.0")]
2938 pub unsafe fn _mm256_slli_epi16(a
: __m256i
, imm8
: i32) -> __m256i
{
2939 transmute(pslliw(a
.as_i16x16(), imm8
))
2942 /// Shifts packed 32-bit integers in `a` left by `imm8` while
2943 /// shifting in zeros, return the results;
2945 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32)
2947 #[target_feature(enable = "avx2")]
2948 #[cfg_attr(test, assert_instr(vpslld))]
2949 #[stable(feature = "simd_x86", since = "1.27.0")]
2950 pub unsafe fn _mm256_slli_epi32(a
: __m256i
, imm8
: i32) -> __m256i
{
2951 transmute(psllid(a
.as_i32x8(), imm8
))
2954 /// Shifts packed 64-bit integers in `a` left by `imm8` while
2955 /// shifting in zeros, return the results;
2957 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64)
2959 #[target_feature(enable = "avx2")]
2960 #[cfg_attr(test, assert_instr(vpsllq))]
2961 #[stable(feature = "simd_x86", since = "1.27.0")]
2962 pub unsafe fn _mm256_slli_epi64(a
: __m256i
, imm8
: i32) -> __m256i
{
2963 transmute(pslliq(a
.as_i64x4(), imm8
))
/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    // The underlying intrinsic needs a constant shift; `constify_imm8!`
    // expands `call!` once per possible immediate. Note the byte count is
    // converted to bits (`imm8 * 8`).
    macro_rules! call {
        ($imm8:expr) => {
            vpslldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}
/// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    // Alias of `_mm256_slli_si256` with a different name; same constant-shift
    // expansion, byte count converted to bits (`imm8 * 8`).
    macro_rules! call {
        ($imm8:expr) => {
            vpslldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}
3002 /// Shifts packed 32-bit integers in `a` left by the amount
3003 /// specified by the corresponding element in `count` while
3004 /// shifting in zeros, and returns the result.
3006 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32)
3008 #[target_feature(enable = "avx2")]
3009 #[cfg_attr(test, assert_instr(vpsllvd))]
3010 #[stable(feature = "simd_x86", since = "1.27.0")]
3011 pub unsafe fn _mm_sllv_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
3012 transmute(psllvd(a
.as_i32x4(), count
.as_i32x4()))
3015 /// Shifts packed 32-bit integers in `a` left by the amount
3016 /// specified by the corresponding element in `count` while
3017 /// shifting in zeros, and returns the result.
3019 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32)
3021 #[target_feature(enable = "avx2")]
3022 #[cfg_attr(test, assert_instr(vpsllvd))]
3023 #[stable(feature = "simd_x86", since = "1.27.0")]
3024 pub unsafe fn _mm256_sllv_epi32(a
: __m256i
, count
: __m256i
) -> __m256i
{
3025 transmute(psllvd256(a
.as_i32x8(), count
.as_i32x8()))
3028 /// Shifts packed 64-bit integers in `a` left by the amount
3029 /// specified by the corresponding element in `count` while
3030 /// shifting in zeros, and returns the result.
3032 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64)
3034 #[target_feature(enable = "avx2")]
3035 #[cfg_attr(test, assert_instr(vpsllvq))]
3036 #[stable(feature = "simd_x86", since = "1.27.0")]
3037 pub unsafe fn _mm_sllv_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
3038 transmute(psllvq(a
.as_i64x2(), count
.as_i64x2()))
3041 /// Shifts packed 64-bit integers in `a` left by the amount
3042 /// specified by the corresponding element in `count` while
3043 /// shifting in zeros, and returns the result.
3045 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64)
3047 #[target_feature(enable = "avx2")]
3048 #[cfg_attr(test, assert_instr(vpsllvq))]
3049 #[stable(feature = "simd_x86", since = "1.27.0")]
3050 pub unsafe fn _mm256_sllv_epi64(a
: __m256i
, count
: __m256i
) -> __m256i
{
3051 transmute(psllvq256(a
.as_i64x4(), count
.as_i64x4()))
3054 /// Shifts packed 16-bit integers in `a` right by `count` while
3055 /// shifting in sign bits.
3057 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16)
3059 #[target_feature(enable = "avx2")]
3060 #[cfg_attr(test, assert_instr(vpsraw))]
3061 #[stable(feature = "simd_x86", since = "1.27.0")]
3062 pub unsafe fn _mm256_sra_epi16(a
: __m256i
, count
: __m128i
) -> __m256i
{
3063 transmute(psraw(a
.as_i16x16(), count
.as_i16x8()))
3066 /// Shifts packed 32-bit integers in `a` right by `count` while
3067 /// shifting in sign bits.
3069 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32)
3071 #[target_feature(enable = "avx2")]
3072 #[cfg_attr(test, assert_instr(vpsrad))]
3073 #[stable(feature = "simd_x86", since = "1.27.0")]
3074 pub unsafe fn _mm256_sra_epi32(a
: __m256i
, count
: __m128i
) -> __m256i
{
3075 transmute(psrad(a
.as_i32x8(), count
.as_i32x4()))
3078 /// Shifts packed 16-bit integers in `a` right by `imm8` while
3079 /// shifting in sign bits.
3081 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16)
3083 #[target_feature(enable = "avx2")]
3084 #[cfg_attr(test, assert_instr(vpsraw))]
3085 #[stable(feature = "simd_x86", since = "1.27.0")]
3086 pub unsafe fn _mm256_srai_epi16(a
: __m256i
, imm8
: i32) -> __m256i
{
3087 transmute(psraiw(a
.as_i16x16(), imm8
))
3090 /// Shifts packed 32-bit integers in `a` right by `imm8` while
3091 /// shifting in sign bits.
3093 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32)
3095 #[target_feature(enable = "avx2")]
3096 #[cfg_attr(test, assert_instr(vpsrad))]
3097 #[stable(feature = "simd_x86", since = "1.27.0")]
3098 pub unsafe fn _mm256_srai_epi32(a
: __m256i
, imm8
: i32) -> __m256i
{
3099 transmute(psraid(a
.as_i32x8(), imm8
))
3102 /// Shifts packed 32-bit integers in `a` right by the amount specified by the
3103 /// corresponding element in `count` while shifting in sign bits.
3105 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32)
3107 #[target_feature(enable = "avx2")]
3108 #[cfg_attr(test, assert_instr(vpsravd))]
3109 #[stable(feature = "simd_x86", since = "1.27.0")]
3110 pub unsafe fn _mm_srav_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
3111 transmute(psravd(a
.as_i32x4(), count
.as_i32x4()))
3114 /// Shifts packed 32-bit integers in `a` right by the amount specified by the
3115 /// corresponding element in `count` while shifting in sign bits.
3117 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32)
3119 #[target_feature(enable = "avx2")]
3120 #[cfg_attr(test, assert_instr(vpsravd))]
3121 #[stable(feature = "simd_x86", since = "1.27.0")]
3122 pub unsafe fn _mm256_srav_epi32(a
: __m256i
, count
: __m256i
) -> __m256i
{
3123 transmute(psravd256(a
.as_i32x8(), count
.as_i32x8()))
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    // The underlying intrinsic needs a constant shift; `constify_imm8!`
    // expands `call!` once per possible immediate. Note the byte count is
    // converted to bits (`imm8 * 8`).
    macro_rules! call {
        ($imm8:expr) => {
            vpsrldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i {
    let a = a.as_i64x4();
    // Alias of `_mm256_srli_si256` with a different name; same constant-shift
    // expansion, byte count converted to bits (`imm8 * 8`).
    macro_rules! call {
        ($imm8:expr) => {
            vpsrldq(a, $imm8)
        };
    }
    transmute(constify_imm8!(imm8 * 8, call))
}
3162 /// Shifts packed 16-bit integers in `a` right by `count` while shifting in
3165 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16)
3167 #[target_feature(enable = "avx2")]
3168 #[cfg_attr(test, assert_instr(vpsrlw))]
3169 #[stable(feature = "simd_x86", since = "1.27.0")]
3170 pub unsafe fn _mm256_srl_epi16(a
: __m256i
, count
: __m128i
) -> __m256i
{
3171 transmute(psrlw(a
.as_i16x16(), count
.as_i16x8()))
3174 /// Shifts packed 32-bit integers in `a` right by `count` while shifting in
3177 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32)
3179 #[target_feature(enable = "avx2")]
3180 #[cfg_attr(test, assert_instr(vpsrld))]
3181 #[stable(feature = "simd_x86", since = "1.27.0")]
3182 pub unsafe fn _mm256_srl_epi32(a
: __m256i
, count
: __m128i
) -> __m256i
{
3183 transmute(psrld(a
.as_i32x8(), count
.as_i32x4()))
3186 /// Shifts packed 64-bit integers in `a` right by `count` while shifting in
3189 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64)
3191 #[target_feature(enable = "avx2")]
3192 #[cfg_attr(test, assert_instr(vpsrlq))]
3193 #[stable(feature = "simd_x86", since = "1.27.0")]
3194 pub unsafe fn _mm256_srl_epi64(a
: __m256i
, count
: __m128i
) -> __m256i
{
3195 transmute(psrlq(a
.as_i64x4(), count
.as_i64x2()))
3198 /// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in
3201 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16)
3203 #[target_feature(enable = "avx2")]
3204 #[cfg_attr(test, assert_instr(vpsrlw))]
3205 #[stable(feature = "simd_x86", since = "1.27.0")]
3206 pub unsafe fn _mm256_srli_epi16(a
: __m256i
, imm8
: i32) -> __m256i
{
3207 transmute(psrliw(a
.as_i16x16(), imm8
))
3210 /// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in
3213 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32)
3215 #[target_feature(enable = "avx2")]
3216 #[cfg_attr(test, assert_instr(vpsrld))]
3217 #[stable(feature = "simd_x86", since = "1.27.0")]
3218 pub unsafe fn _mm256_srli_epi32(a
: __m256i
, imm8
: i32) -> __m256i
{
3219 transmute(psrlid(a
.as_i32x8(), imm8
))
3222 /// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in
3225 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64)
3227 #[target_feature(enable = "avx2")]
3228 #[cfg_attr(test, assert_instr(vpsrlq))]
3229 #[stable(feature = "simd_x86", since = "1.27.0")]
3230 pub unsafe fn _mm256_srli_epi64(a
: __m256i
, imm8
: i32) -> __m256i
{
3231 transmute(psrliq(a
.as_i64x4(), imm8
))
3234 /// Shifts packed 32-bit integers in `a` right by the amount specified by
3235 /// the corresponding element in `count` while shifting in zeros,
3237 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32)
3239 #[target_feature(enable = "avx2")]
3240 #[cfg_attr(test, assert_instr(vpsrlvd))]
3241 #[stable(feature = "simd_x86", since = "1.27.0")]
3242 pub unsafe fn _mm_srlv_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
3243 transmute(psrlvd(a
.as_i32x4(), count
.as_i32x4()))
3246 /// Shifts packed 32-bit integers in `a` right by the amount specified by
3247 /// the corresponding element in `count` while shifting in zeros,
3249 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32)
3251 #[target_feature(enable = "avx2")]
3252 #[cfg_attr(test, assert_instr(vpsrlvd))]
3253 #[stable(feature = "simd_x86", since = "1.27.0")]
3254 pub unsafe fn _mm256_srlv_epi32(a
: __m256i
, count
: __m256i
) -> __m256i
{
3255 transmute(psrlvd256(a
.as_i32x8(), count
.as_i32x8()))
3258 /// Shifts packed 64-bit integers in `a` right by the amount specified by
3259 /// the corresponding element in `count` while shifting in zeros,
3261 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64)
3263 #[target_feature(enable = "avx2")]
3264 #[cfg_attr(test, assert_instr(vpsrlvq))]
3265 #[stable(feature = "simd_x86", since = "1.27.0")]
3266 pub unsafe fn _mm_srlv_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
3267 transmute(psrlvq(a
.as_i64x2(), count
.as_i64x2()))
3270 /// Shifts packed 64-bit integers in `a` right by the amount specified by
3271 /// the corresponding element in `count` while shifting in zeros,
3273 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64)
3275 #[target_feature(enable = "avx2")]
3276 #[cfg_attr(test, assert_instr(vpsrlvq))]
3277 #[stable(feature = "simd_x86", since = "1.27.0")]
3278 pub unsafe fn _mm256_srlv_epi64(a
: __m256i
, count
: __m256i
) -> __m256i
{
3279 transmute(psrlvq256(a
.as_i64x4(), count
.as_i64x4()))
3282 // TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
3284 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
3286 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi16)
3288 #[target_feature(enable = "avx2")]
3289 #[cfg_attr(test, assert_instr(vpsubw))]
3290 #[stable(feature = "simd_x86", since = "1.27.0")]
3291 pub unsafe fn _mm256_sub_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
3292 transmute(simd_sub(a
.as_i16x16(), b
.as_i16x16()))
3295 /// Subtract packed 32-bit integers in `b` from packed 16-bit integers in `a`
3297 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi32)
3299 #[target_feature(enable = "avx2")]
3300 #[cfg_attr(test, assert_instr(vpsubd))]
3301 #[stable(feature = "simd_x86", since = "1.27.0")]
3302 pub unsafe fn _mm256_sub_epi32(a
: __m256i
, b
: __m256i
) -> __m256i
{
3303 transmute(simd_sub(a
.as_i32x8(), b
.as_i32x8()))
3306 /// Subtract packed 64-bit integers in `b` from packed 16-bit integers in `a`
3308 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi64)
3310 #[target_feature(enable = "avx2")]
3311 #[cfg_attr(test, assert_instr(vpsubq))]
3312 #[stable(feature = "simd_x86", since = "1.27.0")]
3313 pub unsafe fn _mm256_sub_epi64(a
: __m256i
, b
: __m256i
) -> __m256i
{
3314 transmute(simd_sub(a
.as_i64x4(), b
.as_i64x4()))
3317 /// Subtract packed 8-bit integers in `b` from packed 16-bit integers in `a`
3319 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi8)
3321 #[target_feature(enable = "avx2")]
3322 #[cfg_attr(test, assert_instr(vpsubb))]
3323 #[stable(feature = "simd_x86", since = "1.27.0")]
3324 pub unsafe fn _mm256_sub_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
3325 transmute(simd_sub(a
.as_i8x32(), b
.as_i8x32()))
3328 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in
3329 /// `a` using saturation.
3331 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi16)
3333 #[target_feature(enable = "avx2")]
3334 #[cfg_attr(test, assert_instr(vpsubsw))]
3335 #[stable(feature = "simd_x86", since = "1.27.0")]
3336 pub unsafe fn _mm256_subs_epi16(a
: __m256i
, b
: __m256i
) -> __m256i
{
3337 transmute(psubsw(a
.as_i16x16(), b
.as_i16x16()))
3340 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in
3341 /// `a` using saturation.
3343 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi8)
3345 #[target_feature(enable = "avx2")]
3346 #[cfg_attr(test, assert_instr(vpsubsb))]
3347 #[stable(feature = "simd_x86", since = "1.27.0")]
3348 pub unsafe fn _mm256_subs_epi8(a
: __m256i
, b
: __m256i
) -> __m256i
{
3349 transmute(psubsb(a
.as_i8x32(), b
.as_i8x32()))
3352 /// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit
3353 /// integers in `a` using saturation.
3355 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu16)
3357 #[target_feature(enable = "avx2")]
3358 #[cfg_attr(test, assert_instr(vpsubusw))]
3359 #[stable(feature = "simd_x86", since = "1.27.0")]
3360 pub unsafe fn _mm256_subs_epu16(a
: __m256i
, b
: __m256i
) -> __m256i
{
3361 transmute(psubusw(a
.as_u16x16(), b
.as_u16x16()))
3364 /// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit
3365 /// integers in `a` using saturation.
3367 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu8)
3369 #[target_feature(enable = "avx2")]
3370 #[cfg_attr(test, assert_instr(vpsubusb))]
3371 #[stable(feature = "simd_x86", since = "1.27.0")]
3372 pub unsafe fn _mm256_subs_epu8(a
: __m256i
, b
: __m256i
) -> __m256i
{
3373 transmute(psubusb(a
.as_u8x32(), b
.as_u8x32()))
/// Unpacks and interleave 8-bit integers from the high half of each
/// 128-bit lane in `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
///     -30, -31,
/// );
///
/// let c = _mm256_unpackhi_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(
///     8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15,
///     24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31,
///     -31,
/// );
///
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the high 8 bytes of each 128-bit lane. Shuffle indices
    // 0..=31 select from `a`, 32..=63 select from `b`.
    let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
        8, 40, 9, 41, 10, 42, 11, 43,
        12, 44, 13, 45, 14, 46, 15, 47,
        24, 56, 25, 57, 26, 58, 27, 59,
        28, 60, 29, 61, 30, 62, 31, 63,
    ]);
    transmute(r)
}
/// Unpacks and interleave 8-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi8(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
///     20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
/// );
/// let b = _mm256_setr_epi8(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
///     -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
///     -30, -31,
/// );
///
/// let c = _mm256_unpacklo_epi8(a, b);
///
/// let expected = _mm256_setr_epi8(
///     0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17,
///     -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi8)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the low 8 bytes of each 128-bit lane. Shuffle indices
    // 0..=31 select from `a`, 32..=63 select from `b`.
    let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [
        0, 32, 1, 33, 2, 34, 3, 35,
        4, 36, 5, 37, 6, 38, 7, 39,
        16, 48, 17, 49, 18, 50, 19, 51,
        20, 52, 21, 53, 22, 54, 23, 55,
    ]);
    transmute(r)
}
/// Unpacks and interleave 16-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpackhi_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(
///     4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the high 4 words of each 128-bit lane. Indices 0..=15 pick
    // from `a`, 16..=31 pick from `b`.
    let r: i16x16 = simd_shuffle16(
        a.as_i16x16(),
        b.as_i16x16(),
        [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31],
    );
    transmute(r)
}
/// Unpacks and interleave 16-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(
///     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
/// );
/// let b = _mm256_setr_epi16(
///     0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
/// );
///
/// let c = _mm256_unpacklo_epi16(a, b);
///
/// let expected = _mm256_setr_epi16(
///     0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11,
/// );
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi16)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vpunpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the low 4 words of each 128-bit lane. Indices 0..=15 pick
    // from `a`, 16..=31 pick from `b`.
    let r: i16x16 = simd_shuffle16(
        a.as_i16x16(),
        b.as_i16x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27],
    );
    transmute(r)
}
/// Unpacks and interleave 32-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
///
/// let c = _mm256_unpackhi_epi32(a, b);
///
/// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi32)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the high 2 dwords of each 128-bit lane. Indices 0..=7 pick
    // from `a`, 8..=15 pick from `b`.
    let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
    transmute(r)
}
/// Unpacks and interleave 32-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7);
///
/// let c = _mm256_unpacklo_epi32(a, b);
///
/// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi32)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the low 2 dwords of each 128-bit lane. Indices 0..=7 pick
    // from `a`, 8..=15 pick from `b`.
    let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
    transmute(r)
}
/// Unpacks and interleave 64-bit integers from the high half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
///
/// let c = _mm256_unpackhi_epi64(a, b);
///
/// let expected = _mm256_setr_epi64x(1, -1, 3, -3);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi64)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the high qword of each 128-bit lane. Indices 0..=3 pick
    // from `a`, 4..=7 pick from `b`.
    let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
    transmute(r)
}
/// Unpacks and interleave 64-bit integers from the low half of each
/// 128-bit lane of `a` and `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi64x(0, 1, 2, 3);
/// let b = _mm256_setr_epi64x(0, -1, -2, -3);
///
/// let c = _mm256_unpacklo_epi64(a, b);
///
/// let expected = _mm256_setr_epi64x(0, 0, 2, -2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi64)
#[target_feature(enable = "avx2")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
    // Interleave the low qword of each 128-bit lane. Indices 0..=3 pick
    // from `a`, 4..=7 pick from `b`.
    let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
    transmute(r)
}
3726 /// Computes the bitwise XOR of 256 bits (representing integer data)
3729 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256)
3731 #[target_feature(enable = "avx2")]
3732 #[cfg_attr(test, assert_instr(vxorps))]
3733 #[stable(feature = "simd_x86", since = "1.27.0")]
3734 pub unsafe fn _mm256_xor_si256(a
: __m256i
, b
: __m256i
) -> __m256i
{
3735 transmute(simd_xor(a
.as_i64x4(), b
.as_i64x4()))
3738 /// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
3739 /// integer containing the zero-extended integer data.
3741 /// See [LLVM commit D20468][https://reviews.llvm.org/D20468].
3743 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi8)
3745 #[target_feature(enable = "avx2")]
3746 // This intrinsic has no corresponding instruction.
3747 #[rustc_args_required_const(1)]
3748 #[stable(feature = "simd_x86", since = "1.27.0")]
3749 pub unsafe fn _mm256_extract_epi8(a
: __m256i
, imm8
: i32) -> i8 {
3750 let imm8
= (imm8
& 31) as u32;
3751 simd_extract(a
.as_i8x32(), imm8
)
3754 /// Extracts a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit
3755 /// integer containing the zero-extended integer data.
3757 /// See [LLVM commit D20468][https://reviews.llvm.org/D20468].
3759 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi16)
3761 #[target_feature(enable = "avx2")]
3762 // This intrinsic has no corresponding instruction.
3763 #[rustc_args_required_const(1)]
3764 #[stable(feature = "simd_x86", since = "1.27.0")]
3765 pub unsafe fn _mm256_extract_epi16(a
: __m256i
, imm8
: i32) -> i16 {
3766 let imm8
= (imm8
& 15) as u32;
3767 simd_extract(a
.as_i16x16(), imm8
)
3770 /// Extracts a 32-bit integer from `a`, selected with `imm8`.
3772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32)
3774 #[target_feature(enable = "avx2")]
3775 // This intrinsic has no corresponding instruction.
3776 #[rustc_args_required_const(1)]
3777 #[stable(feature = "simd_x86", since = "1.27.0")]
3778 pub unsafe fn _mm256_extract_epi32(a
: __m256i
, imm8
: i32) -> i32 {
3779 let imm8
= (imm8
& 7) as u32;
3780 simd_extract(a
.as_i32x8(), imm8
)
3783 /// Returns the first element of the input vector of `[4 x double]`.
3785 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsd_f64)
3787 #[target_feature(enable = "avx2")]
3788 //#[cfg_attr(test, assert_instr(movsd))] FIXME
3789 #[stable(feature = "simd_x86", since = "1.27.0")]
3790 pub unsafe fn _mm256_cvtsd_f64(a
: __m256d
) -> f64 {
3794 /// Returns the first element of the input vector of `[8 x i32]`.
3796 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsi256_si32)
3798 #[target_feature(enable = "avx2")]
3799 //#[cfg_attr(test, assert_instr(movd))] FIXME
3800 #[stable(feature = "simd_x86", since = "1.27.0")]
3801 pub unsafe fn _mm256_cvtsi256_si32(a
: __m256i
) -> i32 {
3802 simd_extract(a
.as_i32x8(), 0)
3805 #[allow(improper_ctypes)]
3807 #[link_name = "llvm.x86.avx2.pabs.b"]
3808 fn pabsb(a
: i8x32
) -> u8x32
;
3809 #[link_name = "llvm.x86.avx2.pabs.w"]
3810 fn pabsw(a
: i16x16
) -> u16x16
;
3811 #[link_name = "llvm.x86.avx2.pabs.d"]
3812 fn pabsd(a
: i32x8
) -> u32x8
;
3813 #[link_name = "llvm.x86.avx2.padds.b"]
3814 fn paddsb(a
: i8x32
, b
: i8x32
) -> i8x32
;
3815 #[link_name = "llvm.x86.avx2.padds.w"]
3816 fn paddsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3817 #[link_name = "llvm.x86.avx2.paddus.b"]
3818 fn paddusb(a
: u8x32
, b
: u8x32
) -> u8x32
;
3819 #[link_name = "llvm.x86.avx2.paddus.w"]
3820 fn paddusw(a
: u16x16
, b
: u16x16
) -> u16x16
;
3821 #[link_name = "llvm.x86.avx2.pavg.b"]
3822 fn pavgb(a
: u8x32
, b
: u8x32
) -> u8x32
;
3823 #[link_name = "llvm.x86.avx2.pavg.w"]
3824 fn pavgw(a
: u16x16
, b
: u16x16
) -> u16x16
;
3825 #[link_name = "llvm.x86.avx2.pblendvb"]
3826 fn pblendvb(a
: i8x32
, b
: i8x32
, mask
: i8x32
) -> i8x32
;
3827 #[link_name = "llvm.x86.avx2.phadd.w"]
3828 fn phaddw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3829 #[link_name = "llvm.x86.avx2.phadd.d"]
3830 fn phaddd(a
: i32x8
, b
: i32x8
) -> i32x8
;
3831 #[link_name = "llvm.x86.avx2.phadd.sw"]
3832 fn phaddsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3833 #[link_name = "llvm.x86.avx2.phsub.w"]
3834 fn phsubw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3835 #[link_name = "llvm.x86.avx2.phsub.d"]
3836 fn phsubd(a
: i32x8
, b
: i32x8
) -> i32x8
;
3837 #[link_name = "llvm.x86.avx2.phsub.sw"]
3838 fn phsubsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3839 #[link_name = "llvm.x86.avx2.pmadd.wd"]
3840 fn pmaddwd(a
: i16x16
, b
: i16x16
) -> i32x8
;
3841 #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3842 fn pmaddubsw(a
: u8x32
, b
: u8x32
) -> i16x16
;
3843 #[link_name = "llvm.x86.avx2.maskload.d"]
3844 fn maskloadd(mem_addr
: *const i8, mask
: i32x4
) -> i32x4
;
3845 #[link_name = "llvm.x86.avx2.maskload.d.256"]
3846 fn maskloadd256(mem_addr
: *const i8, mask
: i32x8
) -> i32x8
;
3847 #[link_name = "llvm.x86.avx2.maskload.q"]
3848 fn maskloadq(mem_addr
: *const i8, mask
: i64x2
) -> i64x2
;
3849 #[link_name = "llvm.x86.avx2.maskload.q.256"]
3850 fn maskloadq256(mem_addr
: *const i8, mask
: i64x4
) -> i64x4
;
3851 #[link_name = "llvm.x86.avx2.maskstore.d"]
3852 fn maskstored(mem_addr
: *mut i8, mask
: i32x4
, a
: i32x4
);
3853 #[link_name = "llvm.x86.avx2.maskstore.d.256"]
3854 fn maskstored256(mem_addr
: *mut i8, mask
: i32x8
, a
: i32x8
);
3855 #[link_name = "llvm.x86.avx2.maskstore.q"]
3856 fn maskstoreq(mem_addr
: *mut i8, mask
: i64x2
, a
: i64x2
);
3857 #[link_name = "llvm.x86.avx2.maskstore.q.256"]
3858 fn maskstoreq256(mem_addr
: *mut i8, mask
: i64x4
, a
: i64x4
);
3859 #[link_name = "llvm.x86.avx2.pmaxs.w"]
3860 fn pmaxsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3861 #[link_name = "llvm.x86.avx2.pmaxs.d"]
3862 fn pmaxsd(a
: i32x8
, b
: i32x8
) -> i32x8
;
3863 #[link_name = "llvm.x86.avx2.pmaxs.b"]
3864 fn pmaxsb(a
: i8x32
, b
: i8x32
) -> i8x32
;
3865 #[link_name = "llvm.x86.avx2.pmaxu.w"]
3866 fn pmaxuw(a
: u16x16
, b
: u16x16
) -> u16x16
;
3867 #[link_name = "llvm.x86.avx2.pmaxu.d"]
3868 fn pmaxud(a
: u32x8
, b
: u32x8
) -> u32x8
;
3869 #[link_name = "llvm.x86.avx2.pmaxu.b"]
3870 fn pmaxub(a
: u8x32
, b
: u8x32
) -> u8x32
;
3871 #[link_name = "llvm.x86.avx2.pmins.w"]
3872 fn pminsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3873 #[link_name = "llvm.x86.avx2.pmins.d"]
3874 fn pminsd(a
: i32x8
, b
: i32x8
) -> i32x8
;
3875 #[link_name = "llvm.x86.avx2.pmins.b"]
3876 fn pminsb(a
: i8x32
, b
: i8x32
) -> i8x32
;
3877 #[link_name = "llvm.x86.avx2.pminu.w"]
3878 fn pminuw(a
: u16x16
, b
: u16x16
) -> u16x16
;
3879 #[link_name = "llvm.x86.avx2.pminu.d"]
3880 fn pminud(a
: u32x8
, b
: u32x8
) -> u32x8
;
3881 #[link_name = "llvm.x86.avx2.pminu.b"]
3882 fn pminub(a
: u8x32
, b
: u8x32
) -> u8x32
;
3883 #[link_name = "llvm.x86.avx2.pmovmskb"]
3884 fn pmovmskb(a
: i8x32
) -> i32;
3885 #[link_name = "llvm.x86.avx2.mpsadbw"]
3886 fn mpsadbw(a
: u8x32
, b
: u8x32
, imm8
: i32) -> u16x16
;
3887 #[link_name = "llvm.x86.avx2.pmulhu.w"]
3888 fn pmulhuw(a
: u16x16
, b
: u16x16
) -> u16x16
;
3889 #[link_name = "llvm.x86.avx2.pmulh.w"]
3890 fn pmulhw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3891 #[link_name = "llvm.x86.avx2.pmul.dq"]
3892 fn pmuldq(a
: i32x8
, b
: i32x8
) -> i64x4
;
3893 #[link_name = "llvm.x86.avx2.pmulu.dq"]
3894 fn pmuludq(a
: u32x8
, b
: u32x8
) -> u64x4
;
3895 #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
3896 fn pmulhrsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3897 #[link_name = "llvm.x86.avx2.packsswb"]
3898 fn packsswb(a
: i16x16
, b
: i16x16
) -> i8x32
;
3899 #[link_name = "llvm.x86.avx2.packssdw"]
3900 fn packssdw(a
: i32x8
, b
: i32x8
) -> i16x16
;
3901 #[link_name = "llvm.x86.avx2.packuswb"]
3902 fn packuswb(a
: i16x16
, b
: i16x16
) -> u8x32
;
3903 #[link_name = "llvm.x86.avx2.packusdw"]
3904 fn packusdw(a
: i32x8
, b
: i32x8
) -> u16x16
;
3905 #[link_name = "llvm.x86.avx2.psad.bw"]
3906 fn psadbw(a
: u8x32
, b
: u8x32
) -> u64x4
;
3907 #[link_name = "llvm.x86.avx2.psign.b"]
3908 fn psignb(a
: i8x32
, b
: i8x32
) -> i8x32
;
3909 #[link_name = "llvm.x86.avx2.psign.w"]
3910 fn psignw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3911 #[link_name = "llvm.x86.avx2.psign.d"]
3912 fn psignd(a
: i32x8
, b
: i32x8
) -> i32x8
;
3913 #[link_name = "llvm.x86.avx2.psll.w"]
3914 fn psllw(a
: i16x16
, count
: i16x8
) -> i16x16
;
3915 #[link_name = "llvm.x86.avx2.psll.d"]
3916 fn pslld(a
: i32x8
, count
: i32x4
) -> i32x8
;
3917 #[link_name = "llvm.x86.avx2.psll.q"]
3918 fn psllq(a
: i64x4
, count
: i64x2
) -> i64x4
;
3919 #[link_name = "llvm.x86.avx2.pslli.w"]
3920 fn pslliw(a
: i16x16
, imm8
: i32) -> i16x16
;
3921 #[link_name = "llvm.x86.avx2.pslli.d"]
3922 fn psllid(a
: i32x8
, imm8
: i32) -> i32x8
;
3923 #[link_name = "llvm.x86.avx2.pslli.q"]
3924 fn pslliq(a
: i64x4
, imm8
: i32) -> i64x4
;
3925 #[link_name = "llvm.x86.avx2.psllv.d"]
3926 fn psllvd(a
: i32x4
, count
: i32x4
) -> i32x4
;
3927 #[link_name = "llvm.x86.avx2.psllv.d.256"]
3928 fn psllvd256(a
: i32x8
, count
: i32x8
) -> i32x8
;
3929 #[link_name = "llvm.x86.avx2.psllv.q"]
3930 fn psllvq(a
: i64x2
, count
: i64x2
) -> i64x2
;
3931 #[link_name = "llvm.x86.avx2.psllv.q.256"]
3932 fn psllvq256(a
: i64x4
, count
: i64x4
) -> i64x4
;
3933 #[link_name = "llvm.x86.avx2.psra.w"]
3934 fn psraw(a
: i16x16
, count
: i16x8
) -> i16x16
;
3935 #[link_name = "llvm.x86.avx2.psra.d"]
3936 fn psrad(a
: i32x8
, count
: i32x4
) -> i32x8
;
3937 #[link_name = "llvm.x86.avx2.psrai.w"]
3938 fn psraiw(a
: i16x16
, imm8
: i32) -> i16x16
;
3939 #[link_name = "llvm.x86.avx2.psrai.d"]
3940 fn psraid(a
: i32x8
, imm8
: i32) -> i32x8
;
3941 #[link_name = "llvm.x86.avx2.psrav.d"]
3942 fn psravd(a
: i32x4
, count
: i32x4
) -> i32x4
;
3943 #[link_name = "llvm.x86.avx2.psrav.d.256"]
3944 fn psravd256(a
: i32x8
, count
: i32x8
) -> i32x8
;
3945 #[link_name = "llvm.x86.avx2.psrl.w"]
3946 fn psrlw(a
: i16x16
, count
: i16x8
) -> i16x16
;
3947 #[link_name = "llvm.x86.avx2.psrl.d"]
3948 fn psrld(a
: i32x8
, count
: i32x4
) -> i32x8
;
3949 #[link_name = "llvm.x86.avx2.psrl.q"]
3950 fn psrlq(a
: i64x4
, count
: i64x2
) -> i64x4
;
3951 #[link_name = "llvm.x86.avx2.psrli.w"]
3952 fn psrliw(a
: i16x16
, imm8
: i32) -> i16x16
;
3953 #[link_name = "llvm.x86.avx2.psrli.d"]
3954 fn psrlid(a
: i32x8
, imm8
: i32) -> i32x8
;
3955 #[link_name = "llvm.x86.avx2.psrli.q"]
3956 fn psrliq(a
: i64x4
, imm8
: i32) -> i64x4
;
3957 #[link_name = "llvm.x86.avx2.psrlv.d"]
3958 fn psrlvd(a
: i32x4
, count
: i32x4
) -> i32x4
;
3959 #[link_name = "llvm.x86.avx2.psrlv.d.256"]
3960 fn psrlvd256(a
: i32x8
, count
: i32x8
) -> i32x8
;
3961 #[link_name = "llvm.x86.avx2.psrlv.q"]
3962 fn psrlvq(a
: i64x2
, count
: i64x2
) -> i64x2
;
3963 #[link_name = "llvm.x86.avx2.psrlv.q.256"]
3964 fn psrlvq256(a
: i64x4
, count
: i64x4
) -> i64x4
;
3965 #[link_name = "llvm.x86.avx2.psubs.b"]
3966 fn psubsb(a
: i8x32
, b
: i8x32
) -> i8x32
;
3967 #[link_name = "llvm.x86.avx2.psubs.w"]
3968 fn psubsw(a
: i16x16
, b
: i16x16
) -> i16x16
;
3969 #[link_name = "llvm.x86.avx2.psubus.b"]
3970 fn psubusb(a
: u8x32
, b
: u8x32
) -> u8x32
;
3971 #[link_name = "llvm.x86.avx2.psubus.w"]
3972 fn psubusw(a
: u16x16
, b
: u16x16
) -> u16x16
;
3973 #[link_name = "llvm.x86.avx2.pshuf.b"]
3974 fn pshufb(a
: u8x32
, b
: u8x32
) -> u8x32
;
3975 #[link_name = "llvm.x86.avx2.permd"]
3976 fn permd(a
: u32x8
, b
: u32x8
) -> u32x8
;
3977 #[link_name = "llvm.x86.avx2.permps"]
3978 fn permps(a
: __m256
, b
: i32x8
) -> __m256
;
3979 #[link_name = "llvm.x86.avx2.vperm2i128"]
3980 fn vperm2i128(a
: i64x4
, b
: i64x4
, imm8
: i8) -> i64x4
;
3981 #[link_name = "llvm.x86.avx2.gather.d.d"]
3982 fn pgatherdd(src
: i32x4
, slice
: *const i8, offsets
: i32x4
, mask
: i32x4
, scale
: i8) -> i32x4
;
3983 #[link_name = "llvm.x86.avx2.gather.d.d.256"]
3984 fn vpgatherdd(src
: i32x8
, slice
: *const i8, offsets
: i32x8
, mask
: i32x8
, scale
: i8) -> i32x8
;
3985 #[link_name = "llvm.x86.avx2.gather.d.q"]
3986 fn pgatherdq(src
: i64x2
, slice
: *const i8, offsets
: i32x4
, mask
: i64x2
, scale
: i8) -> i64x2
;
3987 #[link_name = "llvm.x86.avx2.gather.d.q.256"]
3988 fn vpgatherdq(src
: i64x4
, slice
: *const i8, offsets
: i32x4
, mask
: i64x4
, scale
: i8) -> i64x4
;
3989 #[link_name = "llvm.x86.avx2.gather.q.d"]
3990 fn pgatherqd(src
: i32x4
, slice
: *const i8, offsets
: i64x2
, mask
: i32x4
, scale
: i8) -> i32x4
;
3991 #[link_name = "llvm.x86.avx2.gather.q.d.256"]
3992 fn vpgatherqd(src
: i32x4
, slice
: *const i8, offsets
: i64x4
, mask
: i32x4
, scale
: i8) -> i32x4
;
3993 #[link_name = "llvm.x86.avx2.gather.q.q"]
3994 fn pgatherqq(src
: i64x2
, slice
: *const i8, offsets
: i64x2
, mask
: i64x2
, scale
: i8) -> i64x2
;
3995 #[link_name = "llvm.x86.avx2.gather.q.q.256"]
3996 fn vpgatherqq(src
: i64x4
, slice
: *const i8, offsets
: i64x4
, mask
: i64x4
, scale
: i8) -> i64x4
;
3997 #[link_name = "llvm.x86.avx2.gather.d.pd"]
4005 #[link_name = "llvm.x86.avx2.gather.d.pd.256"]
4013 #[link_name = "llvm.x86.avx2.gather.q.pd"]
4021 #[link_name = "llvm.x86.avx2.gather.q.pd.256"]
4029 #[link_name = "llvm.x86.avx2.gather.d.ps"]
4030 fn pgatherdps(src
: __m128
, slice
: *const i8, offsets
: i32x4
, mask
: __m128
, scale
: i8)
4032 #[link_name = "llvm.x86.avx2.gather.d.ps.256"]
4040 #[link_name = "llvm.x86.avx2.gather.q.ps"]
4041 fn pgatherqps(src
: __m128
, slice
: *const i8, offsets
: i64x2
, mask
: __m128
, scale
: i8)
4043 #[link_name = "llvm.x86.avx2.gather.q.ps.256"]
4051 #[link_name = "llvm.x86.avx2.psll.dq"]
4052 fn vpslldq(a
: i64x4
, b
: i32) -> i64x4
;
4053 #[link_name = "llvm.x86.avx2.psrl.dq"]
4054 fn vpsrldq(a
: i64x4
, b
: i32) -> i64x4
;
4060 use stdarch_test
::simd_test
;
4062 use crate::core_arch
::x86
::*;
4064 #[simd_test(enable = "avx2")]
4065 unsafe fn test_mm256_abs_epi32() {
4067 let a
= _mm256_setr_epi32(
4068 0, 1, -1, std
::i32::MAX
,
4069 std
::i32::MIN
, 100, -100, -32,
4071 let r
= _mm256_abs_epi32(a
);
4073 let e
= _mm256_setr_epi32(
4074 0, 1, 1, std
::i32::MAX
,
4075 std
::i32::MAX
.wrapping_add(1), 100, 100, 32,
4077 assert_eq_m256i(r
, e
);
4080 #[simd_test(enable = "avx2")]
4081 unsafe fn test_mm256_abs_epi16() {
4083 let a
= _mm256_setr_epi16(
4084 0, 1, -1, 2, -2, 3, -3, 4,
4085 -4, 5, -5, std
::i16::MAX
, std
::i16::MIN
, 100, -100, -32,
4087 let r
= _mm256_abs_epi16(a
);
4089 let e
= _mm256_setr_epi16(
4090 0, 1, 1, 2, 2, 3, 3, 4,
4091 4, 5, 5, std
::i16::MAX
, std
::i16::MAX
.wrapping_add(1), 100, 100, 32,
4093 assert_eq_m256i(r
, e
);
4096 #[simd_test(enable = "avx2")]
4097 unsafe fn test_mm256_abs_epi8() {
4099 let a
= _mm256_setr_epi8(
4100 0, 1, -1, 2, -2, 3, -3, 4,
4101 -4, 5, -5, std
::i8::MAX
, std
::i8::MIN
, 100, -100, -32,
4102 0, 1, -1, 2, -2, 3, -3, 4,
4103 -4, 5, -5, std
::i8::MAX
, std
::i8::MIN
, 100, -100, -32,
4105 let r
= _mm256_abs_epi8(a
);
4107 let e
= _mm256_setr_epi8(
4108 0, 1, 1, 2, 2, 3, 3, 4,
4109 4, 5, 5, std
::i8::MAX
, std
::i8::MAX
.wrapping_add(1), 100, 100, 32,
4110 0, 1, 1, 2, 2, 3, 3, 4,
4111 4, 5, 5, std
::i8::MAX
, std
::i8::MAX
.wrapping_add(1), 100, 100, 32,
4113 assert_eq_m256i(r
, e
);
4116 #[simd_test(enable = "avx2")]
4117 unsafe fn test_mm256_add_epi64() {
4118 let a
= _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
4119 let b
= _mm256_setr_epi64x(-1, 0, 1, 2);
4120 let r
= _mm256_add_epi64(a
, b
);
4121 let e
= _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002);
4122 assert_eq_m256i(r
, e
);
4125 #[simd_test(enable = "avx2")]
4126 unsafe fn test_mm256_add_epi32() {
4127 let a
= _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
4128 let b
= _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4129 let r
= _mm256_add_epi32(a
, b
);
4130 let e
= _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
4131 assert_eq_m256i(r
, e
);
4134 #[simd_test(enable = "avx2")]
4135 unsafe fn test_mm256_add_epi16() {
4137 let a
= _mm256_setr_epi16(
4138 0, 1, 2, 3, 4, 5, 6, 7,
4139 8, 9, 10, 11, 12, 13, 14, 15,
4142 let b
= _mm256_setr_epi16(
4143 0, 1, 2, 3, 4, 5, 6, 7,
4144 8, 9, 10, 11, 12, 13, 14, 15,
4146 let r
= _mm256_add_epi16(a
, b
);
4148 let e
= _mm256_setr_epi16(
4149 0, 2, 4, 6, 8, 10, 12, 14,
4150 16, 18, 20, 22, 24, 26, 28, 30,
4152 assert_eq_m256i(r
, e
);
4155 #[simd_test(enable = "avx2")]
4156 unsafe fn test_mm256_add_epi8() {
4158 let a
= _mm256_setr_epi8(
4159 0, 1, 2, 3, 4, 5, 6, 7,
4160 8, 9, 10, 11, 12, 13, 14, 15,
4161 16, 17, 18, 19, 20, 21, 22, 23,
4162 24, 25, 26, 27, 28, 29, 30, 31,
4165 let b
= _mm256_setr_epi8(
4166 0, 1, 2, 3, 4, 5, 6, 7,
4167 8, 9, 10, 11, 12, 13, 14, 15,
4168 16, 17, 18, 19, 20, 21, 22, 23,
4169 24, 25, 26, 27, 28, 29, 30, 31,
4171 let r
= _mm256_add_epi8(a
, b
);
4173 let e
= _mm256_setr_epi8(
4174 0, 2, 4, 6, 8, 10, 12, 14,
4175 16, 18, 20, 22, 24, 26, 28, 30,
4176 32, 34, 36, 38, 40, 42, 44, 46,
4177 48, 50, 52, 54, 56, 58, 60, 62,
4179 assert_eq_m256i(r
, e
);
4182 #[simd_test(enable = "avx2")]
4183 unsafe fn test_mm256_adds_epi8() {
4185 let a
= _mm256_setr_epi8(
4186 0, 1, 2, 3, 4, 5, 6, 7,
4187 8, 9, 10, 11, 12, 13, 14, 15,
4188 16, 17, 18, 19, 20, 21, 22, 23,
4189 24, 25, 26, 27, 28, 29, 30, 31,
4192 let b
= _mm256_setr_epi8(
4193 32, 33, 34, 35, 36, 37, 38, 39,
4194 40, 41, 42, 43, 44, 45, 46, 47,
4195 48, 49, 50, 51, 52, 53, 54, 55,
4196 56, 57, 58, 59, 60, 61, 62, 63,
4198 let r
= _mm256_adds_epi8(a
, b
);
4200 let e
= _mm256_setr_epi8(
4201 32, 34, 36, 38, 40, 42, 44, 46,
4202 48, 50, 52, 54, 56, 58, 60, 62,
4203 64, 66, 68, 70, 72, 74, 76, 78,
4204 80, 82, 84, 86, 88, 90, 92, 94,
4206 assert_eq_m256i(r
, e
);
4209 #[simd_test(enable = "avx2")]
4210 unsafe fn test_mm256_adds_epi8_saturate_positive() {
4211 let a
= _mm256_set1_epi8(0x7F);
4212 let b
= _mm256_set1_epi8(1);
4213 let r
= _mm256_adds_epi8(a
, b
);
4214 assert_eq_m256i(r
, a
);
4217 #[simd_test(enable = "avx2")]
4218 unsafe fn test_mm256_adds_epi8_saturate_negative() {
4219 let a
= _mm256_set1_epi8(-0x80);
4220 let b
= _mm256_set1_epi8(-1);
4221 let r
= _mm256_adds_epi8(a
, b
);
4222 assert_eq_m256i(r
, a
);
4225 #[simd_test(enable = "avx2")]
4226 unsafe fn test_mm256_adds_epi16() {
4228 let a
= _mm256_setr_epi16(
4229 0, 1, 2, 3, 4, 5, 6, 7,
4230 8, 9, 10, 11, 12, 13, 14, 15,
4233 let b
= _mm256_setr_epi16(
4234 32, 33, 34, 35, 36, 37, 38, 39,
4235 40, 41, 42, 43, 44, 45, 46, 47,
4237 let r
= _mm256_adds_epi16(a
, b
);
4239 let e
= _mm256_setr_epi16(
4240 32, 34, 36, 38, 40, 42, 44, 46,
4241 48, 50, 52, 54, 56, 58, 60, 62,
4244 assert_eq_m256i(r
, e
);
4247 #[simd_test(enable = "avx2")]
4248 unsafe fn test_mm256_adds_epi16_saturate_positive() {
4249 let a
= _mm256_set1_epi16(0x7FFF);
4250 let b
= _mm256_set1_epi16(1);
4251 let r
= _mm256_adds_epi16(a
, b
);
4252 assert_eq_m256i(r
, a
);
4255 #[simd_test(enable = "avx2")]
4256 unsafe fn test_mm256_adds_epi16_saturate_negative() {
4257 let a
= _mm256_set1_epi16(-0x8000);
4258 let b
= _mm256_set1_epi16(-1);
4259 let r
= _mm256_adds_epi16(a
, b
);
4260 assert_eq_m256i(r
, a
);
4263 #[simd_test(enable = "avx2")]
4264 unsafe fn test_mm256_adds_epu8() {
4266 let a
= _mm256_setr_epi8(
4267 0, 1, 2, 3, 4, 5, 6, 7,
4268 8, 9, 10, 11, 12, 13, 14, 15,
4269 16, 17, 18, 19, 20, 21, 22, 23,
4270 24, 25, 26, 27, 28, 29, 30, 31,
4273 let b
= _mm256_setr_epi8(
4274 32, 33, 34, 35, 36, 37, 38, 39,
4275 40, 41, 42, 43, 44, 45, 46, 47,
4276 48, 49, 50, 51, 52, 53, 54, 55,
4277 56, 57, 58, 59, 60, 61, 62, 63,
4279 let r
= _mm256_adds_epu8(a
, b
);
4281 let e
= _mm256_setr_epi8(
4282 32, 34, 36, 38, 40, 42, 44, 46,
4283 48, 50, 52, 54, 56, 58, 60, 62,
4284 64, 66, 68, 70, 72, 74, 76, 78,
4285 80, 82, 84, 86, 88, 90, 92, 94,
4287 assert_eq_m256i(r
, e
);
4290 #[simd_test(enable = "avx2")]
4291 unsafe fn test_mm256_adds_epu8_saturate() {
4292 let a
= _mm256_set1_epi8(!0);
4293 let b
= _mm256_set1_epi8(1);
4294 let r
= _mm256_adds_epu8(a
, b
);
4295 assert_eq_m256i(r
, a
);
4298 #[simd_test(enable = "avx2")]
4299 unsafe fn test_mm256_adds_epu16() {
4301 let a
= _mm256_setr_epi16(
4302 0, 1, 2, 3, 4, 5, 6, 7,
4303 8, 9, 10, 11, 12, 13, 14, 15,
4306 let b
= _mm256_setr_epi16(
4307 32, 33, 34, 35, 36, 37, 38, 39,
4308 40, 41, 42, 43, 44, 45, 46, 47,
4310 let r
= _mm256_adds_epu16(a
, b
);
4312 let e
= _mm256_setr_epi16(
4313 32, 34, 36, 38, 40, 42, 44, 46,
4314 48, 50, 52, 54, 56, 58, 60, 62,
4317 assert_eq_m256i(r
, e
);
4320 #[simd_test(enable = "avx2")]
4321 unsafe fn test_mm256_adds_epu16_saturate() {
4322 let a
= _mm256_set1_epi16(!0);
4323 let b
= _mm256_set1_epi16(1);
4324 let r
= _mm256_adds_epu16(a
, b
);
4325 assert_eq_m256i(r
, a
);
4328 #[simd_test(enable = "avx2")]
4329 unsafe fn test_mm256_and_si256() {
4330 let a
= _mm256_set1_epi8(5);
4331 let b
= _mm256_set1_epi8(3);
4332 let got
= _mm256_and_si256(a
, b
);
4333 assert_eq_m256i(got
, _mm256_set1_epi8(1));
4336 #[simd_test(enable = "avx2")]
4337 unsafe fn test_mm256_andnot_si256() {
4338 let a
= _mm256_set1_epi8(5);
4339 let b
= _mm256_set1_epi8(3);
4340 let got
= _mm256_andnot_si256(a
, b
);
4341 assert_eq_m256i(got
, _mm256_set1_epi8(2));
4344 #[simd_test(enable = "avx2")]
4345 unsafe fn test_mm256_avg_epu8() {
4346 let (a
, b
) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
4347 let r
= _mm256_avg_epu8(a
, b
);
4348 assert_eq_m256i(r
, _mm256_set1_epi8(6));
4351 #[simd_test(enable = "avx2")]
4352 unsafe fn test_mm256_avg_epu16() {
4353 let (a
, b
) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4354 let r
= _mm256_avg_epu16(a
, b
);
4355 assert_eq_m256i(r
, _mm256_set1_epi16(6));
4358 #[simd_test(enable = "avx2")]
4359 unsafe fn test_mm_blend_epi32() {
4360 let (a
, b
) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
4361 let e
= _mm_setr_epi32(9, 3, 3, 3);
4362 let r
= _mm_blend_epi32(a
, b
, 0x01 as i32);
4363 assert_eq_m128i(r
, e
);
4365 let r
= _mm_blend_epi32(b
, a
, 0x0E as i32);
4366 assert_eq_m128i(r
, e
);
4369 #[simd_test(enable = "avx2")]
4370 unsafe fn test_mm256_blend_epi32() {
4371 let (a
, b
) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
4372 let e
= _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
4373 let r
= _mm256_blend_epi32(a
, b
, 0x01 as i32);
4374 assert_eq_m256i(r
, e
);
4376 let e
= _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9);
4377 let r
= _mm256_blend_epi32(a
, b
, 0x82 as i32);
4378 assert_eq_m256i(r
, e
);
4380 let e
= _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3);
4381 let r
= _mm256_blend_epi32(a
, b
, 0x7C as i32);
4382 assert_eq_m256i(r
, e
);
4385 #[simd_test(enable = "avx2")]
4386 unsafe fn test_mm256_blend_epi16() {
4387 let (a
, b
) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
4388 let e
= _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
4389 let r
= _mm256_blend_epi16(a
, b
, 0x01 as i32);
4390 assert_eq_m256i(r
, e
);
4392 let r
= _mm256_blend_epi16(b
, a
, 0xFE as i32);
4393 assert_eq_m256i(r
, e
);
4396 #[simd_test(enable = "avx2")]
4397 unsafe fn test_mm256_blendv_epi8() {
4398 let (a
, b
) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
4399 let mask
= _mm256_insert_epi8(_mm256_set1_epi8(0), -1, 2);
4400 let e
= _mm256_insert_epi8(_mm256_set1_epi8(4), 2, 2);
4401 let r
= _mm256_blendv_epi8(a
, b
, mask
);
4402 assert_eq_m256i(r
, e
);
4405 #[simd_test(enable = "avx2")]
4406 unsafe fn test_mm_broadcastb_epi8() {
4407 let a
= _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
4408 let res
= _mm_broadcastb_epi8(a
);
4409 assert_eq_m128i(res
, _mm_set1_epi8(0x2a));
4412 #[simd_test(enable = "avx2")]
4413 unsafe fn test_mm256_broadcastb_epi8() {
4414 let a
= _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0);
4415 let res
= _mm256_broadcastb_epi8(a
);
4416 assert_eq_m256i(res
, _mm256_set1_epi8(0x2a));
4419 #[simd_test(enable = "avx2")]
4420 unsafe fn test_mm_broadcastd_epi32() {
4421 let a
= _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4422 let res
= _mm_broadcastd_epi32(a
);
4423 assert_eq_m128i(res
, _mm_set1_epi32(0x2a));
4426 #[simd_test(enable = "avx2")]
4427 unsafe fn test_mm256_broadcastd_epi32() {
4428 let a
= _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
4429 let res
= _mm256_broadcastd_epi32(a
);
4430 assert_eq_m256i(res
, _mm256_set1_epi32(0x2a));
4433 #[simd_test(enable = "avx2")]
4434 unsafe fn test_mm_broadcastq_epi64() {
4435 let a
= _mm_setr_epi64x(0x1ffffffff, 0);
4436 let res
= _mm_broadcastq_epi64(a
);
4437 assert_eq_m128i(res
, _mm_set1_epi64x(0x1ffffffff));
4440 #[simd_test(enable = "avx2")]
4441 unsafe fn test_mm256_broadcastq_epi64() {
4442 let a
= _mm_setr_epi64x(0x1ffffffff, 0);
4443 let res
= _mm256_broadcastq_epi64(a
);
4444 assert_eq_m256i(res
, _mm256_set1_epi64x(0x1ffffffff));
4447 #[simd_test(enable = "avx2")]
4448 unsafe fn test_mm_broadcastsd_pd() {
4449 let a
= _mm_setr_pd(6.28, 3.14);
4450 let res
= _mm_broadcastsd_pd(a
);
4451 assert_eq_m128d(res
, _mm_set1_pd(6.28f64));
4454 #[simd_test(enable = "avx2")]
4455 unsafe fn test_mm256_broadcastsd_pd() {
4456 let a
= _mm_setr_pd(6.28, 3.14);
4457 let res
= _mm256_broadcastsd_pd(a
);
4458 assert_eq_m256d(res
, _mm256_set1_pd(6.28f64));
4461 #[simd_test(enable = "avx2")]
4462 unsafe fn test_mm256_broadcastsi128_si256() {
4463 let a
= _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
4464 let res
= _mm256_broadcastsi128_si256(a
);
4465 let retval
= _mm256_setr_epi64x(
4471 assert_eq_m256i(res
, retval
);
4474 #[simd_test(enable = "avx2")]
4475 unsafe fn test_mm_broadcastss_ps() {
4476 let a
= _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
4477 let res
= _mm_broadcastss_ps(a
);
4478 assert_eq_m128(res
, _mm_set1_ps(6.28f32));
4481 #[simd_test(enable = "avx2")]
4482 unsafe fn test_mm256_broadcastss_ps() {
4483 let a
= _mm_setr_ps(6.28, 3.14, 0.0, 0.0);
4484 let res
= _mm256_broadcastss_ps(a
);
4485 assert_eq_m256(res
, _mm256_set1_ps(6.28f32));
4488 #[simd_test(enable = "avx2")]
4489 unsafe fn test_mm_broadcastw_epi16() {
4490 let a
= _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0);
4491 let res
= _mm_broadcastw_epi16(a
);
4492 assert_eq_m128i(res
, _mm_set1_epi16(0x22b));
4495 #[simd_test(enable = "avx2")]
4496 unsafe fn test_mm256_broadcastw_epi16() {
4497 let a
= _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0);
4498 let res
= _mm256_broadcastw_epi16(a
);
4499 assert_eq_m256i(res
, _mm256_set1_epi16(0x22b));
4502 #[simd_test(enable = "avx2")]
4503 unsafe fn test_mm256_cmpeq_epi8() {
4505 let a
= _mm256_setr_epi8(
4506 0, 1, 2, 3, 4, 5, 6, 7,
4507 8, 9, 10, 11, 12, 13, 14, 15,
4508 16, 17, 18, 19, 20, 21, 22, 23,
4509 24, 25, 26, 27, 28, 29, 30, 31,
4512 let b
= _mm256_setr_epi8(
4513 31, 30, 2, 28, 27, 26, 25, 24,
4514 23, 22, 21, 20, 19, 18, 17, 16,
4515 15, 14, 13, 12, 11, 10, 9, 8,
4516 7, 6, 5, 4, 3, 2, 1, 0,
4518 let r
= _mm256_cmpeq_epi8(a
, b
);
4519 assert_eq_m256i(r
, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2));
4522 #[simd_test(enable = "avx2")]
4523 unsafe fn test_mm256_cmpeq_epi16() {
4525 let a
= _mm256_setr_epi16(
4526 0, 1, 2, 3, 4, 5, 6, 7,
4527 8, 9, 10, 11, 12, 13, 14, 15,
4530 let b
= _mm256_setr_epi16(
4531 15, 14, 2, 12, 11, 10, 9, 8,
4532 7, 6, 5, 4, 3, 2, 1, 0,
4534 let r
= _mm256_cmpeq_epi16(a
, b
);
4535 assert_eq_m256i(r
, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2));
4538 #[simd_test(enable = "avx2")]
4539 unsafe fn test_mm256_cmpeq_epi32() {
4540 let a
= _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4541 let b
= _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
4542 let r
= _mm256_cmpeq_epi32(a
, b
);
4543 let e
= _mm256_set1_epi32(0);
4544 let e
= _mm256_insert_epi32(e
, !0, 2);
4545 assert_eq_m256i(r
, e
);
4548 #[simd_test(enable = "avx2")]
4549 unsafe fn test_mm256_cmpeq_epi64() {
4550 let a
= _mm256_setr_epi64x(0, 1, 2, 3);
4551 let b
= _mm256_setr_epi64x(3, 2, 2, 0);
4552 let r
= _mm256_cmpeq_epi64(a
, b
);
4553 assert_eq_m256i(r
, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2));
4556 #[simd_test(enable = "avx2")]
4557 unsafe fn test_mm256_cmpgt_epi8() {
4558 let a
= _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0);
4559 let b
= _mm256_set1_epi8(0);
4560 let r
= _mm256_cmpgt_epi8(a
, b
);
4561 assert_eq_m256i(r
, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0));
4564 #[simd_test(enable = "avx2")]
4565 unsafe fn test_mm256_cmpgt_epi16() {
4566 let a
= _mm256_insert_epi16(_mm256_set1_epi16(0), 5, 0);
4567 let b
= _mm256_set1_epi16(0);
4568 let r
= _mm256_cmpgt_epi16(a
, b
);
4569 assert_eq_m256i(r
, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0));
4572 #[simd_test(enable = "avx2")]
4573 unsafe fn test_mm256_cmpgt_epi32() {
4574 let a
= _mm256_insert_epi32(_mm256_set1_epi32(0), 5, 0);
4575 let b
= _mm256_set1_epi32(0);
4576 let r
= _mm256_cmpgt_epi32(a
, b
);
4577 assert_eq_m256i(r
, _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0));
4580 #[simd_test(enable = "avx2")]
4581 unsafe fn test_mm256_cmpgt_epi64() {
4582 let a
= _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0);
4583 let b
= _mm256_set1_epi64x(0);
4584 let r
= _mm256_cmpgt_epi64(a
, b
);
4585 assert_eq_m256i(r
, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0));
4588 #[simd_test(enable = "avx2")]
4589 unsafe fn test_mm256_cvtepi8_epi16() {
4591 let a
= _mm_setr_epi8(
4592 0, 0, -1, 1, -2, 2, -3, 3,
4593 -4, 4, -5, 5, -6, 6, -7, 7,
4596 let r
= _mm256_setr_epi16(
4597 0, 0, -1, 1, -2, 2, -3, 3,
4598 -4, 4, -5, 5, -6, 6, -7, 7,
4600 assert_eq_m256i(r
, _mm256_cvtepi8_epi16(a
));
4603 #[simd_test(enable = "avx2")]
4604 unsafe fn test_mm256_cvtepi8_epi32() {
4606 let a
= _mm_setr_epi8(
4607 0, 0, -1, 1, -2, 2, -3, 3,
4608 -4, 4, -5, 5, -6, 6, -7, 7,
4610 let r
= _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4611 assert_eq_m256i(r
, _mm256_cvtepi8_epi32(a
));
4614 #[simd_test(enable = "avx2")]
4615 unsafe fn test_mm256_cvtepi8_epi64() {
4617 let a
= _mm_setr_epi8(
4618 0, 0, -1, 1, -2, 2, -3, 3,
4619 -4, 4, -5, 5, -6, 6, -7, 7,
4621 let r
= _mm256_setr_epi64x(0, 0, -1, 1);
4622 assert_eq_m256i(r
, _mm256_cvtepi8_epi64(a
));
4625 #[simd_test(enable = "avx2")]
4626 unsafe fn test_mm256_cvtepi16_epi32() {
4627 let a
= _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4628 let r
= _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
4629 assert_eq_m256i(r
, _mm256_cvtepi16_epi32(a
));
4632 #[simd_test(enable = "avx2")]
4633 unsafe fn test_mm256_cvtepi16_epi64() {
4634 let a
= _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
4635 let r
= _mm256_setr_epi64x(0, 0, -1, 1);
4636 assert_eq_m256i(r
, _mm256_cvtepi16_epi64(a
));
4639 #[simd_test(enable = "avx2")]
4640 unsafe fn test_mm256_cvtepi32_epi64() {
4641 let a
= _mm_setr_epi32(0, 0, -1, 1);
4642 let r
= _mm256_setr_epi64x(0, 0, -1, 1);
4643 assert_eq_m256i(r
, _mm256_cvtepi32_epi64(a
));
4646 #[simd_test(enable = "avx2")]
4647 unsafe fn test_mm256_cvtepu16_epi32() {
4648 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4649 let r
= _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4650 assert_eq_m256i(r
, _mm256_cvtepu16_epi32(a
));
4653 #[simd_test(enable = "avx2")]
4654 unsafe fn test_mm256_cvtepu16_epi64() {
4655 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4656 let r
= _mm256_setr_epi64x(0, 1, 2, 3);
4657 assert_eq_m256i(r
, _mm256_cvtepu16_epi64(a
));
4660 #[simd_test(enable = "avx2")]
4661 unsafe fn test_mm256_cvtepu32_epi64() {
4662 let a
= _mm_setr_epi32(0, 1, 2, 3);
4663 let r
= _mm256_setr_epi64x(0, 1, 2, 3);
4664 assert_eq_m256i(r
, _mm256_cvtepu32_epi64(a
));
4667 #[simd_test(enable = "avx2")]
4668 unsafe fn test_mm256_cvtepu8_epi16() {
4670 let a
= _mm_setr_epi8(
4671 0, 1, 2, 3, 4, 5, 6, 7,
4672 8, 9, 10, 11, 12, 13, 14, 15,
4675 let r
= _mm256_setr_epi16(
4676 0, 1, 2, 3, 4, 5, 6, 7,
4677 8, 9, 10, 11, 12, 13, 14, 15,
4679 assert_eq_m256i(r
, _mm256_cvtepu8_epi16(a
));
4682 #[simd_test(enable = "avx2")]
4683 unsafe fn test_mm256_cvtepu8_epi32() {
4685 let a
= _mm_setr_epi8(
4686 0, 1, 2, 3, 4, 5, 6, 7,
4687 8, 9, 10, 11, 12, 13, 14, 15,
4689 let r
= _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
4690 assert_eq_m256i(r
, _mm256_cvtepu8_epi32(a
));
4693 #[simd_test(enable = "avx2")]
4694 unsafe fn test_mm256_cvtepu8_epi64() {
4696 let a
= _mm_setr_epi8(
4697 0, 1, 2, 3, 4, 5, 6, 7,
4698 8, 9, 10, 11, 12, 13, 14, 15,
4700 let r
= _mm256_setr_epi64x(0, 1, 2, 3);
4701 assert_eq_m256i(r
, _mm256_cvtepu8_epi64(a
));
4704 #[simd_test(enable = "avx2")]
4705 unsafe fn test_mm256_extracti128_si256() {
4706 let a
= _mm256_setr_epi64x(1, 2, 3, 4);
4707 let r
= _mm256_extracti128_si256(a
, 0b01);
4708 let e
= _mm_setr_epi64x(3, 4);
4709 assert_eq_m128i(r
, e
);
4712 #[simd_test(enable = "avx2")]
4713 unsafe fn test_mm256_hadd_epi16() {
4714 let a
= _mm256_set1_epi16(2);
4715 let b
= _mm256_set1_epi16(4);
4716 let r
= _mm256_hadd_epi16(a
, b
);
4717 let e
= _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8);
4718 assert_eq_m256i(r
, e
);
4721 #[simd_test(enable = "avx2")]
4722 unsafe fn test_mm256_hadd_epi32() {
4723 let a
= _mm256_set1_epi32(2);
4724 let b
= _mm256_set1_epi32(4);
4725 let r
= _mm256_hadd_epi32(a
, b
);
4726 let e
= _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8);
4727 assert_eq_m256i(r
, e
);
4730 #[simd_test(enable = "avx2")]
4731 unsafe fn test_mm256_hadds_epi16() {
4732 let a
= _mm256_set1_epi16(2);
4733 let a
= _mm256_insert_epi16(a
, 0x7fff, 0);
4734 let a
= _mm256_insert_epi16(a
, 1, 1);
4735 let b
= _mm256_set1_epi16(4);
4736 let r
= _mm256_hadds_epi16(a
, b
);
4738 let e
= _mm256_setr_epi16(
4739 0x7FFF, 4, 4, 4, 8, 8, 8, 8,
4740 4, 4, 4, 4, 8, 8, 8, 8,
4742 assert_eq_m256i(r
, e
);
4745 #[simd_test(enable = "avx2")]
4746 unsafe fn test_mm256_hsub_epi16() {
4747 let a
= _mm256_set1_epi16(2);
4748 let b
= _mm256_set1_epi16(4);
4749 let r
= _mm256_hsub_epi16(a
, b
);
4750 let e
= _mm256_set1_epi16(0);
4751 assert_eq_m256i(r
, e
);
4754 #[simd_test(enable = "avx2")]
4755 unsafe fn test_mm256_hsub_epi32() {
4756 let a
= _mm256_set1_epi32(2);
4757 let b
= _mm256_set1_epi32(4);
4758 let r
= _mm256_hsub_epi32(a
, b
);
4759 let e
= _mm256_set1_epi32(0);
4760 assert_eq_m256i(r
, e
);
4763 #[simd_test(enable = "avx2")]
4764 unsafe fn test_mm256_hsubs_epi16() {
4765 let a
= _mm256_set1_epi16(2);
4766 let a
= _mm256_insert_epi16(a
, 0x7fff, 0);
4767 let a
= _mm256_insert_epi16(a
, -1, 1);
4768 let b
= _mm256_set1_epi16(4);
4769 let r
= _mm256_hsubs_epi16(a
, b
);
4770 let e
= _mm256_insert_epi16(_mm256_set1_epi16(0), 0x7FFF, 0);
4771 assert_eq_m256i(r
, e
);
4774 #[simd_test(enable = "avx2")]
4775 unsafe fn test_mm256_madd_epi16() {
4776 let a
= _mm256_set1_epi16(2);
4777 let b
= _mm256_set1_epi16(4);
4778 let r
= _mm256_madd_epi16(a
, b
);
4779 let e
= _mm256_set1_epi32(16);
4780 assert_eq_m256i(r
, e
);
4783 #[simd_test(enable = "avx2")]
4784 unsafe fn test_mm256_inserti128_si256() {
4785 let a
= _mm256_setr_epi64x(1, 2, 3, 4);
4786 let b
= _mm_setr_epi64x(7, 8);
4787 let r
= _mm256_inserti128_si256(a
, b
, 0b01);
4788 let e
= _mm256_setr_epi64x(1, 2, 7, 8);
4789 assert_eq_m256i(r
, e
);
4792 #[simd_test(enable = "avx2")]
4793 unsafe fn test_mm256_maddubs_epi16() {
4794 let a
= _mm256_set1_epi8(2);
4795 let b
= _mm256_set1_epi8(4);
4796 let r
= _mm256_maddubs_epi16(a
, b
);
4797 let e
= _mm256_set1_epi16(16);
4798 assert_eq_m256i(r
, e
);
4801 #[simd_test(enable = "avx2")]
4802 unsafe fn test_mm_maskload_epi32() {
4803 let nums
= [1, 2, 3, 4];
4804 let a
= &nums
as *const i32;
4805 let mask
= _mm_setr_epi32(-1, 0, 0, -1);
4806 let r
= _mm_maskload_epi32(a
, mask
);
4807 let e
= _mm_setr_epi32(1, 0, 0, 4);
4808 assert_eq_m128i(r
, e
);
4811 #[simd_test(enable = "avx2")]
4812 unsafe fn test_mm256_maskload_epi32() {
4813 let nums
= [1, 2, 3, 4, 5, 6, 7, 8];
4814 let a
= &nums
as *const i32;
4815 let mask
= _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4816 let r
= _mm256_maskload_epi32(a
, mask
);
4817 let e
= _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
4818 assert_eq_m256i(r
, e
);
4821 #[simd_test(enable = "avx2")]
4822 unsafe fn test_mm_maskload_epi64() {
4823 let nums
= [1_i64, 2_i64];
4824 let a
= &nums
as *const i64;
4825 let mask
= _mm_setr_epi64x(0, -1);
4826 let r
= _mm_maskload_epi64(a
, mask
);
4827 let e
= _mm_setr_epi64x(0, 2);
4828 assert_eq_m128i(r
, e
);
4831 #[simd_test(enable = "avx2")]
4832 unsafe fn test_mm256_maskload_epi64() {
4833 let nums
= [1_i64, 2_i64, 3_i64, 4_i64];
4834 let a
= &nums
as *const i64;
4835 let mask
= _mm256_setr_epi64x(0, -1, -1, 0);
4836 let r
= _mm256_maskload_epi64(a
, mask
);
4837 let e
= _mm256_setr_epi64x(0, 2, 3, 0);
4838 assert_eq_m256i(r
, e
);
4841 #[simd_test(enable = "avx2")]
4842 unsafe fn test_mm_maskstore_epi32() {
4843 let a
= _mm_setr_epi32(1, 2, 3, 4);
4844 let mut arr
= [-1, -1, -1, -1];
4845 let mask
= _mm_setr_epi32(-1, 0, 0, -1);
4846 _mm_maskstore_epi32(arr
.as_mut_ptr(), mask
, a
);
4847 let e
= [1, -1, -1, 4];
4851 #[simd_test(enable = "avx2")]
4852 unsafe fn test_mm256_maskstore_epi32() {
4853 let a
= _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
4854 let mut arr
= [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
4855 let mask
= _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
4856 _mm256_maskstore_epi32(arr
.as_mut_ptr(), mask
, a
);
4857 let e
= [1, -1, -1, 42, -1, 6, 7, -1];
4861 #[simd_test(enable = "avx2")]
4862 unsafe fn test_mm_maskstore_epi64() {
4863 let a
= _mm_setr_epi64x(1_i64, 2_i64);
4864 let mut arr
= [-1_i64, -1_i64];
4865 let mask
= _mm_setr_epi64x(0, -1);
4866 _mm_maskstore_epi64(arr
.as_mut_ptr(), mask
, a
);
4871 #[simd_test(enable = "avx2")]
4872 unsafe fn test_mm256_maskstore_epi64() {
4873 let a
= _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
4874 let mut arr
= [-1_i64, -1_i64, -1_i64, -1_i64];
4875 let mask
= _mm256_setr_epi64x(0, -1, -1, 0);
4876 _mm256_maskstore_epi64(arr
.as_mut_ptr(), mask
, a
);
4877 let e
= [-1, 2, 3, -1];
4881 #[simd_test(enable = "avx2")]
4882 unsafe fn test_mm256_max_epi16() {
4883 let a
= _mm256_set1_epi16(2);
4884 let b
= _mm256_set1_epi16(4);
4885 let r
= _mm256_max_epi16(a
, b
);
4886 assert_eq_m256i(r
, b
);
4889 #[simd_test(enable = "avx2")]
4890 unsafe fn test_mm256_max_epi32() {
4891 let a
= _mm256_set1_epi32(2);
4892 let b
= _mm256_set1_epi32(4);
4893 let r
= _mm256_max_epi32(a
, b
);
4894 assert_eq_m256i(r
, b
);
4897 #[simd_test(enable = "avx2")]
4898 unsafe fn test_mm256_max_epi8() {
4899 let a
= _mm256_set1_epi8(2);
4900 let b
= _mm256_set1_epi8(4);
4901 let r
= _mm256_max_epi8(a
, b
);
4902 assert_eq_m256i(r
, b
);
4905 #[simd_test(enable = "avx2")]
4906 unsafe fn test_mm256_max_epu16() {
4907 let a
= _mm256_set1_epi16(2);
4908 let b
= _mm256_set1_epi16(4);
4909 let r
= _mm256_max_epu16(a
, b
);
4910 assert_eq_m256i(r
, b
);
4913 #[simd_test(enable = "avx2")]
4914 unsafe fn test_mm256_max_epu32() {
4915 let a
= _mm256_set1_epi32(2);
4916 let b
= _mm256_set1_epi32(4);
4917 let r
= _mm256_max_epu32(a
, b
);
4918 assert_eq_m256i(r
, b
);
4921 #[simd_test(enable = "avx2")]
4922 unsafe fn test_mm256_max_epu8() {
4923 let a
= _mm256_set1_epi8(2);
4924 let b
= _mm256_set1_epi8(4);
4925 let r
= _mm256_max_epu8(a
, b
);
4926 assert_eq_m256i(r
, b
);
4929 #[simd_test(enable = "avx2")]
4930 unsafe fn test_mm256_min_epi16() {
4931 let a
= _mm256_set1_epi16(2);
4932 let b
= _mm256_set1_epi16(4);
4933 let r
= _mm256_min_epi16(a
, b
);
4934 assert_eq_m256i(r
, a
);
4937 #[simd_test(enable = "avx2")]
4938 unsafe fn test_mm256_min_epi32() {
4939 let a
= _mm256_set1_epi32(2);
4940 let b
= _mm256_set1_epi32(4);
4941 let r
= _mm256_min_epi32(a
, b
);
4942 assert_eq_m256i(r
, a
);
4945 #[simd_test(enable = "avx2")]
4946 unsafe fn test_mm256_min_epi8() {
4947 let a
= _mm256_set1_epi8(2);
4948 let b
= _mm256_set1_epi8(4);
4949 let r
= _mm256_min_epi8(a
, b
);
4950 assert_eq_m256i(r
, a
);
4953 #[simd_test(enable = "avx2")]
4954 unsafe fn test_mm256_min_epu16() {
4955 let a
= _mm256_set1_epi16(2);
4956 let b
= _mm256_set1_epi16(4);
4957 let r
= _mm256_min_epu16(a
, b
);
4958 assert_eq_m256i(r
, a
);
4961 #[simd_test(enable = "avx2")]
4962 unsafe fn test_mm256_min_epu32() {
4963 let a
= _mm256_set1_epi32(2);
4964 let b
= _mm256_set1_epi32(4);
4965 let r
= _mm256_min_epu32(a
, b
);
4966 assert_eq_m256i(r
, a
);
4969 #[simd_test(enable = "avx2")]
4970 unsafe fn test_mm256_min_epu8() {
4971 let a
= _mm256_set1_epi8(2);
4972 let b
= _mm256_set1_epi8(4);
4973 let r
= _mm256_min_epu8(a
, b
);
4974 assert_eq_m256i(r
, a
);
4977 #[simd_test(enable = "avx2")]
4978 unsafe fn test_mm256_movemask_epi8() {
4979 let a
= _mm256_set1_epi8(-1);
4980 let r
= _mm256_movemask_epi8(a
);
4985 #[simd_test(enable = "avx2")]
4986 unsafe fn test_mm256_mpsadbw_epu8() {
4987 let a
= _mm256_set1_epi8(2);
4988 let b
= _mm256_set1_epi8(4);
4989 let r
= _mm256_mpsadbw_epu8(a
, b
, 0);
4990 let e
= _mm256_set1_epi16(8);
4991 assert_eq_m256i(r
, e
);
4994 #[simd_test(enable = "avx2")]
4995 unsafe fn test_mm256_mul_epi32() {
4996 let a
= _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
4997 let b
= _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
4998 let r
= _mm256_mul_epi32(a
, b
);
4999 let e
= _mm256_setr_epi64x(0, 0, 10, 14);
5000 assert_eq_m256i(r
, e
);
5003 #[simd_test(enable = "avx2")]
5004 unsafe fn test_mm256_mul_epu32() {
5005 let a
= _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
5006 let b
= _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
5007 let r
= _mm256_mul_epu32(a
, b
);
5008 let e
= _mm256_setr_epi64x(0, 0, 10, 14);
5009 assert_eq_m256i(r
, e
);
5012 #[simd_test(enable = "avx2")]
5013 unsafe fn test_mm256_mulhi_epi16() {
5014 let a
= _mm256_set1_epi16(6535);
5015 let b
= _mm256_set1_epi16(6535);
5016 let r
= _mm256_mulhi_epi16(a
, b
);
5017 let e
= _mm256_set1_epi16(651);
5018 assert_eq_m256i(r
, e
);
5021 #[simd_test(enable = "avx2")]
5022 unsafe fn test_mm256_mulhi_epu16() {
5023 let a
= _mm256_set1_epi16(6535);
5024 let b
= _mm256_set1_epi16(6535);
5025 let r
= _mm256_mulhi_epu16(a
, b
);
5026 let e
= _mm256_set1_epi16(651);
5027 assert_eq_m256i(r
, e
);
5030 #[simd_test(enable = "avx2")]
5031 unsafe fn test_mm256_mullo_epi16() {
5032 let a
= _mm256_set1_epi16(2);
5033 let b
= _mm256_set1_epi16(4);
5034 let r
= _mm256_mullo_epi16(a
, b
);
5035 let e
= _mm256_set1_epi16(8);
5036 assert_eq_m256i(r
, e
);
5039 #[simd_test(enable = "avx2")]
5040 unsafe fn test_mm256_mullo_epi32() {
5041 let a
= _mm256_set1_epi32(2);
5042 let b
= _mm256_set1_epi32(4);
5043 let r
= _mm256_mullo_epi32(a
, b
);
5044 let e
= _mm256_set1_epi32(8);
5045 assert_eq_m256i(r
, e
);
5048 #[simd_test(enable = "avx2")]
5049 unsafe fn test_mm256_mulhrs_epi16() {
5050 let a
= _mm256_set1_epi16(2);
5051 let b
= _mm256_set1_epi16(4);
5052 let r
= _mm256_mullo_epi16(a
, b
);
5053 let e
= _mm256_set1_epi16(8);
5054 assert_eq_m256i(r
, e
);
5057 #[simd_test(enable = "avx2")]
5058 unsafe fn test_mm256_or_si256() {
5059 let a
= _mm256_set1_epi8(-1);
5060 let b
= _mm256_set1_epi8(0);
5061 let r
= _mm256_or_si256(a
, b
);
5062 assert_eq_m256i(r
, a
);
5065 #[simd_test(enable = "avx2")]
5066 unsafe fn test_mm256_packs_epi16() {
5067 let a
= _mm256_set1_epi16(2);
5068 let b
= _mm256_set1_epi16(4);
5069 let r
= _mm256_packs_epi16(a
, b
);
5071 let e
= _mm256_setr_epi8(
5072 2, 2, 2, 2, 2, 2, 2, 2,
5073 4, 4, 4, 4, 4, 4, 4, 4,
5074 2, 2, 2, 2, 2, 2, 2, 2,
5075 4, 4, 4, 4, 4, 4, 4, 4,
5078 assert_eq_m256i(r
, e
);
5081 #[simd_test(enable = "avx2")]
5082 unsafe fn test_mm256_packs_epi32() {
5083 let a
= _mm256_set1_epi32(2);
5084 let b
= _mm256_set1_epi32(4);
5085 let r
= _mm256_packs_epi32(a
, b
);
5086 let e
= _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5088 assert_eq_m256i(r
, e
);
5091 #[simd_test(enable = "avx2")]
5092 unsafe fn test_mm256_packus_epi16() {
5093 let a
= _mm256_set1_epi16(2);
5094 let b
= _mm256_set1_epi16(4);
5095 let r
= _mm256_packus_epi16(a
, b
);
5097 let e
= _mm256_setr_epi8(
5098 2, 2, 2, 2, 2, 2, 2, 2,
5099 4, 4, 4, 4, 4, 4, 4, 4,
5100 2, 2, 2, 2, 2, 2, 2, 2,
5101 4, 4, 4, 4, 4, 4, 4, 4,
5104 assert_eq_m256i(r
, e
);
5107 #[simd_test(enable = "avx2")]
5108 unsafe fn test_mm256_packus_epi32() {
5109 let a
= _mm256_set1_epi32(2);
5110 let b
= _mm256_set1_epi32(4);
5111 let r
= _mm256_packus_epi32(a
, b
);
5112 let e
= _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4);
5114 assert_eq_m256i(r
, e
);
5117 #[simd_test(enable = "avx2")]
5118 unsafe fn test_mm256_sad_epu8() {
5119 let a
= _mm256_set1_epi8(2);
5120 let b
= _mm256_set1_epi8(4);
5121 let r
= _mm256_sad_epu8(a
, b
);
5122 let e
= _mm256_set1_epi64x(16);
5123 assert_eq_m256i(r
, e
);
5126 #[simd_test(enable = "avx2")]
5127 unsafe fn test_mm256_shufflehi_epi16() {
5129 let a
= _mm256_setr_epi16(
5130 0, 1, 2, 3, 11, 22, 33, 44,
5131 4, 5, 6, 7, 55, 66, 77, 88,
5134 let e
= _mm256_setr_epi16(
5135 0, 1, 2, 3, 44, 22, 22, 11,
5136 4, 5, 6, 7, 88, 66, 66, 55,
5138 let r
= _mm256_shufflehi_epi16(a
, 0b00_01_01_11);
5139 assert_eq_m256i(r
, e
);
5142 #[simd_test(enable = "avx2")]
5143 unsafe fn test_mm256_shufflelo_epi16() {
5145 let a
= _mm256_setr_epi16(
5146 11, 22, 33, 44, 0, 1, 2, 3,
5147 55, 66, 77, 88, 4, 5, 6, 7,
5150 let e
= _mm256_setr_epi16(
5151 44, 22, 22, 11, 0, 1, 2, 3,
5152 88, 66, 66, 55, 4, 5, 6, 7,
5154 let r
= _mm256_shufflelo_epi16(a
, 0b00_01_01_11);
5155 assert_eq_m256i(r
, e
);
5158 #[simd_test(enable = "avx2")]
5159 unsafe fn test_mm256_sign_epi16() {
5160 let a
= _mm256_set1_epi16(2);
5161 let b
= _mm256_set1_epi16(-1);
5162 let r
= _mm256_sign_epi16(a
, b
);
5163 let e
= _mm256_set1_epi16(-2);
5164 assert_eq_m256i(r
, e
);
5167 #[simd_test(enable = "avx2")]
5168 unsafe fn test_mm256_sign_epi32() {
5169 let a
= _mm256_set1_epi32(2);
5170 let b
= _mm256_set1_epi32(-1);
5171 let r
= _mm256_sign_epi32(a
, b
);
5172 let e
= _mm256_set1_epi32(-2);
5173 assert_eq_m256i(r
, e
);
5176 #[simd_test(enable = "avx2")]
5177 unsafe fn test_mm256_sign_epi8() {
5178 let a
= _mm256_set1_epi8(2);
5179 let b
= _mm256_set1_epi8(-1);
5180 let r
= _mm256_sign_epi8(a
, b
);
5181 let e
= _mm256_set1_epi8(-2);
5182 assert_eq_m256i(r
, e
);
5185 #[simd_test(enable = "avx2")]
5186 unsafe fn test_mm256_sll_epi16() {
5187 let a
= _mm256_set1_epi16(0xFF);
5188 let b
= _mm_insert_epi16(_mm_set1_epi16(0), 4, 0);
5189 let r
= _mm256_sll_epi16(a
, b
);
5190 assert_eq_m256i(r
, _mm256_set1_epi16(0xFF0));
5193 #[simd_test(enable = "avx2")]
5194 unsafe fn test_mm256_sll_epi32() {
5195 let a
= _mm256_set1_epi32(0xFFFF);
5196 let b
= _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
5197 let r
= _mm256_sll_epi32(a
, b
);
5198 assert_eq_m256i(r
, _mm256_set1_epi32(0xFFFF0));
5201 #[simd_test(enable = "avx2")]
5202 unsafe fn test_mm256_sll_epi64() {
5203 let a
= _mm256_set1_epi64x(0xFFFFFFFF);
5204 let b
= _mm_insert_epi64(_mm_set1_epi64x(0), 4, 0);
5205 let r
= _mm256_sll_epi64(a
, b
);
5206 assert_eq_m256i(r
, _mm256_set1_epi64x(0xFFFFFFFF0));
5209 #[simd_test(enable = "avx2")]
5210 unsafe fn test_mm256_slli_epi16() {
5212 _mm256_slli_epi16(_mm256_set1_epi16(0xFF), 4),
5213 _mm256_set1_epi16(0xFF0),
5217 #[simd_test(enable = "avx2")]
5218 unsafe fn test_mm256_slli_epi32() {
5220 _mm256_slli_epi32(_mm256_set1_epi32(0xFFFF), 4),
5221 _mm256_set1_epi32(0xFFFF0),
5225 #[simd_test(enable = "avx2")]
5226 unsafe fn test_mm256_slli_epi64() {
5228 _mm256_slli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4),
5229 _mm256_set1_epi64x(0xFFFFFFFF0),
5233 #[simd_test(enable = "avx2")]
5234 unsafe fn test_mm256_slli_si256() {
5235 let a
= _mm256_set1_epi64x(0xFFFFFFFF);
5236 let r
= _mm256_slli_si256(a
, 3);
5237 assert_eq_m256i(r
, _mm256_set1_epi64x(0xFFFFFFFF000000));
5240 #[simd_test(enable = "avx2")]
5241 unsafe fn test_mm_sllv_epi32() {
5242 let a
= _mm_set1_epi32(2);
5243 let b
= _mm_set1_epi32(1);
5244 let r
= _mm_sllv_epi32(a
, b
);
5245 let e
= _mm_set1_epi32(4);
5246 assert_eq_m128i(r
, e
);
5249 #[simd_test(enable = "avx2")]
5250 unsafe fn test_mm256_sllv_epi32() {
5251 let a
= _mm256_set1_epi32(2);
5252 let b
= _mm256_set1_epi32(1);
5253 let r
= _mm256_sllv_epi32(a
, b
);
5254 let e
= _mm256_set1_epi32(4);
5255 assert_eq_m256i(r
, e
);
5258 #[simd_test(enable = "avx2")]
5259 unsafe fn test_mm_sllv_epi64() {
5260 let a
= _mm_set1_epi64x(2);
5261 let b
= _mm_set1_epi64x(1);
5262 let r
= _mm_sllv_epi64(a
, b
);
5263 let e
= _mm_set1_epi64x(4);
5264 assert_eq_m128i(r
, e
);
5267 #[simd_test(enable = "avx2")]
5268 unsafe fn test_mm256_sllv_epi64() {
5269 let a
= _mm256_set1_epi64x(2);
5270 let b
= _mm256_set1_epi64x(1);
5271 let r
= _mm256_sllv_epi64(a
, b
);
5272 let e
= _mm256_set1_epi64x(4);
5273 assert_eq_m256i(r
, e
);
5276 #[simd_test(enable = "avx2")]
5277 unsafe fn test_mm256_sra_epi16() {
5278 let a
= _mm256_set1_epi16(-1);
5279 let b
= _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
5280 let r
= _mm256_sra_epi16(a
, b
);
5281 assert_eq_m256i(r
, _mm256_set1_epi16(-1));
5284 #[simd_test(enable = "avx2")]
5285 unsafe fn test_mm256_sra_epi32() {
5286 let a
= _mm256_set1_epi32(-1);
5287 let b
= _mm_insert_epi32(_mm_set1_epi32(0), 1, 0);
5288 let r
= _mm256_sra_epi32(a
, b
);
5289 assert_eq_m256i(r
, _mm256_set1_epi32(-1));
5292 #[simd_test(enable = "avx2")]
5293 unsafe fn test_mm256_srai_epi16() {
5295 _mm256_srai_epi16(_mm256_set1_epi16(-1), 1),
5296 _mm256_set1_epi16(-1),
5300 #[simd_test(enable = "avx2")]
5301 unsafe fn test_mm256_srai_epi32() {
5303 _mm256_srai_epi32(_mm256_set1_epi32(-1), 1),
5304 _mm256_set1_epi32(-1),
5308 #[simd_test(enable = "avx2")]
5309 unsafe fn test_mm_srav_epi32() {
5310 let a
= _mm_set1_epi32(4);
5311 let count
= _mm_set1_epi32(1);
5312 let r
= _mm_srav_epi32(a
, count
);
5313 let e
= _mm_set1_epi32(2);
5314 assert_eq_m128i(r
, e
);
5317 #[simd_test(enable = "avx2")]
5318 unsafe fn test_mm256_srav_epi32() {
5319 let a
= _mm256_set1_epi32(4);
5320 let count
= _mm256_set1_epi32(1);
5321 let r
= _mm256_srav_epi32(a
, count
);
5322 let e
= _mm256_set1_epi32(2);
5323 assert_eq_m256i(r
, e
);
5326 #[simd_test(enable = "avx2")]
5327 unsafe fn test_mm256_srli_si256() {
5329 let a
= _mm256_setr_epi8(
5330 1, 2, 3, 4, 5, 6, 7, 8,
5331 9, 10, 11, 12, 13, 14, 15, 16,
5332 17, 18, 19, 20, 21, 22, 23, 24,
5333 25, 26, 27, 28, 29, 30, 31, 32,
5335 let r
= _mm256_srli_si256(a
, 3);
5337 let e
= _mm256_setr_epi8(
5338 4, 5, 6, 7, 8, 9, 10, 11,
5339 12, 13, 14, 15, 16, 0, 0, 0,
5340 20, 21, 22, 23, 24, 25, 26, 27,
5341 28, 29, 30, 31, 32, 0, 0, 0,
5343 assert_eq_m256i(r
, e
);
5346 #[simd_test(enable = "avx2")]
5347 unsafe fn test_mm256_srl_epi16() {
5348 let a
= _mm256_set1_epi16(0xFF);
5349 let b
= _mm_insert_epi16(_mm_set1_epi16(0), 4, 0);
5350 let r
= _mm256_srl_epi16(a
, b
);
5351 assert_eq_m256i(r
, _mm256_set1_epi16(0xF));
5354 #[simd_test(enable = "avx2")]
5355 unsafe fn test_mm256_srl_epi32() {
5356 let a
= _mm256_set1_epi32(0xFFFF);
5357 let b
= _mm_insert_epi32(_mm_set1_epi32(0), 4, 0);
5358 let r
= _mm256_srl_epi32(a
, b
);
5359 assert_eq_m256i(r
, _mm256_set1_epi32(0xFFF));
5362 #[simd_test(enable = "avx2")]
5363 unsafe fn test_mm256_srl_epi64() {
5364 let a
= _mm256_set1_epi64x(0xFFFFFFFF);
5365 let b
= _mm_setr_epi64x(4, 0);
5366 let r
= _mm256_srl_epi64(a
, b
);
5367 assert_eq_m256i(r
, _mm256_set1_epi64x(0xFFFFFFF));
5370 #[simd_test(enable = "avx2")]
5371 unsafe fn test_mm256_srli_epi16() {
5373 _mm256_srli_epi16(_mm256_set1_epi16(0xFF), 4),
5374 _mm256_set1_epi16(0xF),
5378 #[simd_test(enable = "avx2")]
5379 unsafe fn test_mm256_srli_epi32() {
5381 _mm256_srli_epi32(_mm256_set1_epi32(0xFFFF), 4),
5382 _mm256_set1_epi32(0xFFF),
5386 #[simd_test(enable = "avx2")]
5387 unsafe fn test_mm256_srli_epi64() {
5389 _mm256_srli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4),
5390 _mm256_set1_epi64x(0xFFFFFFF),
5394 #[simd_test(enable = "avx2")]
5395 unsafe fn test_mm_srlv_epi32() {
5396 let a
= _mm_set1_epi32(2);
5397 let count
= _mm_set1_epi32(1);
5398 let r
= _mm_srlv_epi32(a
, count
);
5399 let e
= _mm_set1_epi32(1);
5400 assert_eq_m128i(r
, e
);
5403 #[simd_test(enable = "avx2")]
5404 unsafe fn test_mm256_srlv_epi32() {
5405 let a
= _mm256_set1_epi32(2);
5406 let count
= _mm256_set1_epi32(1);
5407 let r
= _mm256_srlv_epi32(a
, count
);
5408 let e
= _mm256_set1_epi32(1);
5409 assert_eq_m256i(r
, e
);
5412 #[simd_test(enable = "avx2")]
5413 unsafe fn test_mm_srlv_epi64() {
5414 let a
= _mm_set1_epi64x(2);
5415 let count
= _mm_set1_epi64x(1);
5416 let r
= _mm_srlv_epi64(a
, count
);
5417 let e
= _mm_set1_epi64x(1);
5418 assert_eq_m128i(r
, e
);
5421 #[simd_test(enable = "avx2")]
5422 unsafe fn test_mm256_srlv_epi64() {
5423 let a
= _mm256_set1_epi64x(2);
5424 let count
= _mm256_set1_epi64x(1);
5425 let r
= _mm256_srlv_epi64(a
, count
);
5426 let e
= _mm256_set1_epi64x(1);
5427 assert_eq_m256i(r
, e
);
5430 #[simd_test(enable = "avx2")]
5431 unsafe fn test_mm256_sub_epi16() {
5432 let a
= _mm256_set1_epi16(4);
5433 let b
= _mm256_set1_epi16(2);
5434 let r
= _mm256_sub_epi16(a
, b
);
5435 assert_eq_m256i(r
, b
);
5438 #[simd_test(enable = "avx2")]
5439 unsafe fn test_mm256_sub_epi32() {
5440 let a
= _mm256_set1_epi32(4);
5441 let b
= _mm256_set1_epi32(2);
5442 let r
= _mm256_sub_epi32(a
, b
);
5443 assert_eq_m256i(r
, b
);
5446 #[simd_test(enable = "avx2")]
5447 unsafe fn test_mm256_sub_epi64() {
5448 let a
= _mm256_set1_epi64x(4);
5449 let b
= _mm256_set1_epi64x(2);
5450 let r
= _mm256_sub_epi64(a
, b
);
5451 assert_eq_m256i(r
, b
);
5454 #[simd_test(enable = "avx2")]
5455 unsafe fn test_mm256_sub_epi8() {
5456 let a
= _mm256_set1_epi8(4);
5457 let b
= _mm256_set1_epi8(2);
5458 let r
= _mm256_sub_epi8(a
, b
);
5459 assert_eq_m256i(r
, b
);
5462 #[simd_test(enable = "avx2")]
5463 unsafe fn test_mm256_subs_epi16() {
5464 let a
= _mm256_set1_epi16(4);
5465 let b
= _mm256_set1_epi16(2);
5466 let r
= _mm256_subs_epi16(a
, b
);
5467 assert_eq_m256i(r
, b
);
5470 #[simd_test(enable = "avx2")]
5471 unsafe fn test_mm256_subs_epi8() {
5472 let a
= _mm256_set1_epi8(4);
5473 let b
= _mm256_set1_epi8(2);
5474 let r
= _mm256_subs_epi8(a
, b
);
5475 assert_eq_m256i(r
, b
);
5478 #[simd_test(enable = "avx2")]
5479 unsafe fn test_mm256_subs_epu16() {
5480 let a
= _mm256_set1_epi16(4);
5481 let b
= _mm256_set1_epi16(2);
5482 let r
= _mm256_subs_epu16(a
, b
);
5483 assert_eq_m256i(r
, b
);
5486 #[simd_test(enable = "avx2")]
5487 unsafe fn test_mm256_subs_epu8() {
5488 let a
= _mm256_set1_epi8(4);
5489 let b
= _mm256_set1_epi8(2);
5490 let r
= _mm256_subs_epu8(a
, b
);
5491 assert_eq_m256i(r
, b
);
5494 #[simd_test(enable = "avx2")]
5495 unsafe fn test_mm256_xor_si256() {
5496 let a
= _mm256_set1_epi8(5);
5497 let b
= _mm256_set1_epi8(3);
5498 let r
= _mm256_xor_si256(a
, b
);
5499 assert_eq_m256i(r
, _mm256_set1_epi8(6));
5502 #[simd_test(enable = "avx2")]
5503 unsafe fn test_mm256_alignr_epi8() {
5505 let a
= _mm256_setr_epi8(
5506 1, 2, 3, 4, 5, 6, 7, 8,
5507 9, 10, 11, 12, 13, 14, 15, 16,
5508 17, 18, 19, 20, 21, 22, 23, 24,
5509 25, 26, 27, 28, 29, 30, 31, 32,
5512 let b
= _mm256_setr_epi8(
5513 -1, -2, -3, -4, -5, -6, -7, -8,
5514 -9, -10, -11, -12, -13, -14, -15, -16,
5515 -17, -18, -19, -20, -21, -22, -23, -24,
5516 -25, -26, -27, -28, -29, -30, -31, -32,
5518 let r
= _mm256_alignr_epi8(a
, b
, 33);
5519 assert_eq_m256i(r
, _mm256_set1_epi8(0));
5521 let r
= _mm256_alignr_epi8(a
, b
, 17);
5523 let expected
= _mm256_setr_epi8(
5524 2, 3, 4, 5, 6, 7, 8, 9,
5525 10, 11, 12, 13, 14, 15, 16, 0,
5526 18, 19, 20, 21, 22, 23, 24, 25,
5527 26, 27, 28, 29, 30, 31, 32, 0,
5529 assert_eq_m256i(r
, expected
);
5531 let r
= _mm256_alignr_epi8(a
, b
, 4);
5533 let expected
= _mm256_setr_epi8(
5534 -5, -6, -7, -8, -9, -10, -11, -12,
5535 -13, -14, -15, -16, 1, 2, 3, 4,
5536 -21, -22, -23, -24, -25, -26, -27, -28,
5537 -29, -30, -31, -32, 17, 18, 19, 20,
5539 assert_eq_m256i(r
, expected
);
5542 let expected
= _mm256_setr_epi8(
5543 -1, -2, -3, -4, -5, -6, -7, -8,
5544 -9, -10, -11, -12, -13, -14, -15, -16, -17,
5545 -18, -19, -20, -21, -22, -23, -24, -25,
5546 -26, -27, -28, -29, -30, -31, -32,
5548 let r
= _mm256_alignr_epi8(a
, b
, 16);
5549 assert_eq_m256i(r
, expected
);
5551 let r
= _mm256_alignr_epi8(a
, b
, 15);
5553 let expected
= _mm256_setr_epi8(
5554 -16, 1, 2, 3, 4, 5, 6, 7,
5555 8, 9, 10, 11, 12, 13, 14, 15,
5556 -32, 17, 18, 19, 20, 21, 22, 23,
5557 24, 25, 26, 27, 28, 29, 30, 31,
5559 assert_eq_m256i(r
, expected
);
5561 let r
= _mm256_alignr_epi8(a
, b
, 0);
5562 assert_eq_m256i(r
, b
);
5565 #[simd_test(enable = "avx2")]
5566 unsafe fn test_mm256_shuffle_epi8() {
5568 let a
= _mm256_setr_epi8(
5569 1, 2, 3, 4, 5, 6, 7, 8,
5570 9, 10, 11, 12, 13, 14, 15, 16,
5571 17, 18, 19, 20, 21, 22, 23, 24,
5572 25, 26, 27, 28, 29, 30, 31, 32,
5575 let b
= _mm256_setr_epi8(
5576 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5577 12, 5, 5, 10, 4, 1, 8, 0,
5578 4, 128u8 as i8, 4, 3, 24, 12, 6, 19,
5579 12, 5, 5, 10, 4, 1, 8, 0,
5582 let expected
= _mm256_setr_epi8(
5583 5, 0, 5, 4, 9, 13, 7, 4,
5584 13, 6, 6, 11, 5, 2, 9, 1,
5585 21, 0, 21, 20, 25, 29, 23, 20,
5586 29, 22, 22, 27, 21, 18, 25, 17,
5588 let r
= _mm256_shuffle_epi8(a
, b
);
5589 assert_eq_m256i(r
, expected
);
5592 #[simd_test(enable = "avx2")]
5593 unsafe fn test_mm256_permutevar8x32_epi32() {
5594 let a
= _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
5595 let b
= _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5596 let expected
= _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
5597 let r
= _mm256_permutevar8x32_epi32(a
, b
);
5598 assert_eq_m256i(r
, expected
);
5601 #[simd_test(enable = "avx2")]
5602 unsafe fn test_mm256_permute4x64_epi64() {
5603 let a
= _mm256_setr_epi64x(100, 200, 300, 400);
5604 let expected
= _mm256_setr_epi64x(400, 100, 200, 100);
5605 let r
= _mm256_permute4x64_epi64(a
, 0b00010011);
5606 assert_eq_m256i(r
, expected
);
5609 #[simd_test(enable = "avx2")]
5610 unsafe fn test_mm256_permute2x128_si256() {
5611 let a
= _mm256_setr_epi64x(100, 200, 500, 600);
5612 let b
= _mm256_setr_epi64x(300, 400, 700, 800);
5613 let r
= _mm256_permute2x128_si256(a
, b
, 0b00_01_00_11);
5614 let e
= _mm256_setr_epi64x(700, 800, 500, 600);
5615 assert_eq_m256i(r
, e
);
5618 #[simd_test(enable = "avx2")]
5619 unsafe fn test_mm256_permute4x64_pd() {
5620 let a
= _mm256_setr_pd(1., 2., 3., 4.);
5621 let r
= _mm256_permute4x64_pd(a
, 0b00_01_00_11);
5622 let e
= _mm256_setr_pd(4., 1., 2., 1.);
5623 assert_eq_m256d(r
, e
);
5626 #[simd_test(enable = "avx2")]
5627 unsafe fn test_mm256_permutevar8x32_ps() {
5628 let a
= _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
5629 let b
= _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
5630 let r
= _mm256_permutevar8x32_ps(a
, b
);
5631 let e
= _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.);
5632 assert_eq_m256(r
, e
);
5635 #[simd_test(enable = "avx2")]
5636 unsafe fn test_mm_i32gather_epi32() {
5637 let mut arr
= [0i32; 128];
5638 for i
in 0..128i32 {
5639 arr
[i
as usize] = i
;
5641 // A multiplier of 4 is word-addressing
5642 let r
= _mm_i32gather_epi32(arr
.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4);
5643 assert_eq_m128i(r
, _mm_setr_epi32(0, 16, 32, 48));
5646 #[simd_test(enable = "avx2")]
5647 unsafe fn test_mm_mask_i32gather_epi32() {
5648 let mut arr
= [0i32; 128];
5649 for i
in 0..128i32 {
5650 arr
[i
as usize] = i
;
5652 // A multiplier of 4 is word-addressing
5653 let r
= _mm_mask_i32gather_epi32(
5654 _mm_set1_epi32(256),
5656 _mm_setr_epi32(0, 16, 64, 96),
5657 _mm_setr_epi32(-1, -1, -1, 0),
5660 assert_eq_m128i(r
, _mm_setr_epi32(0, 16, 64, 256));
5663 #[simd_test(enable = "avx2")]
5664 unsafe fn test_mm256_i32gather_epi32() {
5665 let mut arr
= [0i32; 128];
5666 for i
in 0..128i32 {
5667 arr
[i
as usize] = i
;
5669 // A multiplier of 4 is word-addressing
5670 let r
= _mm256_i32gather_epi32(
5672 _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4),
5675 assert_eq_m256i(r
, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
5678 #[simd_test(enable = "avx2")]
5679 unsafe fn test_mm256_mask_i32gather_epi32() {
5680 let mut arr
= [0i32; 128];
5681 for i
in 0..128i32 {
5682 arr
[i
as usize] = i
;
5684 // A multiplier of 4 is word-addressing
5685 let r
= _mm256_mask_i32gather_epi32(
5686 _mm256_set1_epi32(256),
5688 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5689 _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
5692 assert_eq_m256i(r
, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
5695 #[simd_test(enable = "avx2")]
5696 unsafe fn test_mm_i32gather_ps() {
5697 let mut arr
= [0.0f32; 128];
5699 for i
in 0..128usize
{
5703 // A multiplier of 4 is word-addressing for f32s
5704 let r
= _mm_i32gather_ps(arr
.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4);
5705 assert_eq_m128(r
, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5708 #[simd_test(enable = "avx2")]
5709 unsafe fn test_mm_mask_i32gather_ps() {
5710 let mut arr
= [0.0f32; 128];
5712 for i
in 0..128usize
{
5716 // A multiplier of 4 is word-addressing for f32s
5717 let r
= _mm_mask_i32gather_ps(
5720 _mm_setr_epi32(0, 16, 64, 96),
5721 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
5724 assert_eq_m128(r
, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
5727 #[simd_test(enable = "avx2")]
5728 unsafe fn test_mm256_i32gather_ps() {
5729 let mut arr
= [0.0f32; 128];
5731 for i
in 0..128usize
{
5735 // A multiplier of 4 is word-addressing for f32s
5736 let r
= _mm256_i32gather_ps(
5738 _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4),
5741 assert_eq_m256(r
, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
5744 #[simd_test(enable = "avx2")]
5745 unsafe fn test_mm256_mask_i32gather_ps() {
5746 let mut arr
= [0.0f32; 128];
5748 for i
in 0..128usize
{
5752 // A multiplier of 4 is word-addressing for f32s
5753 let r
= _mm256_mask_i32gather_ps(
5754 _mm256_set1_ps(256.0),
5756 _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
5757 _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
5762 _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
5766 #[simd_test(enable = "avx2")]
5767 unsafe fn test_mm_i32gather_epi64() {
5768 let mut arr
= [0i64; 128];
5769 for i
in 0..128i64 {
5770 arr
[i
as usize] = i
;
5772 // A multiplier of 8 is word-addressing for i64s
5773 let r
= _mm_i32gather_epi64(arr
.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8);
5774 assert_eq_m128i(r
, _mm_setr_epi64x(0, 16));
5777 #[simd_test(enable = "avx2")]
5778 unsafe fn test_mm_mask_i32gather_epi64() {
5779 let mut arr
= [0i64; 128];
5780 for i
in 0..128i64 {
5781 arr
[i
as usize] = i
;
5783 // A multiplier of 8 is word-addressing for i64s
5784 let r
= _mm_mask_i32gather_epi64(
5785 _mm_set1_epi64x(256),
5787 _mm_setr_epi32(16, 16, 16, 16),
5788 _mm_setr_epi64x(-1, 0),
5791 assert_eq_m128i(r
, _mm_setr_epi64x(16, 256));
5794 #[simd_test(enable = "avx2")]
5795 unsafe fn test_mm256_i32gather_epi64() {
5796 let mut arr
= [0i64; 128];
5797 for i
in 0..128i64 {
5798 arr
[i
as usize] = i
;
5800 // A multiplier of 8 is word-addressing for i64s
5801 let r
= _mm256_i32gather_epi64(arr
.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8);
5802 assert_eq_m256i(r
, _mm256_setr_epi64x(0, 16, 32, 48));
5805 #[simd_test(enable = "avx2")]
5806 unsafe fn test_mm256_mask_i32gather_epi64() {
5807 let mut arr
= [0i64; 128];
5808 for i
in 0..128i64 {
5809 arr
[i
as usize] = i
;
5811 // A multiplier of 8 is word-addressing for i64s
5812 let r
= _mm256_mask_i32gather_epi64(
5813 _mm256_set1_epi64x(256),
5815 _mm_setr_epi32(0, 16, 64, 96),
5816 _mm256_setr_epi64x(-1, -1, -1, 0),
5819 assert_eq_m256i(r
, _mm256_setr_epi64x(0, 16, 64, 256));
5822 #[simd_test(enable = "avx2")]
5823 unsafe fn test_mm_i32gather_pd() {
5824 let mut arr
= [0.0f64; 128];
5826 for i
in 0..128usize
{
5830 // A multiplier of 8 is word-addressing for f64s
5831 let r
= _mm_i32gather_pd(arr
.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8);
5832 assert_eq_m128d(r
, _mm_setr_pd(0.0, 16.0));
5835 #[simd_test(enable = "avx2")]
5836 unsafe fn test_mm_mask_i32gather_pd() {
5837 let mut arr
= [0.0f64; 128];
5839 for i
in 0..128usize
{
5843 // A multiplier of 8 is word-addressing for f64s
5844 let r
= _mm_mask_i32gather_pd(
5847 _mm_setr_epi32(16, 16, 16, 16),
5848 _mm_setr_pd(-1.0, 0.0),
5851 assert_eq_m128d(r
, _mm_setr_pd(16.0, 256.0));
5854 #[simd_test(enable = "avx2")]
5855 unsafe fn test_mm256_i32gather_pd() {
5856 let mut arr
= [0.0f64; 128];
5858 for i
in 0..128usize
{
5862 // A multiplier of 8 is word-addressing for f64s
5863 let r
= _mm256_i32gather_pd(arr
.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8);
5864 assert_eq_m256d(r
, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
5867 #[simd_test(enable = "avx2")]
5868 unsafe fn test_mm256_mask_i32gather_pd() {
5869 let mut arr
= [0.0f64; 128];
5871 for i
in 0..128usize
{
5875 // A multiplier of 8 is word-addressing for f64s
5876 let r
= _mm256_mask_i32gather_pd(
5877 _mm256_set1_pd(256.0),
5879 _mm_setr_epi32(0, 16, 64, 96),
5880 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
5883 assert_eq_m256d(r
, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
5886 #[simd_test(enable = "avx2")]
5887 unsafe fn test_mm_i64gather_epi32() {
5888 let mut arr
= [0i32; 128];
5889 for i
in 0..128i32 {
5890 arr
[i
as usize] = i
;
5892 // A multiplier of 4 is word-addressing
5893 let r
= _mm_i64gather_epi32(arr
.as_ptr(), _mm_setr_epi64x(0, 16), 4);
5894 assert_eq_m128i(r
, _mm_setr_epi32(0, 16, 0, 0));
5897 #[simd_test(enable = "avx2")]
5898 unsafe fn test_mm_mask_i64gather_epi32() {
5899 let mut arr
= [0i32; 128];
5900 for i
in 0..128i32 {
5901 arr
[i
as usize] = i
;
5903 // A multiplier of 4 is word-addressing
5904 let r
= _mm_mask_i64gather_epi32(
5905 _mm_set1_epi32(256),
5907 _mm_setr_epi64x(0, 16),
5908 _mm_setr_epi32(-1, 0, -1, 0),
5911 assert_eq_m128i(r
, _mm_setr_epi32(0, 256, 0, 0));
5914 #[simd_test(enable = "avx2")]
5915 unsafe fn test_mm256_i64gather_epi32() {
5916 let mut arr
= [0i32; 128];
5917 for i
in 0..128i32 {
5918 arr
[i
as usize] = i
;
5920 // A multiplier of 4 is word-addressing
5921 let r
= _mm256_i64gather_epi32(arr
.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4);
5922 assert_eq_m128i(r
, _mm_setr_epi32(0, 16, 32, 48));
5925 #[simd_test(enable = "avx2")]
5926 unsafe fn test_mm256_mask_i64gather_epi32() {
5927 let mut arr
= [0i32; 128];
5928 for i
in 0..128i32 {
5929 arr
[i
as usize] = i
;
5931 // A multiplier of 4 is word-addressing
5932 let r
= _mm256_mask_i64gather_epi32(
5933 _mm_set1_epi32(256),
5935 _mm256_setr_epi64x(0, 16, 64, 96),
5936 _mm_setr_epi32(-1, -1, -1, 0),
5939 assert_eq_m128i(r
, _mm_setr_epi32(0, 16, 64, 256));
5942 #[simd_test(enable = "avx2")]
5943 unsafe fn test_mm_i64gather_ps() {
5944 let mut arr
= [0.0f32; 128];
5946 for i
in 0..128usize
{
5950 // A multiplier of 4 is word-addressing for f32s
5951 let r
= _mm_i64gather_ps(arr
.as_ptr(), _mm_setr_epi64x(0, 16), 4);
5952 assert_eq_m128(r
, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
5955 #[simd_test(enable = "avx2")]
5956 unsafe fn test_mm_mask_i64gather_ps() {
5957 let mut arr
= [0.0f32; 128];
5959 for i
in 0..128usize
{
5963 // A multiplier of 4 is word-addressing for f32s
5964 let r
= _mm_mask_i64gather_ps(
5967 _mm_setr_epi64x(0, 16),
5968 _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
5971 assert_eq_m128(r
, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
5974 #[simd_test(enable = "avx2")]
5975 unsafe fn test_mm256_i64gather_ps() {
5976 let mut arr
= [0.0f32; 128];
5978 for i
in 0..128usize
{
5982 // A multiplier of 4 is word-addressing for f32s
5983 let r
= _mm256_i64gather_ps(arr
.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4);
5984 assert_eq_m128(r
, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
5987 #[simd_test(enable = "avx2")]
5988 unsafe fn test_mm256_mask_i64gather_ps() {
5989 let mut arr
= [0.0f32; 128];
5991 for i
in 0..128usize
{
5995 // A multiplier of 4 is word-addressing for f32s
5996 let r
= _mm256_mask_i64gather_ps(
5999 _mm256_setr_epi64x(0, 16, 64, 96),
6000 _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
6003 assert_eq_m128(r
, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
6006 #[simd_test(enable = "avx2")]
6007 unsafe fn test_mm_i64gather_epi64() {
6008 let mut arr
= [0i64; 128];
6009 for i
in 0..128i64 {
6010 arr
[i
as usize] = i
;
6012 // A multiplier of 8 is word-addressing for i64s
6013 let r
= _mm_i64gather_epi64(arr
.as_ptr(), _mm_setr_epi64x(0, 16), 8);
6014 assert_eq_m128i(r
, _mm_setr_epi64x(0, 16));
6017 #[simd_test(enable = "avx2")]
6018 unsafe fn test_mm_mask_i64gather_epi64() {
6019 let mut arr
= [0i64; 128];
6020 for i
in 0..128i64 {
6021 arr
[i
as usize] = i
;
6023 // A multiplier of 8 is word-addressing for i64s
6024 let r
= _mm_mask_i64gather_epi64(
6025 _mm_set1_epi64x(256),
6027 _mm_setr_epi64x(16, 16),
6028 _mm_setr_epi64x(-1, 0),
6031 assert_eq_m128i(r
, _mm_setr_epi64x(16, 256));
6034 #[simd_test(enable = "avx2")]
6035 unsafe fn test_mm256_i64gather_epi64() {
6036 let mut arr
= [0i64; 128];
6037 for i
in 0..128i64 {
6038 arr
[i
as usize] = i
;
6040 // A multiplier of 8 is word-addressing for i64s
6041 let r
= _mm256_i64gather_epi64(arr
.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8);
6042 assert_eq_m256i(r
, _mm256_setr_epi64x(0, 16, 32, 48));
6045 #[simd_test(enable = "avx2")]
6046 unsafe fn test_mm256_mask_i64gather_epi64() {
6047 let mut arr
= [0i64; 128];
6048 for i
in 0..128i64 {
6049 arr
[i
as usize] = i
;
6051 // A multiplier of 8 is word-addressing for i64s
6052 let r
= _mm256_mask_i64gather_epi64(
6053 _mm256_set1_epi64x(256),
6055 _mm256_setr_epi64x(0, 16, 64, 96),
6056 _mm256_setr_epi64x(-1, -1, -1, 0),
6059 assert_eq_m256i(r
, _mm256_setr_epi64x(0, 16, 64, 256));
6062 #[simd_test(enable = "avx2")]
6063 unsafe fn test_mm_i64gather_pd() {
6064 let mut arr
= [0.0f64; 128];
6066 for i
in 0..128usize
{
6070 // A multiplier of 8 is word-addressing for f64s
6071 let r
= _mm_i64gather_pd(arr
.as_ptr(), _mm_setr_epi64x(0, 16), 8);
6072 assert_eq_m128d(r
, _mm_setr_pd(0.0, 16.0));
6075 #[simd_test(enable = "avx2")]
6076 unsafe fn test_mm_mask_i64gather_pd() {
6077 let mut arr
= [0.0f64; 128];
6079 for i
in 0..128usize
{
6083 // A multiplier of 8 is word-addressing for f64s
6084 let r
= _mm_mask_i64gather_pd(
6087 _mm_setr_epi64x(16, 16),
6088 _mm_setr_pd(-1.0, 0.0),
6091 assert_eq_m128d(r
, _mm_setr_pd(16.0, 256.0));
6094 #[simd_test(enable = "avx2")]
6095 unsafe fn test_mm256_i64gather_pd() {
6096 let mut arr
= [0.0f64; 128];
6098 for i
in 0..128usize
{
6102 // A multiplier of 8 is word-addressing for f64s
6103 let r
= _mm256_i64gather_pd(arr
.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8);
6104 assert_eq_m256d(r
, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
6107 #[simd_test(enable = "avx2")]
6108 unsafe fn test_mm256_mask_i64gather_pd() {
6109 let mut arr
= [0.0f64; 128];
6111 for i
in 0..128usize
{
6115 // A multiplier of 8 is word-addressing for f64s
6116 let r
= _mm256_mask_i64gather_pd(
6117 _mm256_set1_pd(256.0),
6119 _mm256_setr_epi64x(0, 16, 64, 96),
6120 _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
6123 assert_eq_m256d(r
, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
6126 #[simd_test(enable = "avx")]
6127 unsafe fn test_mm256_extract_epi8() {
6129 let a
= _mm256_setr_epi8(
6130 -1, 1, 2, 3, 4, 5, 6, 7,
6131 8, 9, 10, 11, 12, 13, 14, 15,
6132 16, 17, 18, 19, 20, 21, 22, 23,
6133 24, 25, 26, 27, 28, 29, 30, 31
6135 let r1
= _mm256_extract_epi8(a
, 0);
6136 let r2
= _mm256_extract_epi8(a
, 35);
6141 #[simd_test(enable = "avx2")]
6142 unsafe fn test_mm256_extract_epi16() {
6144 let a
= _mm256_setr_epi16(
6145 -1, 1, 2, 3, 4, 5, 6, 7,
6146 8, 9, 10, 11, 12, 13, 14, 15,
6148 let r1
= _mm256_extract_epi16(a
, 0);
6149 let r2
= _mm256_extract_epi16(a
, 19);
6154 #[simd_test(enable = "avx2")]
6155 unsafe fn test_mm256_extract_epi32() {
6156 let a
= _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
6157 let r1
= _mm256_extract_epi32(a
, 0);
6158 let r2
= _mm256_extract_epi32(a
, 11);
6163 #[simd_test(enable = "avx2")]
6164 unsafe fn test_mm256_cvtsd_f64() {
6165 let a
= _mm256_setr_pd(1., 2., 3., 4.);
6166 let r
= _mm256_cvtsd_f64(a
);
6170 #[simd_test(enable = "avx2")]
6171 unsafe fn test_mm256_cvtsi256_si32() {
6172 let a
= _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
6173 let r
= _mm256_cvtsi256_si32(a
);