use crate::{
    arch::asm,
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::{self, transmute},
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

use super::avx512f::{vpl, vps};

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i {
    let a = a.as_i16x32();
    // all-0 is a properly initialized i16x32
    let zero: i16x32 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i16x32 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}
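
// Illustrative sketch, added for this write-up and not part of the upstream
// source: `_mm512_abs_epi16` replaces each signed 16-bit lane with its
// absolute value, and the `mask`/`maskz` variants below gate that per lane.
// The helper name `_abs_epi16_sketch` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _abs_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(-5);
    let src = _mm512_set1_epi16(100);
    // Lanes 0..16 (mask bits set) become 5; lanes 16..32 keep 100 from `src`.
    _mm512_mask_abs_epi16(src, 0b00000000_00000000_11111111_11111111, a)
}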

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi16&expand=31)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi16(a).as_i16x32();
    transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi16&expand=32)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi16(a).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi16&expand=28)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi16&expand=29)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi16(a).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi16&expand=25)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
}

/// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi16&expand=26)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsw))]
pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi16(a).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi8&expand=57)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i {
    let a = a.as_i8x64();
    // all-0 is a properly initialized i8x64
    let zero: i8x64 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i8x64 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}
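
// Hedged sketch (not from upstream): the same pattern at 8-bit granularity,
// here with the zeroing form. The helper name `_abs_epi8_sketch` is
// hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _abs_epi8_sketch() -> __m512i {
    let a = _mm512_set1_epi8(-3);
    // Even-numbered lanes become 3, odd-numbered lanes are zeroed.
    _mm512_maskz_abs_epi8(0x5555_5555_5555_5555, a)
}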

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi8&expand=58)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi8(a).as_i8x64();
    transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi8&expand=59)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi8(a).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi8&expand=55)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi8(a).as_i8x32();
    transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi8&expand=56)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
    let abs = _mm256_abs_epi8(a).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi8&expand=52)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
}

/// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi8&expand=53)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsb))]
pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
    let abs = _mm_abs_epi8(a).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Add packed 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi16&expand=91)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i16x32(), b.as_i16x32()))
}
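
// Usage sketch, not part of the original file: plain `add_epi16` wraps on
// overflow (it is not saturating), and the masked forms below merge with
// `src` or zero. The helper name `_add_epi16_sketch` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _add_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(i16::MAX);
    let b = _mm512_set1_epi16(1);
    // Every lane wraps around to i16::MIN (-32768).
    _mm512_add_epi16(a, b)
}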

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi16&expand=92)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, add, src.as_i16x32()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi16&expand=93)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi16&expand=89)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, add, src.as_i16x16()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi16&expand=90)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi16&expand=86)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, add, src.as_i16x8()))
}

/// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi16&expand=87)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddw))]
pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi8&expand=118)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i8x64(), b.as_i8x64()))
}
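
// Brief sketch (added, not upstream): the zero-masked 8-bit add keeps only the
// lanes whose mask bit is set. `_add_epi8_sketch` is a hypothetical name.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _add_epi8_sketch() -> __m512i {
    let a = _mm512_set1_epi8(10);
    let b = _mm512_set1_epi8(20);
    // Only the lowest 8 of the 64 lanes receive 30; the rest are zeroed.
    _mm512_maskz_add_epi8(0xFF, a, b)
}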

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi8&expand=119)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, add, src.as_i8x64()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi8&expand=120)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi8&expand=116)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, add, src.as_i8x32()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi8&expand=117)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let add = _mm256_add_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi8&expand=113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, add, src.as_i8x16()))
}

/// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi8&expand=114)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddb))]
pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let add = _mm_add_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu16&expand=197)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
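
// Hedged example, not part of upstream: unsigned saturating add clamps at
// u16::MAX instead of wrapping. `_adds_epu16_sketch` is a hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _adds_epu16_sketch() -> __m512i {
    // 0xFFFF (65535) + 1 stays at 65535 in every lane; set1 takes the bit
    // pattern as an i16, hence the -1.
    let a = _mm512_set1_epi16(-1);
    let b = _mm512_set1_epi16(1);
    _mm512_adds_epu16(a, b)
}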

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu16&expand=198)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_mask_adds_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpaddusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu16&expand=199)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epu16&expand=195)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_mask_adds_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpaddusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epu16&expand=196)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epu16&expand=192)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}

/// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epu16&expand=193)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusw))]
pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu8&expand=206)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
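
// Short usage sketch (added here, not upstream): unsigned 8-bit saturating add
// under a writemask. `_adds_epu8_sketch` is a hypothetical helper name.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _adds_epu8_sketch() -> __m512i {
    // 250 + 10 saturates to 255 (0xFF) in the lanes selected by the mask;
    // the remaining lanes keep the value from `src`.
    let a = _mm512_set1_epi8(250u8 as i8);
    let b = _mm512_set1_epi8(10);
    let src = _mm512_set1_epi8(0);
    _mm512_mask_adds_epu8(src, u64::MAX, a, b)
}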

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu8&expand=207)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu8&expand=208)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        k,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epu8&expand=204)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epu8&expand=205)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddusb256(
        a.as_u8x32(),
        b.as_u8x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epu8&expand=201)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
}

/// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epu8&expand=202)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddusb))]
pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddusb128(
        a.as_u8x16(),
        b.as_u8x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi16&expand=179)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
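
// Illustrative sketch (not upstream): signed saturating add clamps to
// i16::MAX / i16::MIN rather than wrapping. `_adds_epi16_sketch` is a
// hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _adds_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(i16::MAX);
    let b = _mm512_set1_epi16(1);
    // Every lane stays at 32767 instead of wrapping to -32768.
    _mm512_adds_epi16(a, b)
}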

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi16&expand=180)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_mask_adds_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpaddsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_mask_adds_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpaddsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
}

/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
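
// Hedged sketch (added for illustration): signed 8-bit saturation clamps at
// i8::MIN on the negative side. `_adds_epi8_sketch` is a hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _adds_epi8_sketch() -> __m512i {
    let a = _mm512_set1_epi8(-120);
    let b = _mm512_set1_epi8(-20);
    // -120 + -20 saturates to -128 in every lane.
    _mm512_adds_epi8(a, b)
}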

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpaddsb256(
        a.as_i8x32(),
        b.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
}

/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpaddsb128(
        a.as_i8x16(),
        b.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi16&expand=5685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i16x32(), b.as_i16x32()))
}
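
// Brief sketch, not part of the original source: like the adds, plain
// `sub_epi16` wraps on overflow, while the masked variants below merge with
// `src` or zero. `_sub_epi16_sketch` is a hypothetical name.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _sub_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(i16::MIN);
    let b = _mm512_set1_epi16(1);
    // i16::MIN - 1 wraps around to i16::MAX in every lane.
    _mm512_sub_epi16(a, b)
}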

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}

/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i8x64(), b.as_i8x64()))
}
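
// Usage sketch (added, not upstream): a masked 8-bit subtract that keeps the
// original `src` values in the unselected lanes. `_sub_epi8_sketch` is a
// hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _sub_epi8_sketch() -> __m512i {
    let a = _mm512_set1_epi8(5);
    let b = _mm512_set1_epi8(7);
    let src = _mm512_set1_epi8(9);
    // The lower 32 lanes become 5 - 7 = -2; the upper 32 lanes keep 9.
    _mm512_mask_sub_epi8(src, 0x0000_0000_FFFF_FFFF, a, b)
}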

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}

/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu16&expand=5793)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
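
// Hedged sketch (not upstream): unsigned saturating subtraction floors at 0,
// so it never underflows. `_subs_epu16_sketch` is a hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _subs_epu16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // 1 - 2 saturates to 0 in every unsigned 16-bit lane.
    _mm512_subs_epu16(a, b)
}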

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu16&expand=5791)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_mask_subs_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    transmute(vpsubusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu16&expand=5792)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epu16&expand=5788)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_mask_subs_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epu16&expand=5789)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epu16&expand=5785)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}

/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epu16&expand=5786)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu8&expand=5802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
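
// Short sketch (added for illustration, not upstream): the 8-bit unsigned
// saturating subtract, combined with a zeromask. `_subs_epu8_sketch` is a
// hypothetical helper.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _subs_epu8_sketch() -> __m512i {
    let a = _mm512_set1_epi8(3);
    let b = _mm512_set1_epi8(10);
    // 3 - 10 saturates to 0; here the mask selects all 64 lanes.
    _mm512_maskz_subs_epu8(u64::MAX, a, b)
}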

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        k,
    ))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    transmute(vpsubusb256(
        a.as_u8x32(),
        b.as_u8x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
}

/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    transmute(vpsubusb128(
        a.as_u8x16(),
        b.as_u8x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}

/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpsubsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
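
// Hedged example (not part of the original file): signed saturating subtract
// clamps at i16::MIN instead of wrapping. `_subs_epi16_sketch` is hypothetical.
#[cfg(test)]
#[target_feature(enable = "avx512bw")]
unsafe fn _subs_epi16_sketch() -> __m512i {
    let a = _mm512_set1_epi16(i16::MIN);
    let b = _mm512_set1_epi16(1);
    // i16::MIN - 1 stays at -32768 in every lane.
    _mm512_subs_epi16(a, b)
}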
1091
1092/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1093///
1094/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi16&expand=5773)
1095#[inline]
1096#[target_feature(enable = "avx512bw")]
1097#[cfg_attr(test, assert_instr(vpsubsw))]
1098pub unsafe fn _mm512_mask_subs_epi16(
1099 src: __m512i,
1100 k: __mmask32,
1101 a: __m512i,
1102 b: __m512i,
1103) -> __m512i {
1104 transmute(vpsubsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
1105}
1106
1107/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1108///
1109/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi16&expand=5774)
1110#[inline]
1111#[target_feature(enable = "avx512bw")]
1112#[cfg_attr(test, assert_instr(vpsubsw))]
1113pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1114 transmute(vpsubsw(
1115 a.as_i16x32(),
1116 b.as_i16x32(),
1117 _mm512_setzero_si512().as_i16x32(),
1118 k,
1119 ))
1120}
1121
1122/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1123///
1124/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epi16&expand=5770)
1125#[inline]
1126#[target_feature(enable = "avx512bw,avx512vl")]
1127#[cfg_attr(test, assert_instr(vpsubsw))]
1128pub unsafe fn _mm256_mask_subs_epi16(
1129 src: __m256i,
1130 k: __mmask16,
1131 a: __m256i,
1132 b: __m256i,
1133) -> __m256i {
1134 transmute(vpsubsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
1135}
1136
1137/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1138///
1139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epi16&expand=5771)
1140#[inline]
1141#[target_feature(enable = "avx512bw,avx512vl")]
1142#[cfg_attr(test, assert_instr(vpsubsw))]
1143pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1144 transmute(vpsubsw256(
1145 a.as_i16x16(),
1146 b.as_i16x16(),
1147 _mm256_setzero_si256().as_i16x16(),
1148 k,
1149 ))
1150}
1151
1152/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1153///
1154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epi16&expand=5767)
1155#[inline]
1156#[target_feature(enable = "avx512bw,avx512vl")]
1157#[cfg_attr(test, assert_instr(vpsubsw))]
1158pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1159 transmute(vpsubsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
1160}
1161
1162/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1163///
1164/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epi16&expand=5768)
1165#[inline]
1166#[target_feature(enable = "avx512bw,avx512vl")]
1167#[cfg_attr(test, assert_instr(vpsubsw))]
1168pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1169 transmute(vpsubsw128(
1170 a.as_i16x8(),
1171 b.as_i16x8(),
1172 _mm_setzero_si128().as_i16x8(),
1173 k,
1174 ))
1175}
1176
1177/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst.
1178///
1179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi8&expand=5784)
1180#[inline]
1181#[target_feature(enable = "avx512bw")]
1182#[cfg_attr(test, assert_instr(vpsubsb))]
1183pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
1184 transmute(vpsubsb(
1185 a.as_i8x64(),
1186 b.as_i8x64(),
1187 _mm512_setzero_si512().as_i8x64(),
1188 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
1189 ))
1190}
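
// Illustrative sketch (same assumptions as the earlier sketches): the 8-bit
// signed variant clamps at i8::MIN in exactly the same way.
#[cfg(test)]
mod subs_epi8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn saturates_at_i8_min() {
        let a = _mm512_set1_epi8(i8::MIN);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_subs_epi8(a, b);
        assert_eq_m512i(r, _mm512_set1_epi8(i8::MIN));
    }
}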
1191
1192/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1193///
1194/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi8&expand=5782)
1195#[inline]
1196#[target_feature(enable = "avx512bw")]
1197#[cfg_attr(test, assert_instr(vpsubsb))]
1198pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1199 transmute(vpsubsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
1200}
1201
1202/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1203///
1204/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi8&expand=5783)
1205#[inline]
1206#[target_feature(enable = "avx512bw")]
1207#[cfg_attr(test, assert_instr(vpsubsb))]
1208pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1209 transmute(vpsubsb(
1210 a.as_i8x64(),
1211 b.as_i8x64(),
1212 _mm512_setzero_si512().as_i8x64(),
1213 k,
1214 ))
1215}
1216
1217/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1218///
1219/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epi8&expand=5779)
1220#[inline]
1221#[target_feature(enable = "avx512bw,avx512vl")]
1222#[cfg_attr(test, assert_instr(vpsubsb))]
1223pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1224 transmute(vpsubsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
1225}
1226
1227/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1228///
1229/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epi8&expand=5780)
1230#[inline]
1231#[target_feature(enable = "avx512bw,avx512vl")]
1232#[cfg_attr(test, assert_instr(vpsubsb))]
1233pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1234 transmute(vpsubsb256(
1235 a.as_i8x32(),
1236 b.as_i8x32(),
1237 _mm256_setzero_si256().as_i8x32(),
1238 k,
1239 ))
1240}
1241
1242/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1243///
1244/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epi8&expand=5776)
1245#[inline]
1246#[target_feature(enable = "avx512bw,avx512vl")]
1247#[cfg_attr(test, assert_instr(vpsubsb))]
1248pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1249 transmute(vpsubsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
1250}
1251
1252/// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1253///
1254/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epi8&expand=5777)
1255#[inline]
1256#[target_feature(enable = "avx512bw,avx512vl")]
1257#[cfg_attr(test, assert_instr(vpsubsb))]
1258pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1259 transmute(vpsubsb128(
1260 a.as_i8x16(),
1261 b.as_i8x16(),
1262 _mm_setzero_si128().as_i8x16(),
1263 k,
1264 ))
1265}
1266
1267/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1268///
1269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epu16&expand=3973)
1270#[inline]
1271#[target_feature(enable = "avx512bw")]
1272#[cfg_attr(test, assert_instr(vpmulhuw))]
1273pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
1274 transmute(vpmulhuw(a.as_u16x32(), b.as_u16x32()))
1275}
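
// Illustrative sketch (same assumptions as the earlier sketches): the operands
// are treated as unsigned, so the all-ones pattern multiplies as 65535 and the
// high half of 65535 * 2 = 0x0001_FFFE is 1.
#[cfg(test)]
mod mulhi_epu16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn returns_high_half_of_unsigned_product() {
        let a = _mm512_set1_epi16(-1); // 0xFFFF, i.e. 65535 when read as unsigned
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mulhi_epu16(a, b);
        assert_eq_m512i(r, _mm512_set1_epi16(1));
    }
}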
1276
1277/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1278///
1279/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epu16&expand=3971)
1280#[inline]
1281#[target_feature(enable = "avx512bw")]
1282#[cfg_attr(test, assert_instr(vpmulhuw))]
1283pub unsafe fn _mm512_mask_mulhi_epu16(
1284 src: __m512i,
1285 k: __mmask32,
1286 a: __m512i,
1287 b: __m512i,
1288) -> __m512i {
1289 let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1290 transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
1291}
1292
1293/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1294///
1295/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epu16&expand=3972)
1296#[inline]
1297#[target_feature(enable = "avx512bw")]
1298#[cfg_attr(test, assert_instr(vpmulhuw))]
1299pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1300 let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
1301 let zero = _mm512_setzero_si512().as_u16x32();
1302 transmute(simd_select_bitmask(k, mul, zero))
1303}
1304
1305/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1306///
1307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhi_epu16&expand=3968)
1308#[inline]
1309#[target_feature(enable = "avx512bw,avx512vl")]
1310#[cfg_attr(test, assert_instr(vpmulhuw))]
1311pub unsafe fn _mm256_mask_mulhi_epu16(
1312 src: __m256i,
1313 k: __mmask16,
1314 a: __m256i,
1315 b: __m256i,
1316) -> __m256i {
1317 let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1318 transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
1319}
1320
1321/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1322///
1323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhi_epu16&expand=3969)
1324#[inline]
1325#[target_feature(enable = "avx512bw,avx512vl")]
1326#[cfg_attr(test, assert_instr(vpmulhuw))]
1327pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1328 let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
1329 let zero = _mm256_setzero_si256().as_u16x16();
1330 transmute(simd_select_bitmask(k, mul, zero))
1331}
1332
1333/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1334///
1335/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhi_epu16&expand=3965)
1336#[inline]
1337#[target_feature(enable = "avx512bw,avx512vl")]
1338#[cfg_attr(test, assert_instr(vpmulhuw))]
1339pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1340 let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1341 transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
1342}
1343
1344/// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1345///
1346/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhi_epu16&expand=3966)
1347#[inline]
1348#[target_feature(enable = "avx512bw,avx512vl")]
1349#[cfg_attr(test, assert_instr(vpmulhuw))]
1350pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1351 let mul = _mm_mulhi_epu16(a, b).as_u16x8();
1352 let zero = _mm_setzero_si128().as_u16x8();
1353 transmute(simd_select_bitmask(k, mul, zero))
1354}
1355
1356/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst.
1357///
1358/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epi16&expand=3962)
1359#[inline]
1360#[target_feature(enable = "avx512bw")]
1361#[cfg_attr(test, assert_instr(vpmulhw))]
1362pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
1363 transmute(vpmulhw(a.as_i16x32(), b.as_i16x32()))
1364}
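
// Illustrative sketch (same assumptions as the earlier sketches): here the
// operands are signed, so 1000 * 1000 = 1_000_000 = 0x000F_4240 and the high
// 16 bits are 15.
#[cfg(test)]
mod mulhi_epi16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn returns_high_half_of_signed_product() {
        let a = _mm512_set1_epi16(1000);
        let b = _mm512_set1_epi16(1000);
        let r = _mm512_mulhi_epi16(a, b);
        assert_eq_m512i(r, _mm512_set1_epi16(15));
    }
}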
1365
1366/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1367///
1368/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epi16&expand=3960)
1369#[inline]
1370#[target_feature(enable = "avx512bw")]
1371#[cfg_attr(test, assert_instr(vpmulhw))]
1372pub unsafe fn _mm512_mask_mulhi_epi16(
1373 src: __m512i,
1374 k: __mmask32,
1375 a: __m512i,
1376 b: __m512i,
1377) -> __m512i {
1378 let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1379 transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1380}
1381
1382/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1383///
1384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epi16&expand=3961)
1385#[inline]
1386#[target_feature(enable = "avx512bw")]
1387#[cfg_attr(test, assert_instr(vpmulhw))]
1388pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1389 let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
1390 let zero = _mm512_setzero_si512().as_i16x32();
1391 transmute(simd_select_bitmask(k, mul, zero))
1392}
1393
1394/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1395///
1396/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhi_epi16&expand=3957)
1397#[inline]
1398#[target_feature(enable = "avx512bw,avx512vl")]
1399#[cfg_attr(test, assert_instr(vpmulhw))]
1400pub unsafe fn _mm256_mask_mulhi_epi16(
1401 src: __m256i,
1402 k: __mmask16,
1403 a: __m256i,
1404 b: __m256i,
1405) -> __m256i {
1406 let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1407 transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1408}
1409
1410/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1411///
1412/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhi_epi16&expand=3958)
1413#[inline]
1414#[target_feature(enable = "avx512bw,avx512vl")]
1415#[cfg_attr(test, assert_instr(vpmulhw))]
1416pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1417 let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
1418 let zero = _mm256_setzero_si256().as_i16x16();
1419 transmute(simd_select_bitmask(k, mul, zero))
1420}
1421
1422/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1423///
1424/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhi_epi16&expand=3954)
1425#[inline]
1426#[target_feature(enable = "avx512bw,avx512vl")]
1427#[cfg_attr(test, assert_instr(vpmulhw))]
1428pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1429 let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1430 transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1431}
1432
1433/// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1434///
1435/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhi_epi16&expand=3955)
1436#[inline]
1437#[target_feature(enable = "avx512bw,avx512vl")]
1438#[cfg_attr(test, assert_instr(vpmulhw))]
1439pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1440 let mul = _mm_mulhi_epi16(a, b).as_i16x8();
1441 let zero = _mm_setzero_si128().as_i16x8();
1442 transmute(simd_select_bitmask(k, mul, zero))
1443}
1444
1445/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst.
1446///
1447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhrs_epi16&expand=3986)
1448#[inline]
1449#[target_feature(enable = "avx512bw")]
1450#[cfg_attr(test, assert_instr(vpmulhrsw))]
1451pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i {
1452 transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32()))
1453}
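
// Illustrative sketch (same assumptions as the earlier sketches): with Q15
// fixed-point inputs, 0.5 (16384) times 0.25 (8192) gives the 32-bit product
// 0x0800_0000; shifting right by 14 gives 8192, adding 1 gives 8193, and
// keeping bits [16:1] gives 4096, i.e. 0.125 in Q15.
#[cfg(test)]
mod mulhrs_epi16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn rounds_q15_product() {
        let a = _mm512_set1_epi16(16384); // 0.5 in Q15
        let b = _mm512_set1_epi16(8192); // 0.25 in Q15
        let r = _mm512_mulhrs_epi16(a, b);
        assert_eq_m512i(r, _mm512_set1_epi16(4096)); // 0.125 in Q15
    }
}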
1454
1455/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1456///
1457/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhrs_epi16&expand=3984)
1458#[inline]
1459#[target_feature(enable = "avx512bw")]
1460#[cfg_attr(test, assert_instr(vpmulhrsw))]
1461pub unsafe fn _mm512_mask_mulhrs_epi16(
1462 src: __m512i,
1463 k: __mmask32,
1464 a: __m512i,
1465 b: __m512i,
1466) -> __m512i {
1467 let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1468 transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1469}
1470
1471/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1472///
1473/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhrs_epi16&expand=3985)
1474#[inline]
1475#[target_feature(enable = "avx512bw")]
1476#[cfg_attr(test, assert_instr(vpmulhrsw))]
1477pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1478 let mul = _mm512_mulhrs_epi16(a, b).as_i16x32();
1479 let zero = _mm512_setzero_si512().as_i16x32();
1480 transmute(simd_select_bitmask(k, mul, zero))
1481}
1482
1483/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1484///
1485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhrs_epi16&expand=3981)
1486#[inline]
1487#[target_feature(enable = "avx512bw,avx512vl")]
1488#[cfg_attr(test, assert_instr(vpmulhrsw))]
1489pub unsafe fn _mm256_mask_mulhrs_epi16(
1490 src: __m256i,
1491 k: __mmask16,
1492 a: __m256i,
1493 b: __m256i,
1494) -> __m256i {
1495 let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1496 transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1497}
1498
1499/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1500///
1501/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhrs_epi16&expand=3982)
1502#[inline]
1503#[target_feature(enable = "avx512bw,avx512vl")]
1504#[cfg_attr(test, assert_instr(vpmulhrsw))]
1505pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1506 let mul = _mm256_mulhrs_epi16(a, b).as_i16x16();
1507 let zero = _mm256_setzero_si256().as_i16x16();
1508 transmute(simd_select_bitmask(k, mul, zero))
1509}
1510
1511/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1512///
1513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhrs_epi16&expand=3978)
1514#[inline]
1515#[target_feature(enable = "avx512bw,avx512vl")]
1516#[cfg_attr(test, assert_instr(vpmulhrsw))]
1517pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1518 let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1519 transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1520}
1521
1522/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1523///
1524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhrs_epi16&expand=3979)
1525#[inline]
1526#[target_feature(enable = "avx512bw,avx512vl")]
1527#[cfg_attr(test, assert_instr(vpmulhrsw))]
1528pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1529 let mul = _mm_mulhrs_epi16(a, b).as_i16x8();
1530 let zero = _mm_setzero_si128().as_i16x8();
1531 transmute(simd_select_bitmask(k, mul, zero))
1532}
1533
1534/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst.
1535///
1536/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi16&expand=3996)
1537#[inline]
1538#[target_feature(enable = "avx512bw")]
1539#[cfg_attr(test, assert_instr(vpmullw))]
1540pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
1541 transmute(simd_mul(a.as_i16x32(), b.as_i16x32()))
1542}
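
// Illustrative sketch (same assumptions as the earlier sketches): only the low
// 16 bits of the 32-bit product survive, so 300 * 300 = 90000 wraps to
// 90000 - 65536 = 24464.
#[cfg(test)]
mod mullo_epi16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn keeps_low_half_of_product() {
        let a = _mm512_set1_epi16(300);
        let b = _mm512_set1_epi16(300);
        let r = _mm512_mullo_epi16(a, b);
        assert_eq_m512i(r, _mm512_set1_epi16(24464));
    }
}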
1543
1544/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1545///
1546/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi16&expand=3994)
1547#[inline]
1548#[target_feature(enable = "avx512bw")]
1549#[cfg_attr(test, assert_instr(vpmullw))]
1550pub unsafe fn _mm512_mask_mullo_epi16(
1551 src: __m512i,
1552 k: __mmask32,
1553 a: __m512i,
1554 b: __m512i,
1555) -> __m512i {
1556 let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1557 transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
1558}
1559
1560/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1561///
1562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi16&expand=3995)
1563#[inline]
1564#[target_feature(enable = "avx512bw")]
1565#[cfg_attr(test, assert_instr(vpmullw))]
1566pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1567 let mul = _mm512_mullo_epi16(a, b).as_i16x32();
1568 let zero = _mm512_setzero_si512().as_i16x32();
1569 transmute(simd_select_bitmask(k, mul, zero))
1570}
1571
1572/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1573///
1574/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mullo_epi16&expand=3991)
1575#[inline]
1576#[target_feature(enable = "avx512bw,avx512vl")]
1577#[cfg_attr(test, assert_instr(vpmullw))]
1578pub unsafe fn _mm256_mask_mullo_epi16(
1579 src: __m256i,
1580 k: __mmask16,
1581 a: __m256i,
1582 b: __m256i,
1583) -> __m256i {
1584 let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1585 transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
1586}
1587
1588/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1589///
1590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mullo_epi16&expand=3992)
1591#[inline]
1592#[target_feature(enable = "avx512bw,avx512vl")]
1593#[cfg_attr(test, assert_instr(vpmullw))]
1594pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1595 let mul = _mm256_mullo_epi16(a, b).as_i16x16();
1596 let zero = _mm256_setzero_si256().as_i16x16();
1597 transmute(simd_select_bitmask(k, mul, zero))
1598}
1599
1600/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1601///
1602/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mullo_epi16&expand=3988)
1603#[inline]
1604#[target_feature(enable = "avx512bw,avx512vl")]
1605#[cfg_attr(test, assert_instr(vpmullw))]
1606pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1607 let mul = _mm_mullo_epi16(a, b).as_i16x8();
1608 transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
1609}
1610
1611/// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1612///
1613/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mullo_epi16&expand=3989)
1614#[inline]
1615#[target_feature(enable = "avx512bw,avx512vl")]
1616#[cfg_attr(test, assert_instr(vpmullw))]
1617pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1618 let mul = _mm_mullo_epi16(a, b).as_i16x8();
1619 let zero = _mm_setzero_si128().as_i16x8();
1620 transmute(simd_select_bitmask(k, mul, zero))
1621}
1622
1623/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst.
1624///
1625/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu16&expand=3609)
1626#[inline]
1627#[target_feature(enable = "avx512bw")]
1628#[cfg_attr(test, assert_instr(vpmaxuw))]
1629pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
1630 transmute(vpmaxuw(a.as_u16x32(), b.as_u16x32()))
1631}
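
// Illustrative sketch (same assumptions as the earlier sketches): the compare
// is unsigned, so the all-ones pattern (65535) beats 1.
#[cfg(test)]
mod max_epu16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_unsigned() {
        let a = _mm512_set1_epi16(-1); // 0xFFFF = 65535 unsigned
        let b = _mm512_set1_epi16(1);
        let r = _mm512_max_epu16(a, b);
        assert_eq_m512i(r, a);
    }
}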
1632
1633/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1634///
1635/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu16&expand=3607)
1636#[inline]
1637#[target_feature(enable = "avx512bw")]
1638#[cfg_attr(test, assert_instr(vpmaxuw))]
1639pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1640 let max = _mm512_max_epu16(a, b).as_u16x32();
1641 transmute(simd_select_bitmask(k, max, src.as_u16x32()))
1642}
1643
1644/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1645///
1646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu16&expand=3608)
1647#[inline]
1648#[target_feature(enable = "avx512bw")]
1649#[cfg_attr(test, assert_instr(vpmaxuw))]
1650pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1651 let max = _mm512_max_epu16(a, b).as_u16x32();
1652 let zero = _mm512_setzero_si512().as_u16x32();
1653 transmute(simd_select_bitmask(k, max, zero))
1654}
1655
1656/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1657///
1658/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epu16&expand=3604)
1659#[inline]
1660#[target_feature(enable = "avx512bw,avx512vl")]
1661#[cfg_attr(test, assert_instr(vpmaxuw))]
1662pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1663 let max = _mm256_max_epu16(a, b).as_u16x16();
1664 transmute(simd_select_bitmask(k, max, src.as_u16x16()))
1665}
1666
1667/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1668///
1669/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epu16&expand=3605)
1670#[inline]
1671#[target_feature(enable = "avx512bw,avx512vl")]
1672#[cfg_attr(test, assert_instr(vpmaxuw))]
1673pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1674 let max = _mm256_max_epu16(a, b).as_u16x16();
1675 let zero = _mm256_setzero_si256().as_u16x16();
1676 transmute(simd_select_bitmask(k, max, zero))
1677}
1678
1679/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1680///
1681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epu16&expand=3601)
1682#[inline]
1683#[target_feature(enable = "avx512bw,avx512vl")]
1684#[cfg_attr(test, assert_instr(vpmaxuw))]
1685pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686 let max = _mm_max_epu16(a, b).as_u16x8();
1687 transmute(simd_select_bitmask(k, max, src.as_u16x8()))
1688}
1689
1690/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1691///
1692/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epu16&expand=3602)
1693#[inline]
1694#[target_feature(enable = "avx512bw,avx512vl")]
1695#[cfg_attr(test, assert_instr(vpmaxuw))]
1696pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1697 let max = _mm_max_epu16(a, b).as_u16x8();
1698 let zero = _mm_setzero_si128().as_u16x8();
1699 transmute(simd_select_bitmask(k, max, zero))
1700}
1701
1702/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst.
1703///
1704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu8&expand=3636)
1705#[inline]
1706#[target_feature(enable = "avx512bw")]
1707#[cfg_attr(test, assert_instr(vpmaxub))]
1708pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
1709 transmute(vpmaxub(a.as_u8x64(), b.as_u8x64()))
1710}
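
// Illustrative sketch (same assumptions as the earlier sketches): the same idea
// at byte width; 0xFF reads as 255 and wins the unsigned compare.
#[cfg(test)]
mod max_epu8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_unsigned() {
        let a = _mm512_set1_epi8(-1); // 0xFF = 255 unsigned
        let b = _mm512_set1_epi8(1);
        let r = _mm512_max_epu8(a, b);
        assert_eq_m512i(r, a);
    }
}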
1711
1712/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1713///
1714/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu8&expand=3634)
1715#[inline]
1716#[target_feature(enable = "avx512bw")]
1717#[cfg_attr(test, assert_instr(vpmaxub))]
1718pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1719 let max = _mm512_max_epu8(a, b).as_u8x64();
1720 transmute(simd_select_bitmask(k, max, src.as_u8x64()))
1721}
1722
1723/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1724///
1725/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu8&expand=3635)
1726#[inline]
1727#[target_feature(enable = "avx512bw")]
1728#[cfg_attr(test, assert_instr(vpmaxub))]
1729pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1730 let max = _mm512_max_epu8(a, b).as_u8x64();
1731 let zero = _mm512_setzero_si512().as_u8x64();
1732 transmute(simd_select_bitmask(k, max, zero))
1733}
1734
1735/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1736///
1737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epu8&expand=3631)
1738#[inline]
1739#[target_feature(enable = "avx512bw,avx512vl")]
1740#[cfg_attr(test, assert_instr(vpmaxub))]
1741pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1742 let max = _mm256_max_epu8(a, b).as_u8x32();
1743 transmute(simd_select_bitmask(k, max, src.as_u8x32()))
1744}
1745
1746/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1747///
1748/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epu8&expand=3632)
1749#[inline]
1750#[target_feature(enable = "avx512bw,avx512vl")]
1751#[cfg_attr(test, assert_instr(vpmaxub))]
1752pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1753 let max = _mm256_max_epu8(a, b).as_u8x32();
1754 let zero = _mm256_setzero_si256().as_u8x32();
1755 transmute(simd_select_bitmask(k, max, zero))
1756}
1757
1758/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1759///
1760/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epu8&expand=3628)
1761#[inline]
1762#[target_feature(enable = "avx512bw,avx512vl")]
1763#[cfg_attr(test, assert_instr(vpmaxub))]
1764pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1765 let max = _mm_max_epu8(a, b).as_u8x16();
1766 transmute(simd_select_bitmask(k, max, src.as_u8x16()))
1767}
1768
1769/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1770///
1771/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epu8&expand=3629)
1772#[inline]
1773#[target_feature(enable = "avx512bw,avx512vl")]
1774#[cfg_attr(test, assert_instr(vpmaxub))]
1775pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1776 let max = _mm_max_epu8(a, b).as_u8x16();
1777 let zero = _mm_setzero_si128().as_u8x16();
1778 transmute(simd_select_bitmask(k, max, zero))
1779}
1780
1781/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst.
1782///
1783/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi16&expand=3573)
1784#[inline]
1785#[target_feature(enable = "avx512bw")]
1786#[cfg_attr(test, assert_instr(vpmaxsw))]
1787pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
1788 transmute(vpmaxsw(a.as_i16x32(), b.as_i16x32()))
1789}
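
// Illustrative sketch (same assumptions as the earlier sketches): the signed
// compare picks 1 over -1, the opposite of the unsigned case above.
#[cfg(test)]
mod max_epi16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_signed() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_max_epi16(a, b);
        assert_eq_m512i(r, b);
    }
}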
1790
1791/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1792///
1793/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi16&expand=3571)
1794#[inline]
1795#[target_feature(enable = "avx512bw")]
1796#[cfg_attr(test, assert_instr(vpmaxsw))]
1797pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1798 let max = _mm512_max_epi16(a, b).as_i16x32();
1799 transmute(simd_select_bitmask(k, max, src.as_i16x32()))
1800}
1801
1802/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1803///
1804/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi16&expand=3572)
1805#[inline]
1806#[target_feature(enable = "avx512bw")]
1807#[cfg_attr(test, assert_instr(vpmaxsw))]
1808pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1809 let max = _mm512_max_epi16(a, b).as_i16x32();
1810 let zero = _mm512_setzero_si512().as_i16x32();
1811 transmute(simd_select_bitmask(k, max, zero))
1812}
1813
1814/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1815///
1816/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epi16&expand=3568)
1817#[inline]
1818#[target_feature(enable = "avx512bw,avx512vl")]
1819#[cfg_attr(test, assert_instr(vpmaxsw))]
1820pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1821 let max = _mm256_max_epi16(a, b).as_i16x16();
1822 transmute(simd_select_bitmask(k, max, src.as_i16x16()))
1823}
1824
1825/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1826///
1827/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epi16&expand=3569)
1828#[inline]
1829#[target_feature(enable = "avx512bw,avx512vl")]
1830#[cfg_attr(test, assert_instr(vpmaxsw))]
1831pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1832 let max = _mm256_max_epi16(a, b).as_i16x16();
1833 let zero = _mm256_setzero_si256().as_i16x16();
1834 transmute(simd_select_bitmask(k, max, zero))
1835}
1836
1837/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1838///
1839/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epi16&expand=3565)
1840#[inline]
1841#[target_feature(enable = "avx512bw,avx512vl")]
1842#[cfg_attr(test, assert_instr(vpmaxsw))]
1843pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1844 let max = _mm_max_epi16(a, b).as_i16x8();
1845 transmute(simd_select_bitmask(k, max, src.as_i16x8()))
1846}
1847
1848/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1849///
1850/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epi16&expand=3566)
1851#[inline]
1852#[target_feature(enable = "avx512bw,avx512vl")]
1853#[cfg_attr(test, assert_instr(vpmaxsw))]
1854pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1855 let max = _mm_max_epi16(a, b).as_i16x8();
1856 let zero = _mm_setzero_si128().as_i16x8();
1857 transmute(simd_select_bitmask(k, max, zero))
1858}
1859
1860/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst.
1861///
1862/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi8&expand=3600)
1863#[inline]
1864#[target_feature(enable = "avx512bw")]
1865#[cfg_attr(test, assert_instr(vpmaxsb))]
1866pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
1867 transmute(vpmaxsb(a.as_i8x64(), b.as_i8x64()))
1868}
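
// Illustrative sketch (same assumptions as the earlier sketches): the signed
// byte compare likewise picks 1 over -1.
#[cfg(test)]
mod max_epi8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_signed() {
        let a = _mm512_set1_epi8(-1);
        let b = _mm512_set1_epi8(1);
        let r = _mm512_max_epi8(a, b);
        assert_eq_m512i(r, b);
    }
}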
1869
1870/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1871///
1872/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi8&expand=3598)
1873#[inline]
1874#[target_feature(enable = "avx512bw")]
1875#[cfg_attr(test, assert_instr(vpmaxsb))]
1876pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1877 let max = _mm512_max_epi8(a, b).as_i8x64();
1878 transmute(simd_select_bitmask(k, max, src.as_i8x64()))
1879}
1880
1881/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1882///
1883/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi8&expand=3599)
1884#[inline]
1885#[target_feature(enable = "avx512bw")]
1886#[cfg_attr(test, assert_instr(vpmaxsb))]
1887pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
1888 let max = _mm512_max_epi8(a, b).as_i8x64();
1889 let zero = _mm512_setzero_si512().as_i8x64();
1890 transmute(simd_select_bitmask(k, max, zero))
1891}
1892
1893/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1894///
1895/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epi8&expand=3595)
1896#[inline]
1897#[target_feature(enable = "avx512bw,avx512vl")]
1898#[cfg_attr(test, assert_instr(vpmaxsb))]
1899pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1900 let max = _mm256_max_epi8(a, b).as_i8x32();
1901 transmute(simd_select_bitmask(k, max, src.as_i8x32()))
1902}
1903
1904/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1905///
1906/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epi8&expand=3596)
1907#[inline]
1908#[target_feature(enable = "avx512bw,avx512vl")]
1909#[cfg_attr(test, assert_instr(vpmaxsb))]
1910pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
1911 let max = _mm256_max_epi8(a, b).as_i8x32();
1912 let zero = _mm256_setzero_si256().as_i8x32();
1913 transmute(simd_select_bitmask(k, max, zero))
1914}
1915
1916/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1917///
1918/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epi8&expand=3592)
1919#[inline]
1920#[target_feature(enable = "avx512bw,avx512vl")]
1921#[cfg_attr(test, assert_instr(vpmaxsb))]
1922pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1923 let max = _mm_max_epi8(a, b).as_i8x16();
1924 transmute(simd_select_bitmask(k, max, src.as_i8x16()))
1925}
1926
1927/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1928///
1929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epi8&expand=3593)
1930#[inline]
1931#[target_feature(enable = "avx512bw,avx512vl")]
1932#[cfg_attr(test, assert_instr(vpmaxsb))]
1933pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
1934 let max = _mm_max_epi8(a, b).as_i8x16();
1935 let zero = _mm_setzero_si128().as_i8x16();
1936 transmute(simd_select_bitmask(k, max, zero))
1937}
1938
1939/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst.
1940///
1941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu16&expand=3723)
1942#[inline]
1943#[target_feature(enable = "avx512bw")]
1944#[cfg_attr(test, assert_instr(vpminuw))]
1945pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
1946 transmute(vpminuw(a.as_u16x32(), b.as_u16x32()))
1947}
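
// Illustrative sketch (same assumptions as the earlier sketches): under an
// unsigned compare, 1 is the minimum of {65535, 1}.
#[cfg(test)]
mod min_epu16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_unsigned() {
        let a = _mm512_set1_epi16(-1); // 65535 unsigned
        let b = _mm512_set1_epi16(1);
        let r = _mm512_min_epu16(a, b);
        assert_eq_m512i(r, b);
    }
}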
1948
1949/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu16&expand=3721)
1952#[inline]
1953#[target_feature(enable = "avx512bw")]
1954#[cfg_attr(test, assert_instr(vpminuw))]
1955pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1956 let min = _mm512_min_epu16(a, b).as_u16x32();
1957 transmute(simd_select_bitmask(k, min, src.as_u16x32()))
1958}
1959
1960/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1961///
1962/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu16&expand=3722)
1963#[inline]
1964#[target_feature(enable = "avx512bw")]
1965#[cfg_attr(test, assert_instr(vpminuw))]
1966pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
1967 let min = _mm512_min_epu16(a, b).as_u16x32();
1968 let zero = _mm512_setzero_si512().as_u16x32();
1969 transmute(simd_select_bitmask(k, min, zero))
1970}
1971
1972/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1973///
1974/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epu16&expand=3718)
1975#[inline]
1976#[target_feature(enable = "avx512bw,avx512vl")]
1977#[cfg_attr(test, assert_instr(vpminuw))]
1978pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1979 let min = _mm256_min_epu16(a, b).as_u16x16();
1980 transmute(simd_select_bitmask(k, min, src.as_u16x16()))
1981}
1982
1983/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1984///
1985/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epu16&expand=3719)
1986#[inline]
1987#[target_feature(enable = "avx512bw,avx512vl")]
1988#[cfg_attr(test, assert_instr(vpminuw))]
1989pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
1990 let min = _mm256_min_epu16(a, b).as_u16x16();
1991 let zero = _mm256_setzero_si256().as_u16x16();
1992 transmute(simd_select_bitmask(k, min, zero))
1993}
1994
1995/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1996///
1997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epu16&expand=3715)
1998#[inline]
1999#[target_feature(enable = "avx512bw,avx512vl")]
2000#[cfg_attr(test, assert_instr(vpminuw))]
2001pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2002 let min = _mm_min_epu16(a, b).as_u16x8();
2003 transmute(simd_select_bitmask(k, min, src.as_u16x8()))
2004}
2005
2006/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2007///
2008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epu16&expand=3716)
2009#[inline]
2010#[target_feature(enable = "avx512bw,avx512vl")]
2011#[cfg_attr(test, assert_instr(vpminuw))]
2012pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2013 let min = _mm_min_epu16(a, b).as_u16x8();
2014 let zero = _mm_setzero_si128().as_u16x8();
2015 transmute(simd_select_bitmask(k, min, zero))
2016}
2017
2018/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst.
2019///
2020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu8&expand=3750)
2021#[inline]
2022#[target_feature(enable = "avx512bw")]
2023#[cfg_attr(test, assert_instr(vpminub))]
2024pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2025 transmute(vpminub(a.as_u8x64(), b.as_u8x64()))
2026}
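
// Illustrative sketch (same assumptions as the earlier sketches): at byte
// width, the unsigned minimum of {255, 1} is 1.
#[cfg(test)]
mod min_epu8_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_unsigned() {
        let a = _mm512_set1_epi8(-1); // 255 unsigned
        let b = _mm512_set1_epi8(1);
        let r = _mm512_min_epu8(a, b);
        assert_eq_m512i(r, b);
    }
}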
2027
2028/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2029///
2030/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu8&expand=3748)
2031#[inline]
2032#[target_feature(enable = "avx512bw")]
2033#[cfg_attr(test, assert_instr(vpminub))]
2034pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2035 let min = _mm512_min_epu8(a, b).as_u8x64();
2036 transmute(simd_select_bitmask(k, min, src.as_u8x64()))
2037}
2038
2039/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2040///
2041/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu8&expand=3749)
2042#[inline]
2043#[target_feature(enable = "avx512bw")]
2044#[cfg_attr(test, assert_instr(vpminub))]
2045pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2046 let min = _mm512_min_epu8(a, b).as_u8x64();
2047 let zero = _mm512_setzero_si512().as_u8x64();
2048 transmute(simd_select_bitmask(k, min, zero))
2049}
2050
2051/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2052///
2053/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epu8&expand=3745)
2054#[inline]
2055#[target_feature(enable = "avx512bw,avx512vl")]
2056#[cfg_attr(test, assert_instr(vpminub))]
2057pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2058 let min = _mm256_min_epu8(a, b).as_u8x32();
2059 transmute(simd_select_bitmask(k, min, src.as_u8x32()))
2060}
2061
2062/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2063///
2064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epu8&expand=3746)
2065#[inline]
2066#[target_feature(enable = "avx512bw,avx512vl")]
2067#[cfg_attr(test, assert_instr(vpminub))]
2068pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2069 let min = _mm256_min_epu8(a, b).as_u8x32();
2070 let zero = _mm256_setzero_si256().as_u8x32();
2071 transmute(simd_select_bitmask(k, min, zero))
2072}
2073
2074/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2075///
2076/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epu8&expand=3742)
2077#[inline]
2078#[target_feature(enable = "avx512bw,avx512vl")]
2079#[cfg_attr(test, assert_instr(vpminub))]
2080pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2081 let min = _mm_min_epu8(a, b).as_u8x16();
2082 transmute(simd_select_bitmask(k, min, src.as_u8x16()))
2083}
2084
2085/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2086///
2087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epu8&expand=3743)
2088#[inline]
2089#[target_feature(enable = "avx512bw,avx512vl")]
2090#[cfg_attr(test, assert_instr(vpminub))]
2091pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2092 let min = _mm_min_epu8(a, b).as_u8x16();
2093 let zero = _mm_setzero_si128().as_u8x16();
2094 transmute(simd_select_bitmask(k, min, zero))
2095}
2096
2097/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst.
2098///
2099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi16&expand=3687)
2100#[inline]
2101#[target_feature(enable = "avx512bw")]
2102#[cfg_attr(test, assert_instr(vpminsw))]
2103pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2104 transmute(vpminsw(a.as_i16x32(), b.as_i16x32()))
2105}
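
// Illustrative sketch (same assumptions as the earlier sketches): the signed
// compare makes -1 the minimum, unlike the unsigned case above.
#[cfg(test)]
mod min_epi16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512bw")]
    unsafe fn compares_as_signed() {
        let a = _mm512_set1_epi16(-1);
        let b = _mm512_set1_epi16(1);
        let r = _mm512_min_epi16(a, b);
        assert_eq_m512i(r, a);
    }
}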
2106
2107/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2108///
2109/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi16&expand=3685)
2110#[inline]
2111#[target_feature(enable = "avx512bw")]
2112#[cfg_attr(test, assert_instr(vpminsw))]
2113pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2114 let min = _mm512_min_epi16(a, b).as_i16x32();
2115 transmute(simd_select_bitmask(k, min, src.as_i16x32()))
2116}
2117
2118/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2119///
2120/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi16&expand=3686)
2121#[inline]
2122#[target_feature(enable = "avx512bw")]
2123#[cfg_attr(test, assert_instr(vpminsw))]
2124pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
2125 let min = _mm512_min_epi16(a, b).as_i16x32();
2126 let zero = _mm512_setzero_si512().as_i16x32();
2127 transmute(simd_select_bitmask(k, min, zero))
2128}
2129
2130/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2131///
2132/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epi16&expand=3682)
2133#[inline]
2134#[target_feature(enable = "avx512bw,avx512vl")]
2135#[cfg_attr(test, assert_instr(vpminsw))]
2136pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2137 let min = _mm256_min_epi16(a, b).as_i16x16();
2138 transmute(simd_select_bitmask(k, min, src.as_i16x16()))
2139}
2140
2141/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2142///
2143/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epi16&expand=3683)
2144#[inline]
2145#[target_feature(enable = "avx512bw,avx512vl")]
2146#[cfg_attr(test, assert_instr(vpminsw))]
2147pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
2148 let min = _mm256_min_epi16(a, b).as_i16x16();
2149 let zero = _mm256_setzero_si256().as_i16x16();
2150 transmute(simd_select_bitmask(k, min, zero))
2151}
2152
2153/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2154///
2155/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi16&expand=3679)
2156#[inline]
2157#[target_feature(enable = "avx512bw,avx512vl")]
2158#[cfg_attr(test, assert_instr(vpminsw))]
2159pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2160 let min = _mm_min_epi16(a, b).as_i16x8();
2161 transmute(simd_select_bitmask(k, min, src.as_i16x8()))
2162}
2163
2164/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2165///
2166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi16&expand=3680)
2167#[inline]
2168#[target_feature(enable = "avx512bw,avx512vl")]
2169#[cfg_attr(test, assert_instr(vpminsw))]
2170pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2171 let min = _mm_min_epi16(a, b).as_i16x8();
2172 let zero = _mm_setzero_si128().as_i16x8();
2173 transmute(simd_select_bitmask(k, min, zero))
2174}
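
// Editor's note: an illustrative sketch, not part of the upstream source. The
// epi16 forms compare as *signed* 16-bit integers, so -1 is smaller than 0 here,
// whereas the unsigned forms would treat the same bit pattern (0xFFFF) as 65535.
//
//     let src = _mm_set1_epi16(7);
//     let a = _mm_set1_epi16(-1);
//     let b = _mm_set1_epi16(0);
//     // Lanes 0..4 become min(-1, 0) == -1; lanes 4..8 keep src == 7.
//     let r = _mm_mask_min_epi16(src, 0b0000_1111, a, b);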
2175
2176/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
2177///
2178/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi8&expand=3714)
2179#[inline]
2180#[target_feature(enable = "avx512bw")]
2181#[cfg_attr(test, assert_instr(vpminsb))]
2182pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
2183 transmute(vpminsb(a.as_i8x64(), b.as_i8x64()))
2184}
2185
2186/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2187///
2188/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi8&expand=3712)
2189#[inline]
2190#[target_feature(enable = "avx512bw")]
2191#[cfg_attr(test, assert_instr(vpminsb))]
2192pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2193 let min = _mm512_min_epi8(a, b).as_i8x64();
2194 transmute(simd_select_bitmask(k, min, src.as_i8x64()))
2195}
2196
2197/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2198///
2199/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi8&expand=3713)
2200#[inline]
2201#[target_feature(enable = "avx512bw")]
2202#[cfg_attr(test, assert_instr(vpminsb))]
2203pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
2204 let min = _mm512_min_epi8(a, b).as_i8x64();
2205 let zero = _mm512_setzero_si512().as_i8x64();
2206 transmute(simd_select_bitmask(k, min, zero))
2207}
2208
2209/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2210///
2211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epi8&expand=3709)
2212#[inline]
2213#[target_feature(enable = "avx512bw,avx512vl")]
2214#[cfg_attr(test, assert_instr(vpminsb))]
2215pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2216 let min = _mm256_min_epi8(a, b).as_i8x32();
2217 transmute(simd_select_bitmask(k, min, src.as_i8x32()))
2218}
2219
2220/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2221///
2222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epi8&expand=3710)
2223#[inline]
2224#[target_feature(enable = "avx512bw,avx512vl")]
2225#[cfg_attr(test, assert_instr(vpminsb))]
2226pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
2227 let min = _mm256_min_epi8(a, b).as_i8x32();
2228 let zero = _mm256_setzero_si256().as_i8x32();
2229 transmute(simd_select_bitmask(k, min, zero))
2230}
2231
2232/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2233///
2234/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi8&expand=3706)
2235#[inline]
2236#[target_feature(enable = "avx512bw,avx512vl")]
2237#[cfg_attr(test, assert_instr(vpminsb))]
2238pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2239 let min = _mm_min_epi8(a, b).as_i8x16();
2240 transmute(simd_select_bitmask(k, min, src.as_i8x16()))
2241}
2242
2243/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi8&expand=3707)
2246#[inline]
2247#[target_feature(enable = "avx512bw,avx512vl")]
2248#[cfg_attr(test, assert_instr(vpminsb))]
2249pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
2250 let min = _mm_min_epi8(a, b).as_i8x16();
2251 let zero = _mm_setzero_si128().as_i8x16();
2252 transmute(simd_select_bitmask(k, min, zero))
2253}
2254
2255/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2256///
2257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu16_mask&expand=1050)
2258#[inline]
2259#[target_feature(enable = "avx512bw")]
2260#[cfg_attr(test, assert_instr(vpcmp))]
2261pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2262 simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32()))
2263}
2264
2265/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2266///
2267/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
2268#[inline]
2269#[target_feature(enable = "avx512bw")]
2270#[cfg_attr(test, assert_instr(vpcmp))]
2271pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2272 _mm512_cmplt_epu16_mask(a, b) & k1
2273}
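
// Editor's note: an illustrative sketch, not part of the upstream source, of how
// the compare-into-mask intrinsics are used: bit i of the returned __mmask32 is
// set when lane i of `a` is below lane i of `b` (unsigned), and the k1 variant
// simply ANDs that result with an existing mask. Assumes AVX512BW support.
//
//     let a = _mm512_set1_epi16(0);
//     let b = _mm512_set1_epi16(1);
//     let m = _mm512_cmplt_epu16_mask(a, b);               // all 32 bits set
//     let k = _mm512_mask_cmplt_epu16_mask(0b1010, a, b);  // only bits 1 and 3 survive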
2274
cdc7bbd5
XL
2275/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2276///
2277/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epu16_mask&expand=1048)
2278#[inline]
2279#[target_feature(enable = "avx512bw,avx512vl")]
2280#[cfg_attr(test, assert_instr(vpcmp))]
2281pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2282 simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16()))
2283}
2284
2285/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2286///
2287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
2288#[inline]
2289#[target_feature(enable = "avx512bw,avx512vl")]
2290#[cfg_attr(test, assert_instr(vpcmp))]
2291pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2292 _mm256_cmplt_epu16_mask(a, b) & k1
2293}
2294
2295/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
2296///
2297/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epu16_mask&expand=1046)
2298#[inline]
2299#[target_feature(enable = "avx512bw,avx512vl")]
2300#[cfg_attr(test, assert_instr(vpcmp))]
2301pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2302 simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8()))
2303}
2304
2305/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2306///
2307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epu16_mask&expand=1047)
2308#[inline]
2309#[target_feature(enable = "avx512bw,avx512vl")]
2310#[cfg_attr(test, assert_instr(vpcmp))]
2311pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2312 _mm_cmplt_epu16_mask(a, b) & k1
2313}
2314
fc512014
XL
2315/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
2316///
2317/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu8_mask&expand=1068)
2318#[inline]
2319#[target_feature(enable = "avx512bw")]
2320#[cfg_attr(test, assert_instr(vpcmp))]
2321pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2322 simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64()))
2323}
2324
2325/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
2326///
2327/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
2328#[inline]
2329#[target_feature(enable = "avx512bw")]
2330#[cfg_attr(test, assert_instr(vpcmp))]
2331pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2332 _mm512_cmplt_epu8_mask(a, b) & k1
2333}
2334
cdc7bbd5 2335/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2336///
cdc7bbd5 2337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epu8_mask&expand=1066)
fc512014 2338#[inline]
cdc7bbd5 2339#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2340#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2341pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2342 simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32()))
fc512014
XL
2343}
2344
cdc7bbd5 2345/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2346///
cdc7bbd5 2347/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
fc512014 2348#[inline]
cdc7bbd5 2349#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2350#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2351pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2352 _mm256_cmplt_epu8_mask(a, b) & k1
fc512014
XL
2353}
2354
cdc7bbd5 2355/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2356///
cdc7bbd5 2357/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epu8_mask&expand=1064)
fc512014 2358#[inline]
cdc7bbd5 2359#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2360#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2361pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2362 simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16()))
fc512014
XL
2363}
2364
cdc7bbd5 2365/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2366///
cdc7bbd5 2367/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epu8_mask&expand=1065)
fc512014 2368#[inline]
cdc7bbd5 2369#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2370#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2371pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2372 _mm_cmplt_epu8_mask(a, b) & k1
fc512014
XL
2373}
2374
cdc7bbd5 2375/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2376///
cdc7bbd5 2377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi16_mask&expand=1022)
fc512014
XL
2378#[inline]
2379#[target_feature(enable = "avx512bw")]
2380#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2381pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2382 simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32()))
fc512014
XL
2383}
2384
cdc7bbd5 2385/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2386///
cdc7bbd5 2387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
fc512014
XL
2388#[inline]
2389#[target_feature(enable = "avx512bw")]
2390#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2391pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2392 _mm512_cmplt_epi16_mask(a, b) & k1
fc512014
XL
2393}
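
// Editor's note: an illustrative sketch, not part of the upstream source,
// contrasting the signed and unsigned less-than on the same bit pattern:
// 0xFFFF is -1 as a signed 16-bit value but 65535 as an unsigned one.
//
//     let a = _mm512_set1_epi16(-1); // 0xFFFF in every lane
//     let b = _mm512_set1_epi16(0);
//     assert_eq!(_mm512_cmplt_epi16_mask(a, b), u32::MAX); // -1 < 0 in every lane
//     assert_eq!(_mm512_cmplt_epu16_mask(a, b), 0);        // 65535 < 0 never holds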
2394
cdc7bbd5 2395/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2396///
cdc7bbd5 2397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epi16_mask&expand=1020)
fc512014 2398#[inline]
cdc7bbd5 2399#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2400#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2401pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2402 simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16()))
fc512014
XL
2403}
2404
cdc7bbd5 2405/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2406///
cdc7bbd5 2407/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
fc512014 2408#[inline]
cdc7bbd5 2409#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2410#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2411pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2412 _mm256_cmplt_epi16_mask(a, b) & k1
fc512014
XL
2413}
2414
cdc7bbd5 2415/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2416///
cdc7bbd5 2417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16_mask&expand=1018)
fc512014 2418#[inline]
cdc7bbd5 2419#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2420#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2421pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2422 simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
fc512014
XL
2423}
2424
cdc7bbd5 2425/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2426///
cdc7bbd5 2427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epi16_mask&expand=1019)
fc512014 2428#[inline]
cdc7bbd5 2429#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2430#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2431pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2432 _mm_cmplt_epi16_mask(a, b) & k1
fc512014
XL
2433}
2434
cdc7bbd5 2435/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2436///
cdc7bbd5 2437/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi8_mask&expand=1044)
fc512014
XL
2438#[inline]
2439#[target_feature(enable = "avx512bw")]
2440#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2441pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2442 simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64()))
fc512014
XL
2443}
2444
cdc7bbd5 2445/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2446///
cdc7bbd5 2447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
fc512014
XL
2448#[inline]
2449#[target_feature(enable = "avx512bw")]
2450#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2451pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2452 _mm512_cmplt_epi8_mask(a, b) & k1
fc512014
XL
2453}
2454
cdc7bbd5 2455/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2456///
cdc7bbd5 2457/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epi8_mask&expand=1042)
fc512014 2458#[inline]
cdc7bbd5 2459#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2460#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2461pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2462 simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32()))
fc512014
XL
2463}
2464
cdc7bbd5 2465/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2466///
cdc7bbd5 2467/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
fc512014 2468#[inline]
cdc7bbd5 2469#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2470#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2471pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2472 _mm256_cmplt_epi8_mask(a, b) & k1
fc512014
XL
2473}
2474
cdc7bbd5 2475/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
fc512014 2476///
cdc7bbd5 2477/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8_mask&expand=1040)
fc512014 2478#[inline]
cdc7bbd5 2479#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2480#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2481pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2482 simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
fc512014
XL
2483}
2484
cdc7bbd5 2485/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2486///
cdc7bbd5 2487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epi8_mask&expand=1041)
fc512014 2488#[inline]
cdc7bbd5 2489#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2490#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2491pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2492 _mm_cmplt_epi8_mask(a, b) & k1
fc512014
XL
2493}
2494
cdc7bbd5 2495/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2496///
cdc7bbd5 2497/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu16_mask&expand=927)
fc512014
XL
2498#[inline]
2499#[target_feature(enable = "avx512bw")]
2500#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2501pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2502 simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32()))
fc512014
XL
2503}
2504
cdc7bbd5 2505/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2506///
cdc7bbd5 2507/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
fc512014
XL
2508#[inline]
2509#[target_feature(enable = "avx512bw")]
2510#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2511pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2512 _mm512_cmpgt_epu16_mask(a, b) & k1
fc512014
XL
2513}
2514
cdc7bbd5 2515/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2516///
cdc7bbd5 2517/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epu16_mask&expand=925)
fc512014 2518#[inline]
cdc7bbd5 2519#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2520#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2521pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2522 simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16()))
fc512014
XL
2523}
2524
cdc7bbd5 2525/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2526///
cdc7bbd5 2527/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
fc512014 2528#[inline]
cdc7bbd5 2529#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2530#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2531pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2532 _mm256_cmpgt_epu16_mask(a, b) & k1
fc512014
XL
2533}
2534
cdc7bbd5 2535/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2536///
cdc7bbd5 2537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epu16_mask&expand=923)
fc512014 2538#[inline]
cdc7bbd5 2539#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2540#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2541pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2542 simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8()))
fc512014
XL
2543}
2544
cdc7bbd5 2545/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2546///
cdc7bbd5 2547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epu16_mask&expand=924)
fc512014 2548#[inline]
cdc7bbd5 2549#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2550#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2551pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2552 _mm_cmpgt_epu16_mask(a, b) & k1
fc512014
XL
2553}
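
// Editor's note: an illustrative sketch, not part of the upstream source.
// Greater-than is the same machinery with the operand roles swapped, so
// `_mm_cmpgt_epu16_mask(a, b)` produces the same mask as `_mm_cmplt_epu16_mask(b, a)`.
//
//     let a = _mm_set1_epi16(2);
//     let b = _mm_set1_epi16(1);
//     let m = _mm_cmpgt_epu16_mask(a, b);                   // 0b1111_1111
//     let k = _mm_mask_cmpgt_epu16_mask(0b0000_0011, a, b); // 0b0000_0011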
2554
cdc7bbd5 2555/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2556///
cdc7bbd5 2557/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu8_mask&expand=945)
fc512014
XL
2558#[inline]
2559#[target_feature(enable = "avx512bw")]
2560#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2561pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2562 simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64()))
fc512014
XL
2563}
2564
cdc7bbd5 2565/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2566///
cdc7bbd5 2567/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
fc512014
XL
2568#[inline]
2569#[target_feature(enable = "avx512bw")]
2570#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2571pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2572 _mm512_cmpgt_epu8_mask(a, b) & k1
fc512014
XL
2573}
2574
cdc7bbd5 2575/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2576///
cdc7bbd5 2577/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epu8_mask&expand=943)
fc512014 2578#[inline]
cdc7bbd5 2579#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2580#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2581pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2582 simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32()))
fc512014
XL
2583}
2584
cdc7bbd5 2585/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2586///
cdc7bbd5 2587/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
fc512014 2588#[inline]
cdc7bbd5 2589#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2590#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2591pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2592 _mm256_cmpgt_epu8_mask(a, b) & k1
fc512014
XL
2593}
2594
cdc7bbd5 2595/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2596///
cdc7bbd5 2597/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epu8_mask&expand=941)
fc512014 2598#[inline]
cdc7bbd5 2599#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2600#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2601pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2602 simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16()))
fc512014
XL
2603}
2604
cdc7bbd5 2605/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2606///
cdc7bbd5 2607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epu8_mask&expand=942)
fc512014 2608#[inline]
cdc7bbd5 2609#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2610#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2611pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2612 _mm_cmpgt_epu8_mask(a, b) & k1
fc512014
XL
2613}
2614
cdc7bbd5 2615/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2616///
cdc7bbd5 2617/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi16_mask&expand=897)
fc512014
XL
2618#[inline]
2619#[target_feature(enable = "avx512bw")]
2620#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2621pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2622 simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32()))
fc512014
XL
2623}
2624
cdc7bbd5 2625/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2626///
cdc7bbd5 2627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi16_mask&expand=898)
fc512014
XL
2628#[inline]
2629#[target_feature(enable = "avx512bw")]
2630#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2631pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2632 _mm512_cmpgt_epi16_mask(a, b) & k1
fc512014
XL
2633}
2634
cdc7bbd5 2635/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2636///
cdc7bbd5 2637/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16_mask&expand=895)
fc512014 2638#[inline]
cdc7bbd5 2639#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2640#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2641pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2642 simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16()))
fc512014
XL
2643}
2644
cdc7bbd5 2645/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2646///
cdc7bbd5 2647/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epi16_mask&expand=896)
fc512014 2648#[inline]
cdc7bbd5 2649#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2650#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2651pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2652 _mm256_cmpgt_epi16_mask(a, b) & k1
fc512014
XL
2653}
2654
cdc7bbd5 2655/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2656///
cdc7bbd5 2657/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16_mask&expand=893)
fc512014 2658#[inline]
cdc7bbd5 2659#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2660#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2661pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2662 simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
fc512014
XL
2663}
2664
cdc7bbd5 2665/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2666///
cdc7bbd5 2667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epi16_mask&expand=894)
fc512014 2668#[inline]
cdc7bbd5 2669#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2670#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2671pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2672 _mm_cmpgt_epi16_mask(a, b) & k1
fc512014
XL
2673}
2674
cdc7bbd5 2675/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2676///
cdc7bbd5 2677/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi8_mask&expand=921)
fc512014
XL
2678#[inline]
2679#[target_feature(enable = "avx512bw")]
2680#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2681pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2682 simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64()))
fc512014
XL
2683}
2684
cdc7bbd5 2685/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2686///
cdc7bbd5 2687/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi8_mask&expand=922)
fc512014
XL
2688#[inline]
2689#[target_feature(enable = "avx512bw")]
2690#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2691pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2692 _mm512_cmpgt_epi8_mask(a, b) & k1
fc512014
XL
2693}
2694
cdc7bbd5 2695/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2696///
cdc7bbd5 2697/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8_mask&expand=919)
fc512014 2698#[inline]
cdc7bbd5 2699#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2700#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2701pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2702 simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32()))
fc512014
XL
2703}
2704
cdc7bbd5 2705/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2706///
cdc7bbd5 2707/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epi8_mask&expand=920)
fc512014 2708#[inline]
cdc7bbd5 2709#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2710#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2711pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2712 _mm256_cmpgt_epi8_mask(a, b) & k1
fc512014
XL
2713}
2714
cdc7bbd5 2715/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k.
fc512014 2716///
cdc7bbd5 2717/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8_mask&expand=917)
fc512014 2718#[inline]
cdc7bbd5 2719#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2720#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2721pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2722 simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
fc512014
XL
2723}
2724
cdc7bbd5 2725/// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2726///
cdc7bbd5 2727/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epi8_mask&expand=918)
fc512014 2728#[inline]
cdc7bbd5 2729#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2730#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2731pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2732 _mm_cmpgt_epi8_mask(a, b) & k1
fc512014
XL
2733}
2734
cdc7bbd5 2735/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2736///
cdc7bbd5 2737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu16_mask&expand=989)
fc512014
XL
2738#[inline]
2739#[target_feature(enable = "avx512bw")]
2740#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2741pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2742 simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32()))
fc512014
XL
2743}
2744
cdc7bbd5 2745/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2746///
cdc7bbd5 2747/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu16_mask&expand=990)
fc512014
XL
2748#[inline]
2749#[target_feature(enable = "avx512bw")]
2750#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2751pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2752 _mm512_cmple_epu16_mask(a, b) & k1
fc512014
XL
2753}
2754
cdc7bbd5 2755/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2756///
cdc7bbd5 2757/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epu16_mask&expand=987)
fc512014 2758#[inline]
cdc7bbd5 2759#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2760#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2761pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2762 simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16()))
fc512014
XL
2763}
2764
cdc7bbd5 2765/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2766///
cdc7bbd5 2767/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epu16_mask&expand=988)
fc512014 2768#[inline]
cdc7bbd5 2769#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 2770#[cfg_attr(test, assert_instr(vpcmp))]
cdc7bbd5
XL
2771pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2772 _mm256_cmple_epu16_mask(a, b) & k1
fc512014
XL
2773}
2774
cdc7bbd5 2775/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2776///
cdc7bbd5 2777/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epu16_mask&expand=985)
fc512014 2778#[inline]
cdc7bbd5
XL
2779#[target_feature(enable = "avx512bw,avx512vl")]
2780#[cfg_attr(test, assert_instr(vpcmp))]
2781pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2782 simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8()))
fc512014
XL
2783}
2784
cdc7bbd5 2785/// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2786///
cdc7bbd5 2787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epu16_mask&expand=986)
fc512014 2788#[inline]
cdc7bbd5
XL
2789#[target_feature(enable = "avx512bw,avx512vl")]
2790#[cfg_attr(test, assert_instr(vpcmp))]
2791pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2792 _mm_cmple_epu16_mask(a, b) & k1
fc512014
XL
2793}
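
// Editor's note: an illustrative sketch, not part of the upstream source.
// Less-than-or-equal also accepts equal lanes, so on identical inputs every
// mask bit is set, whereas the strict less-than returns an empty mask.
//
//     let a = _mm_set1_epi16(5);
//     let le = _mm_cmple_epu16_mask(a, a); // 0b1111_1111: every lane is equal
//     let lt = _mm_cmplt_epu16_mask(a, a); // 0: no lane is strictly smaller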
2794
cdc7bbd5 2795/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2796///
cdc7bbd5 2797/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu8_mask&expand=1007)
fc512014
XL
2798#[inline]
2799#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2800#[cfg_attr(test, assert_instr(vpcmp))]
2801pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2802 simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64()))
fc512014
XL
2803}
2804
cdc7bbd5 2805/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2806///
cdc7bbd5 2807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu8_mask&expand=1008)
fc512014
XL
2808#[inline]
2809#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2810#[cfg_attr(test, assert_instr(vpcmp))]
2811pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2812 _mm512_cmple_epu8_mask(a, b) & k1
fc512014
XL
2813}
2814
cdc7bbd5 2815/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2816///
cdc7bbd5 2817/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epu8_mask&expand=1005)
fc512014 2818#[inline]
cdc7bbd5
XL
2819#[target_feature(enable = "avx512bw,avx512vl")]
2820#[cfg_attr(test, assert_instr(vpcmp))]
2821pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2822 simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32()))
fc512014
XL
2823}
2824
cdc7bbd5 2825/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2826///
cdc7bbd5 2827/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epu8_mask&expand=1006)
fc512014 2828#[inline]
cdc7bbd5
XL
2829#[target_feature(enable = "avx512bw,avx512vl")]
2830#[cfg_attr(test, assert_instr(vpcmp))]
2831pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2832 _mm256_cmple_epu8_mask(a, b) & k1
fc512014
XL
2833}
2834
cdc7bbd5 2835/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2836///
cdc7bbd5 2837/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epu8_mask&expand=1003)
fc512014 2838#[inline]
cdc7bbd5
XL
2839#[target_feature(enable = "avx512bw,avx512vl")]
2840#[cfg_attr(test, assert_instr(vpcmp))]
2841pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2842 simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16()))
fc512014
XL
2843}
2844
cdc7bbd5 2845/// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2846///
cdc7bbd5 2847/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epu8_mask&expand=1004)
fc512014 2848#[inline]
cdc7bbd5
XL
2849#[target_feature(enable = "avx512bw,avx512vl")]
2850#[cfg_attr(test, assert_instr(vpcmp))]
2851pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2852 _mm_cmple_epu8_mask(a, b) & k1
fc512014
XL
2853}
2854
cdc7bbd5 2855/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2856///
cdc7bbd5 2857/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi16_mask&expand=965)
fc512014
XL
2858#[inline]
2859#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2860#[cfg_attr(test, assert_instr(vpcmp))]
2861pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2862 simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32()))
fc512014
XL
2863}
2864
cdc7bbd5 2865/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2866///
cdc7bbd5 2867/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi16_mask&expand=966)
fc512014
XL
2868#[inline]
2869#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2870#[cfg_attr(test, assert_instr(vpcmp))]
2871pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2872 _mm512_cmple_epi16_mask(a, b) & k1
fc512014
XL
2873}
2874
cdc7bbd5 2875/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2876///
cdc7bbd5 2877/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epi16_mask&expand=963)
fc512014 2878#[inline]
cdc7bbd5
XL
2879#[target_feature(enable = "avx512bw,avx512vl")]
2880#[cfg_attr(test, assert_instr(vpcmp))]
2881pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
2882 simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16()))
fc512014
XL
2883}
2884
cdc7bbd5 2885/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2886///
cdc7bbd5 2887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epi16_mask&expand=964)
fc512014 2888#[inline]
cdc7bbd5
XL
2889#[target_feature(enable = "avx512bw,avx512vl")]
2890#[cfg_attr(test, assert_instr(vpcmp))]
2891pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
2892 _mm256_cmple_epi16_mask(a, b) & k1
fc512014
XL
2893}
2894
cdc7bbd5 2895/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2896///
cdc7bbd5 2897/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epi16_mask&expand=961)
fc512014 2898#[inline]
cdc7bbd5
XL
2899#[target_feature(enable = "avx512bw,avx512vl")]
2900#[cfg_attr(test, assert_instr(vpcmp))]
2901pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
2902 simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8()))
fc512014
XL
2903}
2904
cdc7bbd5 2905/// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2906///
cdc7bbd5 2907/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epi16_mask&expand=962)
fc512014 2908#[inline]
cdc7bbd5
XL
2909#[target_feature(enable = "avx512bw,avx512vl")]
2910#[cfg_attr(test, assert_instr(vpcmp))]
2911pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
2912 _mm_cmple_epi16_mask(a, b) & k1
fc512014
XL
2913}
2914
cdc7bbd5 2915/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2916///
cdc7bbd5 2917/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi8_mask&expand=983)
fc512014
XL
2918#[inline]
2919#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2920#[cfg_attr(test, assert_instr(vpcmp))]
2921pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
2922 simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64()))
fc512014
XL
2923}
2924
cdc7bbd5 2925/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2926///
cdc7bbd5 2927/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi8_mask&expand=984)
fc512014 2928#[inline]
cdc7bbd5
XL
2929#[target_feature(enable = "avx512bw")]
2930#[cfg_attr(test, assert_instr(vpcmp))]
2931pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
2932 _mm512_cmple_epi8_mask(a, b) & k1
fc512014
XL
2933}
2934
cdc7bbd5 2935/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2936///
cdc7bbd5 2937/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epi8_mask&expand=981)
fc512014
XL
2938#[inline]
2939#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
2940#[cfg_attr(test, assert_instr(vpcmp))]
2941pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
2942 simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32()))
fc512014
XL
2943}
2944
cdc7bbd5 2945/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2946///
cdc7bbd5 2947/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epi8_mask&expand=982)
fc512014
XL
2948#[inline]
2949#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
2950#[cfg_attr(test, assert_instr(vpcmp))]
2951pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
2952 _mm256_cmple_epi8_mask(a, b) & k1
fc512014
XL
2953}
2954
cdc7bbd5 2955/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
fc512014 2956///
cdc7bbd5 2957/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epi8_mask&expand=979)
fc512014
XL
2958#[inline]
2959#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
2960#[cfg_attr(test, assert_instr(vpcmp))]
2961pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
2962 simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16()))
fc512014
XL
2963}
2964
cdc7bbd5 2965/// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2966///
cdc7bbd5 2967/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epi8_mask&expand=980)
fc512014 2968#[inline]
cdc7bbd5
XL
2969#[target_feature(enable = "avx512bw,avx512vl")]
2970#[cfg_attr(test, assert_instr(vpcmp))]
2971pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
2972 _mm_cmple_epi8_mask(a, b) & k1
fc512014
XL
2973}
2974
cdc7bbd5 2975/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
fc512014 2976///
cdc7bbd5 2977/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu16_mask&expand=867)
fc512014
XL
2978#[inline]
2979#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2980#[cfg_attr(test, assert_instr(vpcmp))]
2981pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
2982 simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32()))
fc512014
XL
2983}
2984
cdc7bbd5 2985/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 2986///
cdc7bbd5 2987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu16_mask&expand=868)
fc512014
XL
2988#[inline]
2989#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
2990#[cfg_attr(test, assert_instr(vpcmp))]
2991pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
2992 _mm512_cmpge_epu16_mask(a, b) & k1
fc512014
XL
2993}
2994
cdc7bbd5 2995/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
fc512014 2996///
cdc7bbd5 2997/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epu16_mask&expand=865)
fc512014
XL
2998#[inline]
2999#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
3000#[cfg_attr(test, assert_instr(vpcmp))]
3001pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3002 simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16()))
fc512014
XL
3003}
3004
cdc7bbd5 3005/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 3006///
cdc7bbd5 3007/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epu16_mask&expand=866)
fc512014
XL
3008#[inline]
3009#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
3010#[cfg_attr(test, assert_instr(vpcmp))]
3011pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3012 _mm256_cmpge_epu16_mask(a, b) & k1
fc512014
XL
3013}
3014
cdc7bbd5 3015/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
fc512014 3016///
cdc7bbd5 3017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epu16_mask&expand=863)
fc512014
XL
3018#[inline]
3019#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
3020#[cfg_attr(test, assert_instr(vpcmp))]
3021pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3022 simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8()))
fc512014
XL
3023}
3024
cdc7bbd5 3025/// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 3026///
cdc7bbd5 3027/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epu16_mask&expand=864)
fc512014
XL
3028#[inline]
3029#[target_feature(enable = "avx512bw,avx512vl")]
cdc7bbd5
XL
3030#[cfg_attr(test, assert_instr(vpcmp))]
3031pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3032 _mm_cmpge_epu16_mask(a, b) & k1
fc512014
XL
3033}
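
// Editor's note: an illustrative sketch, not part of the upstream source.
// Greater-than-or-equal is the lane-wise complement of the strict less-than
// over the same operands: cmpge(a, b) == !cmplt(a, b).
//
//     let a = _mm_set1_epi16(3);
//     let b = _mm_set1_epi16(4);
//     let ge = _mm_cmpge_epu16_mask(a, b); // 0: 3 >= 4 never holds
//     let lt = _mm_cmplt_epu16_mask(a, b); // 0b1111_1111
//     assert_eq!(ge, !lt);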
3034
cdc7bbd5 3035/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
fc512014 3036///
cdc7bbd5 3037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu8_mask&expand=885)
fc512014
XL
3038#[inline]
3039#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
3040#[cfg_attr(test, assert_instr(vpcmp))]
3041pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3042 simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64()))
fc512014
XL
3043}
3044
cdc7bbd5 3045/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 3046///
cdc7bbd5 3047/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu8_mask&expand=886)
fc512014
XL
3048#[inline]
3049#[target_feature(enable = "avx512bw")]
cdc7bbd5
XL
3050#[cfg_attr(test, assert_instr(vpcmp))]
3051pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3052 _mm512_cmpge_epu8_mask(a, b) & k1
fc512014
XL
3053}
3054
cdc7bbd5 3055/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
fc512014 3056///
cdc7bbd5 3057/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epu8_mask&expand=883)
fc512014 3058#[inline]
3059#[target_feature(enable = "avx512bw,avx512vl")]
3060#[cfg_attr(test, assert_instr(vpcmp))]
3061pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3062 simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32()))
3063}
3064
cdc7bbd5 3065/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
fc512014 3066///
cdc7bbd5 3067/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epu8_mask&expand=884)
3068#[inline]
3069#[target_feature(enable = "avx512bw,avx512vl")]
3070#[cfg_attr(test, assert_instr(vpcmp))]
3071pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3072 _mm256_cmpge_epu8_mask(a, b) & k1
3073}
3074
cdc7bbd5 3075/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
fc512014 3076///
3077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epu8_mask&expand=881)
3078#[inline]
3079#[target_feature(enable = "avx512bw,avx512vl")]
3080#[cfg_attr(test, assert_instr(vpcmp))]
3081pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3082 simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16()))
3083}
3084
3085/// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3086///
3087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epu8_mask&expand=882)
3088#[inline]
3089#[target_feature(enable = "avx512bw,avx512vl")]
3090#[cfg_attr(test, assert_instr(vpcmp))]
3091pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3092 _mm_cmpge_epu8_mask(a, b) & k1
3093}
3094
3095/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3096///
3097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi16_mask&expand=843)
3098#[inline]
3099#[target_feature(enable = "avx512bw")]
3100#[cfg_attr(test, assert_instr(vpcmp))]
3101pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3102 simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32()))
3103}
3104
3105/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3106///
3107/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi16_mask&expand=844)
3108#[inline]
3109#[target_feature(enable = "avx512bw")]
3110#[cfg_attr(test, assert_instr(vpcmp))]
3111pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3112 _mm512_cmpge_epi16_mask(a, b) & k1
3113}
3114
3115/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3116///
3117/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epi16_mask&expand=841)
3118#[inline]
3119#[target_feature(enable = "avx512bw,avx512vl")]
3120#[cfg_attr(test, assert_instr(vpcmp))]
3121pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3122 simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16()))
3123}
3124
3125/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3126///
3127/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epi16_mask&expand=842)
3128#[inline]
3129#[target_feature(enable = "avx512bw,avx512vl")]
3130#[cfg_attr(test, assert_instr(vpcmp))]
3131pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3132 _mm256_cmpge_epi16_mask(a, b) & k1
3133}
3134
3135/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3136///
3137/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epi16_mask&expand=839)
3138#[inline]
3139#[target_feature(enable = "avx512bw,avx512vl")]
3140#[cfg_attr(test, assert_instr(vpcmp))]
3141pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3142 simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8()))
3143}
3144
3145/// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3146///
3147/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epi16_mask&expand=840)
3148#[inline]
3149#[target_feature(enable = "avx512bw,avx512vl")]
3150#[cfg_attr(test, assert_instr(vpcmp))]
3151pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3152 _mm_cmpge_epi16_mask(a, b) & k1
3153}
3154
3155/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3156///
3157/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi8_mask&expand=861)
3158#[inline]
3159#[target_feature(enable = "avx512bw")]
3160#[cfg_attr(test, assert_instr(vpcmp))]
3161pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3162 simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64()))
3163}
3164
3165/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3166///
3167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi8_mask&expand=862)
3168#[inline]
3169#[target_feature(enable = "avx512bw")]
3170#[cfg_attr(test, assert_instr(vpcmp))]
3171pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3172 _mm512_cmpge_epi8_mask(a, b) & k1
3173}
3174
3175/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3176///
3177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epi8_mask&expand=859)
3178#[inline]
3179#[target_feature(enable = "avx512bw,avx512vl")]
3180#[cfg_attr(test, assert_instr(vpcmp))]
3181pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3182 simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32()))
3183}
3184
3185/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3186///
3187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epi8_mask&expand=860)
3188#[inline]
3189#[target_feature(enable = "avx512bw,avx512vl")]
3190#[cfg_attr(test, assert_instr(vpcmp))]
3191pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3192 _mm256_cmpge_epi8_mask(a, b) & k1
3193}
3194
3195/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
3196///
3197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epi8_mask&expand=857)
3198#[inline]
3199#[target_feature(enable = "avx512bw,avx512vl")]
3200#[cfg_attr(test, assert_instr(vpcmp))]
3201pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3202 simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16()))
3203}
3204
3205/// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epi8_mask&expand=858)
3208#[inline]
3209#[target_feature(enable = "avx512bw,avx512vl")]
3210#[cfg_attr(test, assert_instr(vpcmp))]
3211pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3212 _mm_cmpge_epi8_mask(a, b) & k1
3213}
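
// Illustrative sketch (not part of the upstream crate): the same bit pattern can order
// differently under the signed (`epi8`) and unsigned (`epu8`) greater-than-or-equal
// comparisons defined above, since 0x80 is 128 as an unsigned byte but -128 as a signed
// byte. Assumes AVX-512BW and AVX-512VL support has been verified at runtime.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn signed_vs_unsigned_ge_sketch() -> (__mmask16, __mmask16) {
    let a = _mm_set1_epi8(-128); // every byte is 0x80
    let b = _mm_set1_epi8(1);
    let unsigned_ge = _mm_cmpge_epu8_mask(a, b); // 0xFFFF: 128 >= 1 in every lane
    let signed_ge = _mm_cmpge_epi8_mask(a, b); // 0x0000: -128 < 1 in every lane
    (unsigned_ge, signed_ge)
}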
3214
3215/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3216///
3217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu16_mask&expand=801)
3218#[inline]
3219#[target_feature(enable = "avx512bw")]
3220#[cfg_attr(test, assert_instr(vpcmp))]
3221pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3222 simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32()))
3223}
3224
3225/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3226///
3227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu16_mask&expand=802)
3228#[inline]
3229#[target_feature(enable = "avx512bw")]
3230#[cfg_attr(test, assert_instr(vpcmp))]
3231pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3232 _mm512_cmpeq_epu16_mask(a, b) & k1
3233}
3234
3235/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3236///
3237/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epu16_mask&expand=799)
3238#[inline]
3239#[target_feature(enable = "avx512bw,avx512vl")]
3240#[cfg_attr(test, assert_instr(vpcmp))]
3241pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3242 simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16()))
3243}
3244
3245/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3246///
3247/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epu16_mask&expand=800)
3248#[inline]
3249#[target_feature(enable = "avx512bw,avx512vl")]
3250#[cfg_attr(test, assert_instr(vpcmp))]
3251pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3252 _mm256_cmpeq_epu16_mask(a, b) & k1
3253}
3254
3255/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k.
3256///
3257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epu16_mask&expand=797)
3258#[inline]
3259#[target_feature(enable = "avx512bw,avx512vl")]
3260#[cfg_attr(test, assert_instr(vpcmp))]
3261pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3262 simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8()))
3263}
3264
3265/// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3266///
3267/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epu16_mask&expand=798)
3268#[inline]
3269#[target_feature(enable = "avx512bw,avx512vl")]
3270#[cfg_attr(test, assert_instr(vpcmp))]
3271pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3272 _mm_cmpeq_epu16_mask(a, b) & k1
3273}
3274
3275/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3276///
3277/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu8_mask&expand=819)
3278#[inline]
3279#[target_feature(enable = "avx512bw")]
3280#[cfg_attr(test, assert_instr(vpcmp))]
3281pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3282 simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64()))
3283}
3284
3285/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3286///
3287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu8_mask&expand=820)
3288#[inline]
3289#[target_feature(enable = "avx512bw")]
3290#[cfg_attr(test, assert_instr(vpcmp))]
3291pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3292 _mm512_cmpeq_epu8_mask(a, b) & k1
3293}
3294
3295/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3296///
3297/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epu8_mask&expand=817)
3298#[inline]
3299#[target_feature(enable = "avx512bw,avx512vl")]
3300#[cfg_attr(test, assert_instr(vpcmp))]
3301pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3302 simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32()))
3303}
3304
3305/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3306///
3307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epu8_mask&expand=818)
3308#[inline]
3309#[target_feature(enable = "avx512bw,avx512vl")]
3310#[cfg_attr(test, assert_instr(vpcmp))]
3311pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3312 _mm256_cmpeq_epu8_mask(a, b) & k1
3313}
3314
3315/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k.
3316///
3317/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epu8_mask&expand=815)
3318#[inline]
3319#[target_feature(enable = "avx512bw,avx512vl")]
3320#[cfg_attr(test, assert_instr(vpcmp))]
3321pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3322 simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16()))
3323}
3324
3325/// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3326///
3327/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epu8_mask&expand=816)
3328#[inline]
3329#[target_feature(enable = "avx512bw,avx512vl")]
3330#[cfg_attr(test, assert_instr(vpcmp))]
3331pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3332 _mm_cmpeq_epu8_mask(a, b) & k1
3333}
3334
3335/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3336///
3337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi16_mask&expand=771)
3338#[inline]
3339#[target_feature(enable = "avx512bw")]
3340#[cfg_attr(test, assert_instr(vpcmp))]
3341pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3342 simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32()))
3343}
3344
3345/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3346///
3347/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi16_mask&expand=772)
3348#[inline]
3349#[target_feature(enable = "avx512bw")]
3350#[cfg_attr(test, assert_instr(vpcmp))]
3351pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3352 _mm512_cmpeq_epi16_mask(a, b) & k1
3353}
3354
3355/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3356///
3357/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16_mask&expand=769)
3358#[inline]
3359#[target_feature(enable = "avx512bw,avx512vl")]
3360#[cfg_attr(test, assert_instr(vpcmp))]
3361pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3362 simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16()))
3363}
3364
3365/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3366///
3367/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epi16_mask&expand=770)
3368#[inline]
3369#[target_feature(enable = "avx512bw,avx512vl")]
3370#[cfg_attr(test, assert_instr(vpcmp))]
3371pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3372 _mm256_cmpeq_epi16_mask(a, b) & k1
3373}
3374
3375/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k.
3376///
3377/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16_mask&expand=767)
3378#[inline]
3379#[target_feature(enable = "avx512bw,avx512vl")]
3380#[cfg_attr(test, assert_instr(vpcmp))]
3381pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3382 simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
3383}
3384
3385/// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3386///
3387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epi16_mask&expand=768)
3388#[inline]
3389#[target_feature(enable = "avx512bw,avx512vl")]
3390#[cfg_attr(test, assert_instr(vpcmp))]
3391pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3392 _mm_cmpeq_epi16_mask(a, b) & k1
3393}
3394
3395/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3396///
3397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi8_mask&expand=795)
3398#[inline]
3399#[target_feature(enable = "avx512bw")]
3400#[cfg_attr(test, assert_instr(vpcmp))]
3401pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3402 simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64()))
3403}
3404
3405/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3406///
3407/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi8_mask&expand=796)
3408#[inline]
3409#[target_feature(enable = "avx512bw")]
3410#[cfg_attr(test, assert_instr(vpcmp))]
3411pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3412 _mm512_cmpeq_epi8_mask(a, b) & k1
3413}
3414
3415/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3416///
3417/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8_mask&expand=793)
3418#[inline]
3419#[target_feature(enable = "avx512bw,avx512vl")]
3420#[cfg_attr(test, assert_instr(vpcmp))]
3421pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3422 simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32()))
3423}
3424
3425/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3426///
3427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epi8_mask&expand=794)
3428#[inline]
3429#[target_feature(enable = "avx512bw,avx512vl")]
3430#[cfg_attr(test, assert_instr(vpcmp))]
3431pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3432 _mm256_cmpeq_epi8_mask(a, b) & k1
3433}
3434
3435/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k.
3436///
3437/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8_mask&expand=791)
3438#[inline]
3439#[target_feature(enable = "avx512bw,avx512vl")]
3440#[cfg_attr(test, assert_instr(vpcmp))]
3441pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3442 simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
3443}
3444
3445/// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epi8_mask&expand=792)
3448#[inline]
3449#[target_feature(enable = "avx512bw,avx512vl")]
3450#[cfg_attr(test, assert_instr(vpcmp))]
3451pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3452 _mm_cmpeq_epi8_mask(a, b) & k1
3453}
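
// Illustrative sketch (not part of the upstream crate): a mask register is an ordinary
// integer, so a compare result can feed straight into scalar bit operations; here
// `count_ones` on the `cmpeq` mask counts how many of the 16 byte lanes match. Assumes
// AVX-512BW and AVX-512VL support.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn count_equal_bytes_sketch(a: __m128i, b: __m128i) -> u32 {
    // Bit i is set when byte lane i of `a` equals byte lane i of `b`.
    let eq: __mmask16 = _mm_cmpeq_epi8_mask(a, b);
    eq.count_ones()
}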
3454
3455/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3456///
3457/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu16_mask&expand=1106)
3458#[inline]
3459#[target_feature(enable = "avx512bw")]
3460#[cfg_attr(test, assert_instr(vpcmp))]
3461pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3462 simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32()))
3463}
3464
3465/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3466///
3467/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu16_mask&expand=1107)
3468#[inline]
3469#[target_feature(enable = "avx512bw")]
3470#[cfg_attr(test, assert_instr(vpcmp))]
3471pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3472 _mm512_cmpneq_epu16_mask(a, b) & k1
3473}
3474
3475/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3476///
3477/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epu16_mask&expand=1104)
3478#[inline]
3479#[target_feature(enable = "avx512bw,avx512vl")]
3480#[cfg_attr(test, assert_instr(vpcmp))]
3481pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3482 simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16()))
3483}
3484
3485/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3486///
3487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epu16_mask&expand=1105)
3488#[inline]
3489#[target_feature(enable = "avx512bw,avx512vl")]
3490#[cfg_attr(test, assert_instr(vpcmp))]
3491pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3492 _mm256_cmpneq_epu16_mask(a, b) & k1
3493}
3494
3495/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3496///
3497/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epu16_mask&expand=1102)
3498#[inline]
3499#[target_feature(enable = "avx512bw,avx512vl")]
3500#[cfg_attr(test, assert_instr(vpcmp))]
3501pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3502 simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8()))
3503}
3504
3505/// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3506///
3507/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epu16_mask&expand=1103)
3508#[inline]
3509#[target_feature(enable = "avx512bw,avx512vl")]
3510#[cfg_attr(test, assert_instr(vpcmp))]
3511pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3512 _mm_cmpneq_epu16_mask(a, b) & k1
3513}
3514
3515/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3516///
3517/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu8_mask&expand=1124)
3518#[inline]
3519#[target_feature(enable = "avx512bw")]
3520#[cfg_attr(test, assert_instr(vpcmp))]
3521pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3522 simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64()))
3523}
3524
3525/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3526///
3527/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu8_mask&expand=1125)
3528#[inline]
3529#[target_feature(enable = "avx512bw")]
3530#[cfg_attr(test, assert_instr(vpcmp))]
3531pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3532 _mm512_cmpneq_epu8_mask(a, b) & k1
3533}
3534
3535/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3536///
3537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epu8_mask&expand=1122)
3538#[inline]
3539#[target_feature(enable = "avx512bw,avx512vl")]
3540#[cfg_attr(test, assert_instr(vpcmp))]
3541pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3542 simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32()))
3543}
3544
3545/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3546///
3547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epu8_mask&expand=1123)
3548#[inline]
3549#[target_feature(enable = "avx512bw,avx512vl")]
3550#[cfg_attr(test, assert_instr(vpcmp))]
3551pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3552 _mm256_cmpneq_epu8_mask(a, b) & k1
3553}
3554
3555/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3556///
3557/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epu8_mask&expand=1120)
3558#[inline]
3559#[target_feature(enable = "avx512bw,avx512vl")]
3560#[cfg_attr(test, assert_instr(vpcmp))]
3561pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3562 simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16()))
3563}
3564
3565/// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3566///
3567/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epu8_mask&expand=1121)
3568#[inline]
3569#[target_feature(enable = "avx512bw,avx512vl")]
3570#[cfg_attr(test, assert_instr(vpcmp))]
3571pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3572 _mm_cmpneq_epu8_mask(a, b) & k1
3573}
3574
3575/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3576///
3577/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi16_mask&expand=1082)
3578#[inline]
3579#[target_feature(enable = "avx512bw")]
3580#[cfg_attr(test, assert_instr(vpcmp))]
3581pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
3582 simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32()))
3583}
3584
3585/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3586///
3587/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi16_mask&expand=1083)
3588#[inline]
3589#[target_feature(enable = "avx512bw")]
3590#[cfg_attr(test, assert_instr(vpcmp))]
3591pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
3592 _mm512_cmpneq_epi16_mask(a, b) & k1
3593}
3594
3595/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3596///
3597/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epi16_mask&expand=1080)
3598#[inline]
3599#[target_feature(enable = "avx512bw,avx512vl")]
3600#[cfg_attr(test, assert_instr(vpcmp))]
3601pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
3602 simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16()))
3603}
3604
3605/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3606///
3607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epi16_mask&expand=1081)
3608#[inline]
3609#[target_feature(enable = "avx512bw,avx512vl")]
3610#[cfg_attr(test, assert_instr(vpcmp))]
3611pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
3612 _mm256_cmpneq_epi16_mask(a, b) & k1
3613}
3614
3615/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k.
3616///
3617/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epi16_mask&expand=1078)
3618#[inline]
3619#[target_feature(enable = "avx512bw,avx512vl")]
3620#[cfg_attr(test, assert_instr(vpcmp))]
3621pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
3622 simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8()))
3623}
3624
3625/// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3626///
3627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epi16_mask&expand=1079)
3628#[inline]
3629#[target_feature(enable = "avx512bw,avx512vl")]
3630#[cfg_attr(test, assert_instr(vpcmp))]
3631pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
3632 _mm_cmpneq_epi16_mask(a, b) & k1
3633}
3634
3635/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3636///
3637/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi8_mask&expand=1100)
3638#[inline]
3639#[target_feature(enable = "avx512bw")]
3640#[cfg_attr(test, assert_instr(vpcmp))]
3641pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
3642 simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64()))
3643}
3644
3645/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3646///
3647/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi8_mask&expand=1101)
3648#[inline]
3649#[target_feature(enable = "avx512bw")]
3650#[cfg_attr(test, assert_instr(vpcmp))]
3651pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
3652 _mm512_cmpneq_epi8_mask(a, b) & k1
3653}
3654
3655/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3656///
3657/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epi8_mask&expand=1098)
3658#[inline]
3659#[target_feature(enable = "avx512bw,avx512vl")]
3660#[cfg_attr(test, assert_instr(vpcmp))]
3661pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
3662 simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32()))
3663}
3664
3665/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epi8_mask&expand=1099)
3668#[inline]
3669#[target_feature(enable = "avx512bw,avx512vl")]
3670#[cfg_attr(test, assert_instr(vpcmp))]
3671pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
3672 _mm256_cmpneq_epi8_mask(a, b) & k1
3673}
3674
3675/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k.
3676///
3677/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epi8_mask&expand=1096)
3678#[inline]
3679#[target_feature(enable = "avx512bw,avx512vl")]
3680#[cfg_attr(test, assert_instr(vpcmp))]
3681pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
3682 simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16()))
3683}
3684
3685/// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3686///
3687/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epi8_mask&expand=1097)
3688#[inline]
3689#[target_feature(enable = "avx512bw,avx512vl")]
3690#[cfg_attr(test, assert_instr(vpcmp))]
3691pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
3692 _mm_cmpneq_epi8_mask(a, b) & k1
3693}
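
// Illustrative sketch (not part of the upstream crate): for any inputs, the `cmpneq`
// mask is the bitwise complement of the `cmpeq` mask over the 16 byte lanes, so either
// one can be derived from the other. Assumes AVX-512BW and AVX-512VL support.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn neq_is_not_eq_sketch(a: __m128i, b: __m128i) -> __mmask16 {
    let eq = _mm_cmpeq_epi8_mask(a, b);
    let ne = _mm_cmpneq_epi8_mask(a, b);
    debug_assert_eq!(ne, !eq); // all 16 mask bits map to lanes, so a plain NOT matches
    ne
}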
3694
17df50a5 3695/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
3696///
3697/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu16_mask&expand=715)
3698#[inline]
3699#[target_feature(enable = "avx512bw")]
3700#[rustc_legacy_const_generics(2)]
3701#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3702pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
3703 static_assert_imm3!(IMM8);
3704 let a = a.as_u16x32();
3705 let b = b.as_u16x32();
17df50a5 3706 let r = vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
3707 transmute(r)
3708}
3709
3710/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3711///
3712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu16_mask&expand=716)
3713#[inline]
3714#[target_feature(enable = "avx512bw")]
3715#[rustc_legacy_const_generics(3)]
3716#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3717pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
3718 k1: __mmask32,
3719 a: __m512i,
3720 b: __m512i,
cdc7bbd5 3721) -> __mmask32 {
17df50a5 3722 static_assert_imm3!(IMM8);
3723 let a = a.as_u16x32();
3724 let b = b.as_u16x32();
17df50a5 3725 let r = vpcmpuw(a, b, IMM8, k1);
3726 transmute(r)
3727}
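
// Illustrative sketch (not part of the upstream crate): `IMM8` selects the predicate
// (0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT/>=, 6 = NLE/>, 7 = TRUE). The
// `_MM_CMPINT_*` constants defined alongside the AVX-512F intrinsics can be used directly
// as the const generic argument. Assumes AVX-512BW support.
#[target_feature(enable = "avx512bw")]
unsafe fn cmp_le_sketch(a: __m512i, b: __m512i, k1: __mmask32) -> __mmask32 {
    // Unsigned a <= b per 16-bit lane, over all 32 lanes.
    let le = _mm512_cmp_epu16_mask::<_MM_CMPINT_LE>(a, b);
    // Same predicate, with lanes cleared in `k1` zeroed in the output.
    let le_masked = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b);
    debug_assert_eq!(le_masked, le & k1);
    le_masked
}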
3728
3729/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3730///
3731/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu16_mask&expand=713)
3732#[inline]
3733#[target_feature(enable = "avx512bw,avx512vl")]
3734#[rustc_legacy_const_generics(2)]
3735#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3736pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
3737 static_assert_imm3!(IMM8);
3738 let a = a.as_u16x16();
3739 let b = b.as_u16x16();
17df50a5 3740 let r = vpcmpuw256(a, b, IMM8, 0b11111111_11111111);
3741 transmute(r)
3742}
3743
3744/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3745///
3746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu16_mask&expand=714)
3747#[inline]
3748#[target_feature(enable = "avx512bw,avx512vl")]
3749#[rustc_legacy_const_generics(3)]
3750#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3751pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
3752 k1: __mmask16,
3753 a: __m256i,
3754 b: __m256i,
cdc7bbd5 3755) -> __mmask16 {
17df50a5 3756 static_assert_imm3!(IMM8);
3757 let a = a.as_u16x16();
3758 let b = b.as_u16x16();
17df50a5 3759 let r = vpcmpuw256(a, b, IMM8, k1);
3760 transmute(r)
3761}
3762
3763/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3764///
3765/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu16_mask&expand=711)
3766#[inline]
3767#[target_feature(enable = "avx512bw,avx512vl")]
3768#[rustc_legacy_const_generics(2)]
3769#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3770pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
3771 static_assert_imm3!(IMM8);
3772 let a = a.as_u16x8();
3773 let b = b.as_u16x8();
17df50a5 3774 let r = vpcmpuw128(a, b, IMM8, 0b11111111);
3775 transmute(r)
3776}
3777
3778/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3779///
3780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu16_mask&expand=712)
3781#[inline]
3782#[target_feature(enable = "avx512bw,avx512vl")]
3783#[rustc_legacy_const_generics(3)]
3784#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3785pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
3786 k1: __mmask8,
3787 a: __m128i,
3788 b: __m128i,
3789) -> __mmask8 {
3790 static_assert_imm3!(IMM8);
3791 let a = a.as_u16x8();
3792 let b = b.as_u16x8();
17df50a5 3793 let r = vpcmpuw128(a, b, IMM8, k1);
3794 transmute(r)
3795}
3796
3797/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3798///
3799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu8_mask&expand=733)
3800#[inline]
3801#[target_feature(enable = "avx512bw")]
3802#[rustc_legacy_const_generics(2)]
3803#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3804pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
3805 static_assert_imm3!(IMM8);
3806 let a = a.as_u8x64();
3807 let b = b.as_u8x64();
3808 let r = vpcmpub(
3809 a,
3810 b,
3811 IMM8,
3812 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
3813 );
3814 transmute(r)
3815}
3816
3817/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3818///
3819/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu8_mask&expand=734)
3820#[inline]
3821#[target_feature(enable = "avx512bw")]
3822#[rustc_legacy_const_generics(3)]
3823#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3824pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
3825 k1: __mmask64,
3826 a: __m512i,
3827 b: __m512i,
cdc7bbd5 3828) -> __mmask64 {
17df50a5 3829 static_assert_imm3!(IMM8);
3830 let a = a.as_u8x64();
3831 let b = b.as_u8x64();
17df50a5 3832 let r = vpcmpub(a, b, IMM8, k1);
3833 transmute(r)
3834}
3835
3836/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3837///
3838/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu8_mask&expand=731)
3839#[inline]
3840#[target_feature(enable = "avx512bw,avx512vl")]
3841#[rustc_legacy_const_generics(2)]
3842#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3843pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
3844 static_assert_imm3!(IMM8);
3845 let a = a.as_u8x32();
3846 let b = b.as_u8x32();
17df50a5 3847 let r = vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
3848 transmute(r)
3849}
3850
3851/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3852///
3853/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu8_mask&expand=732)
3854#[inline]
3855#[target_feature(enable = "avx512bw,avx512vl")]
3856#[rustc_legacy_const_generics(3)]
3857#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3858pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
3859 k1: __mmask32,
3860 a: __m256i,
3861 b: __m256i,
cdc7bbd5 3862) -> __mmask32 {
17df50a5 3863 static_assert_imm3!(IMM8);
3864 let a = a.as_u8x32();
3865 let b = b.as_u8x32();
17df50a5 3866 let r = vpcmpub256(a, b, IMM8, k1);
3867 transmute(r)
3868}
3869
3870/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3871///
3872/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu8_mask&expand=729)
3873#[inline]
3874#[target_feature(enable = "avx512bw,avx512vl")]
3875#[rustc_legacy_const_generics(2)]
3876#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3877pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
3878 static_assert_imm3!(IMM8);
3879 let a = a.as_u8x16();
3880 let b = b.as_u8x16();
17df50a5 3881 let r = vpcmpub128(a, b, IMM8, 0b11111111_11111111);
3882 transmute(r)
3883}
3884
3885/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu8_mask&expand=730)
3888#[inline]
3889#[target_feature(enable = "avx512bw,avx512vl")]
3890#[rustc_legacy_const_generics(3)]
3891#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3892pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
3893 k1: __mmask16,
3894 a: __m128i,
3895 b: __m128i,
cdc7bbd5 3896) -> __mmask16 {
17df50a5 3897 static_assert_imm3!(IMM8);
3898 let a = a.as_u8x16();
3899 let b = b.as_u8x16();
17df50a5 3900 let r = vpcmpub128(a, b, IMM8, k1);
3901 transmute(r)
3902}
3903
3904/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3905///
3906/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi16_mask&expand=691)
3907#[inline]
3908#[target_feature(enable = "avx512bw")]
3909#[rustc_legacy_const_generics(2)]
3910#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3911pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
3912 static_assert_imm3!(IMM8);
3913 let a = a.as_i16x32();
3914 let b = b.as_i16x32();
17df50a5 3915 let r = vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
3916 transmute(r)
3917}
3918
3919/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3920///
3921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi16_mask&expand=692)
3922#[inline]
3923#[target_feature(enable = "avx512bw")]
3924#[rustc_legacy_const_generics(3)]
3925#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3926pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
3927 k1: __mmask32,
3928 a: __m512i,
3929 b: __m512i,
cdc7bbd5 3930) -> __mmask32 {
17df50a5 3931 static_assert_imm3!(IMM8);
cdc7bbd5
XL
3932 let a = a.as_i16x32();
3933 let b = b.as_i16x32();
17df50a5 3934 let r = vpcmpw(a, b, IMM8, k1);
3935 transmute(r)
3936}
3937
3938/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3939///
3940/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi16_mask&expand=689)
3941#[inline]
3942#[target_feature(enable = "avx512bw,avx512vl")]
3943#[rustc_legacy_const_generics(2)]
3944#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3945pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
3946 static_assert_imm3!(IMM8);
3947 let a = a.as_i16x16();
3948 let b = b.as_i16x16();
17df50a5 3949 let r = vpcmpw256(a, b, IMM8, 0b11111111_11111111);
3950 transmute(r)
3951}
3952
3953/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3954///
3955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi16_mask&expand=690)
3956#[inline]
3957#[target_feature(enable = "avx512bw,avx512vl")]
3958#[rustc_legacy_const_generics(3)]
3959#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3960pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
3961 k1: __mmask16,
3962 a: __m256i,
3963 b: __m256i,
cdc7bbd5 3964) -> __mmask16 {
17df50a5 3965 static_assert_imm3!(IMM8);
3966 let a = a.as_i16x16();
3967 let b = b.as_i16x16();
17df50a5 3968 let r = vpcmpw256(a, b, IMM8, k1);
3969 transmute(r)
3970}
3971
3972/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
3973///
3974/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi16_mask&expand=687)
3975#[inline]
3976#[target_feature(enable = "avx512bw,avx512vl")]
3977#[rustc_legacy_const_generics(2)]
3978#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3979pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
3980 static_assert_imm3!(IMM8);
3981 let a = a.as_i16x8();
3982 let b = b.as_i16x8();
17df50a5 3983 let r = vpcmpw128(a, b, IMM8, 0b11111111);
3984 transmute(r)
3985}
3986
3987/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
3988///
3989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi16_mask&expand=688)
3990#[inline]
3991#[target_feature(enable = "avx512bw,avx512vl")]
3992#[rustc_legacy_const_generics(3)]
3993#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
3994pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
3995 k1: __mmask8,
3996 a: __m128i,
3997 b: __m128i,
3998) -> __mmask8 {
3999 static_assert_imm3!(IMM8);
4000 let a = a.as_i16x8();
4001 let b = b.as_i16x8();
17df50a5 4002 let r = vpcmpw128(a, b, IMM8, k1);
4003 transmute(r)
4004}
4005
4006/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4007///
4008/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi8_mask&expand=709)
4009#[inline]
4010#[target_feature(enable = "avx512bw")]
4011#[rustc_legacy_const_generics(2)]
4012#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4013pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
4014 static_assert_imm3!(IMM8);
4015 let a = a.as_i8x64();
4016 let b = b.as_i8x64();
4017 let r = vpcmpb(
4018 a,
4019 b,
4020 IMM8,
4021 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
4022 );
4023 transmute(r)
4024}
4025
4026/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4027///
4028/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi8_mask&expand=710)
4029#[inline]
4030#[target_feature(enable = "avx512bw")]
4031#[rustc_legacy_const_generics(3)]
4032#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4033pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
4034 k1: __mmask64,
4035 a: __m512i,
4036 b: __m512i,
cdc7bbd5 4037) -> __mmask64 {
17df50a5 4038 static_assert_imm3!(IMM8);
4039 let a = a.as_i8x64();
4040 let b = b.as_i8x64();
17df50a5 4041 let r = vpcmpb(a, b, IMM8, k1);
4042 transmute(r)
4043}
4044
4045/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4046///
4047/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi8_mask&expand=707)
4048#[inline]
4049#[target_feature(enable = "avx512bw,avx512vl")]
4050#[rustc_legacy_const_generics(2)]
4051#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4052pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
4053 static_assert_imm3!(IMM8);
4054 let a = a.as_i8x32();
4055 let b = b.as_i8x32();
17df50a5 4056 let r = vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
4057 transmute(r)
4058}
4059
4060/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4061///
4062/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi8_mask&expand=708)
4063#[inline]
4064#[target_feature(enable = "avx512bw,avx512vl")]
4065#[rustc_legacy_const_generics(3)]
4066#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4067pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
4068 k1: __mmask32,
4069 a: __m256i,
4070 b: __m256i,
cdc7bbd5 4071) -> __mmask32 {
17df50a5 4072 static_assert_imm3!(IMM8);
4073 let a = a.as_i8x32();
4074 let b = b.as_i8x32();
17df50a5 4075 let r = vpcmpb256(a, b, IMM8, k1);
4076 transmute(r)
4077}
4078
4079/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
4080///
4081/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi8_mask&expand=705)
4082#[inline]
4083#[target_feature(enable = "avx512bw,avx512vl")]
4084#[rustc_legacy_const_generics(2)]
4085#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4086pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
4087 static_assert_imm3!(IMM8);
4088 let a = a.as_i8x16();
4089 let b = b.as_i8x16();
17df50a5 4090 let r = vpcmpb128(a, b, IMM8, 0b11111111_11111111);
4091 transmute(r)
4092}
4093
4094/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
4095///
4096/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi8_mask&expand=706)
4097#[inline]
4098#[target_feature(enable = "avx512bw,avx512vl")]
4099#[rustc_legacy_const_generics(3)]
4100#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
4101pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
4102 k1: __mmask16,
4103 a: __m128i,
4104 b: __m128i,
cdc7bbd5 4105) -> __mmask16 {
17df50a5 4106 static_assert_imm3!(IMM8);
4107 let a = a.as_i8x16();
4108 let b = b.as_i8x16();
17df50a5 4109 let r = vpcmpb128(a, b, IMM8, k1);
4110 transmute(r)
4111}
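
// Illustrative sketch (not part of the upstream crate): the dedicated comparison helpers
// earlier in this section compute the same masks as the generic `cmp` form with the
// corresponding predicate; for example, `_MM_CMPINT_NE` (4) reproduces
// `_mm_cmpneq_epi8_mask`. Assumes AVX-512BW and AVX-512VL support.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn generic_vs_dedicated_sketch(a: __m128i, b: __m128i) -> __mmask16 {
    let via_cmp = _mm_cmp_epi8_mask::<_MM_CMPINT_NE>(a, b);
    debug_assert_eq!(via_cmp, _mm_cmpneq_epi8_mask(a, b));
    via_cmp
}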
4112
4113/// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
4114///
4115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi16&expand=3368)
4116#[inline]
4117#[target_feature(enable = "avx512bw")]
4118#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
4119pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
4120 ptr::read_unaligned(mem_addr as *const __m512i)
4121}
4122
4123/// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
4124///
4125/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_epi16&expand=3365)
4126#[inline]
4127#[target_feature(enable = "avx512bw,avx512vl")]
4128#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
4129pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
4130 ptr::read_unaligned(mem_addr as *const __m256i)
4131}
4132
4133/// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
4134///
4135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_epi16&expand=3362)
4136#[inline]
4137#[target_feature(enable = "avx512bw,avx512vl")]
4138#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
4139pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
4140 ptr::read_unaligned(mem_addr as *const __m128i)
4141}
4142
4143/// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
4144///
4145/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi8&expand=3395)
4146#[inline]
4147#[target_feature(enable = "avx512bw")]
4148#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
4149pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
4150 ptr::read_unaligned(mem_addr as *const __m512i)
4151}
4152
4153/// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
4154///
4155/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_epi8&expand=3392)
4156#[inline]
4157#[target_feature(enable = "avx512bw,avx512vl")]
4158#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
4159pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
4160 ptr::read_unaligned(mem_addr as *const __m256i)
4161}
4162
4163/// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
4164///
4165/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_epi8&expand=3389)
4166#[inline]
4167#[target_feature(enable = "avx512bw,avx512vl")]
4168#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
4169pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
4170 ptr::read_unaligned(mem_addr as *const __m128i)
4171}
4172
4173/// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
4174///
4175/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi16&expand=5622)
4176#[inline]
4177#[target_feature(enable = "avx512bw")]
4178#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
4179pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
4180 ptr::write_unaligned(mem_addr as *mut __m512i, a);
4181}
4182
4183/// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
4184///
4185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_epi16&expand=5620)
4186#[inline]
4187#[target_feature(enable = "avx512bw,avx512vl")]
4188#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
4189pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
4190 ptr::write_unaligned(mem_addr as *mut __m256i, a);
4191}
4192
4193/// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
4194///
4195/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_epi16&expand=5618)
4196#[inline]
4197#[target_feature(enable = "avx512bw,avx512vl")]
4198#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
4199pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
4200 ptr::write_unaligned(mem_addr as *mut __m128i, a);
4201}
4202
4203/// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
4204///
4205/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi8&expand=5640)
4206#[inline]
4207#[target_feature(enable = "avx512bw")]
4208#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
4209pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
4210 ptr::write_unaligned(mem_addr as *mut __m512i, a);
4211}
4212
4213/// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
4214///
4215/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_epi8&expand=5638)
4216#[inline]
4217#[target_feature(enable = "avx512bw,avx512vl")]
4218#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
4219pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
4220 ptr::write_unaligned(mem_addr as *mut __m256i, a);
4221}
4222
4223/// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
4224///
4225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_epi8&expand=5636)
4226#[inline]
4227#[target_feature(enable = "avx512bw,avx512vl")]
4228#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
4229pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
4230 ptr::write_unaligned(mem_addr as *mut __m128i, a);
4231}
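
// Companion sketch (not upstream code) for the store side: a full 64-byte store to an
// unaligned destination buffer, as described above. The function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn storeu_epi8_example() {
    let v = _mm512_set1_epi8(3);
    let mut buf = [0i8; 64];
    _mm512_storeu_epi8(buf.as_mut_ptr(), v); // no alignment requirement on `buf`
    assert!(buf.iter().all(|&x| x == 3));
}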
4232
4233/// Load packed 16-bit integers from memory into dst using writemask k
4234/// (elements are copied from src when the corresponding mask bit is not set).
4235/// mem_addr does not need to be aligned on any particular boundary.
4236///
4237/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_epi16)
4238#[inline]
4239#[target_feature(enable = "avx512f,avx512bw")]
4240pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
4241 let mut dst: __m512i = src;
4242 asm!(
4243 vpl!("vmovdqu16 {dst}{{{k}}}"),
4244 p = in(reg) mem_addr,
4245 k = in(kreg) k,
4246 dst = inout(zmm_reg) dst,
4247 options(pure, readonly, nostack)
4248 );
4249 dst
4250}
4251
4252/// Load packed 16-bit integers from memory into dst using zeromask k
4253/// (elements are zeroed out when the corresponding mask bit is not set).
4254/// mem_addr does not need to be aligned on any particular boundary.
4255///
4256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_epi16)
4257#[inline]
4258#[target_feature(enable = "avx512f,avx512bw")]
4259pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
4260 let mut dst: __m512i;
4261 asm!(
4262 vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
4263 p = in(reg) mem_addr,
4264 k = in(kreg) k,
4265 dst = out(zmm_reg) dst,
4266 options(pure, readonly, nostack)
4267 );
4268 dst
4269}
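
// A sketch (not upstream code) contrasting the writemask and zeromask loads just defined:
// lanes whose mask bit is clear either keep the corresponding lane of `src` or become zero.
// The function name is hypothetical; the set/store helpers come from elsewhere in core_arch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn mask_loadu_epi16_example() {
    let mem = [5i16; 32];
    let src = _mm512_set1_epi16(-1);
    let k: __mmask32 = 0x0000_FFFF; // select only the low 16 lanes
    let a = _mm512_mask_loadu_epi16(src, k, mem.as_ptr());
    let b = _mm512_maskz_loadu_epi16(k, mem.as_ptr());
    let (mut oa, mut ob) = ([0i16; 32], [0i16; 32]);
    _mm512_storeu_epi16(oa.as_mut_ptr(), a);
    _mm512_storeu_epi16(ob.as_mut_ptr(), b);
    assert_eq!(oa[0], 5);
    assert_eq!(oa[31], -1); // writemask: unselected lanes copied from `src`
    assert_eq!(ob[31], 0); // zeromask: unselected lanes zeroed
}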
4270
4271/// Load packed 8-bit integers from memory into dst using writemask k
4272/// (elements are copied from src when the corresponding mask bit is not set).
4273/// mem_addr does not need to be aligned on any particular boundary.
4274///
4275/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_epi8)
4276#[inline]
4277#[target_feature(enable = "avx512f,avx512bw")]
4278pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
4279 let mut dst: __m512i = src;
4280 asm!(
4281 vpl!("vmovdqu8 {dst}{{{k}}}"),
4282 p = in(reg) mem_addr,
4283 k = in(kreg) k,
4284 dst = inout(zmm_reg) dst,
4285 options(pure, readonly, nostack)
4286 );
4287 dst
4288}
4289
4290/// Load packed 8-bit integers from memory into dst using zeromask k
4291/// (elements are zeroed out when the corresponding mask bit is not set).
4292/// mem_addr does not need to be aligned on any particular boundary.
4293///
4294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_epi8)
4295#[inline]
4296#[target_feature(enable = "avx512f,avx512bw")]
4297pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
4298 let mut dst: __m512i;
4299 asm!(
4300 vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
4301 p = in(reg) mem_addr,
4302 k = in(kreg) k,
4303 dst = out(zmm_reg) dst,
4304 options(pure, readonly, nostack)
4305 );
4306 dst
4307}
4308
4309/// Load packed 16-bit integers from memory into dst using writemask k
4310/// (elements are copied from src when the corresponding mask bit is not set).
4311/// mem_addr does not need to be aligned on any particular boundary.
4312///
4313/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_loadu_epi16)
4314#[inline]
4315#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4316pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
4317 let mut dst: __m256i = src;
4318 asm!(
4319 vpl!("vmovdqu16 {dst}{{{k}}}"),
4320 p = in(reg) mem_addr,
4321 k = in(kreg) k,
4322 dst = inout(ymm_reg) dst,
4323 options(pure, readonly, nostack)
4324 );
4325 dst
4326}
4327
4328/// Load packed 16-bit integers from memory into dst using zeromask k
4329/// (elements are zeroed out when the corresponding mask bit is not set).
4330/// mem_addr does not need to be aligned on any particular boundary.
4331///
4332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_loadu_epi16)
4333#[inline]
4334#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4335pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
4336 let mut dst: __m256i;
4337 asm!(
4338 vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
4339 p = in(reg) mem_addr,
4340 k = in(kreg) k,
4341 dst = out(ymm_reg) dst,
4342 options(pure, readonly, nostack)
4343 );
4344 dst
4345}
4346
4347/// Load packed 8-bit integers from memory into dst using writemask k
4348/// (elements are copied from src when the corresponding mask bit is not set).
4349/// mem_addr does not need to be aligned on any particular boundary.
4350///
4351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_loadu_epi8)
4352#[inline]
4353#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4354pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
4355 let mut dst: __m256i = src;
4356 asm!(
4357 vpl!("vmovdqu8 {dst}{{{k}}}"),
4358 p = in(reg) mem_addr,
4359 k = in(kreg) k,
4360 dst = inout(ymm_reg) dst,
4361 options(pure, readonly, nostack)
4362 );
4363 dst
4364}
4365
4366/// Load packed 8-bit integers from memory into dst using zeromask k
4367/// (elements are zeroed out when the corresponding mask bit is not set).
4368/// mem_addr does not need to be aligned on any particular boundary.
4369///
4370/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_loadu_epi8)
4371#[inline]
4372#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4373pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
4374 let mut dst: __m256i;
4375 asm!(
4376 vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
4377 p = in(reg) mem_addr,
4378 k = in(kreg) k,
4379 dst = out(ymm_reg) dst,
4380 options(pure, readonly, nostack)
4381 );
4382 dst
4383}
4384
4385/// Load packed 16-bit integers from memory into dst using writemask k
4386/// (elements are copied from src when the corresponding mask bit is not set).
4387/// mem_addr does not need to be aligned on any particular boundary.
4388///
4389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_loadu_epi16)
4390#[inline]
4391#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4392pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
4393 let mut dst: __m128i = src;
4394 asm!(
4395 vpl!("vmovdqu16 {dst}{{{k}}}"),
4396 p = in(reg) mem_addr,
4397 k = in(kreg) k,
4398 dst = inout(xmm_reg) dst,
4399 options(pure, readonly, nostack)
4400 );
4401 dst
4402}
4403
4404/// Load packed 16-bit integers from memory into dst using zeromask k
4405/// (elements are zeroed out when the corresponding mask bit is not set).
4406/// mem_addr does not need to be aligned on any particular boundary.
4407///
4408/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_loadu_epi16)
4409#[inline]
4410#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4411pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
4412 let mut dst: __m128i;
4413 asm!(
4414 vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
4415 p = in(reg) mem_addr,
4416 k = in(kreg) k,
4417 dst = out(xmm_reg) dst,
4418 options(pure, readonly, nostack)
4419 );
4420 dst
4421}
4422
4423/// Load packed 8-bit integers from memory into dst using writemask k
4424/// (elements are copied from src when the corresponding mask bit is not set).
4425/// mem_addr does not need to be aligned on any particular boundary.
4426///
4427/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_loadu_epi8)
4428#[inline]
4429#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4430pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
4431 let mut dst: __m128i = src;
4432 asm!(
4433 vpl!("vmovdqu8 {dst}{{{k}}}"),
4434 p = in(reg) mem_addr,
4435 k = in(kreg) k,
4436 dst = inout(xmm_reg) dst,
4437 options(pure, readonly, nostack)
4438 );
4439 dst
4440}
4441
4442/// Load packed 8-bit integers from memory into dst using zeromask k
4443/// (elements are zeroed out when the corresponding mask bit is not set).
4444/// mem_addr does not need to be aligned on any particular boundary.
4445///
4446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_loadu_epi8)
4447#[inline]
4448#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4449pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
4450 let mut dst: __m128i;
4451 asm!(
4452 vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
4453 p = in(reg) mem_addr,
4454 k = in(kreg) k,
4455 dst = out(xmm_reg) dst,
4456 options(pure, readonly, nostack)
4457 );
4458 dst
4459}
4460
4461/// Store packed 16-bit integers from a into memory using writemask k.
4462/// mem_addr does not need to be aligned on any particular boundary.
4463///
4464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi16)
4465#[inline]
4466#[target_feature(enable = "avx512f,avx512bw")]
4467pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
4468 asm!(
4469 vps!("vmovdqu16", "{{{mask}}}, {a}"),
4470 p = in(reg) mem_addr,
4471 mask = in(kreg) mask,
4472 a = in(zmm_reg) a,
4473 options(nostack)
4474 );
4475}
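
// A sketch (not upstream code) of the masked store defined above: only lanes whose mask
// bit is set are written to memory, the rest of the buffer is left untouched. The function
// name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512bw")]
unsafe fn mask_storeu_epi16_example() {
    let a = _mm512_set1_epi16(9);
    let mut buf = [0i16; 32];
    _mm512_mask_storeu_epi16(buf.as_mut_ptr(), 0b11, a); // write lanes 0 and 1 only
    assert_eq!(&buf[..4], &[9, 9, 0, 0]);
}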
4476
4477/// Store packed 8-bit integers from a into memory using writemask k.
4478/// mem_addr does not need to be aligned on any particular boundary.
4479///
4480/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi8)
4481#[inline]
4482#[target_feature(enable = "avx512f,avx512bw")]
4483pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
4484 asm!(
4485 vps!("vmovdqu8", "{{{mask}}}, {a}"),
4486 p = in(reg) mem_addr,
4487 mask = in(kreg) mask,
4488 a = in(zmm_reg) a,
4489 options(nostack)
4490 );
4491}
4492
4493/// Store packed 16-bit integers from a into memory using writemask k.
4494/// mem_addr does not need to be aligned on any particular boundary.
4495///
4496/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_storeu_epi16)
4497#[inline]
4498#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4499pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
4500 asm!(
4501 vps!("vmovdqu16", "{{{mask}}}, {a}"),
4502 p = in(reg) mem_addr,
4503 mask = in(kreg) mask,
4504 a = in(ymm_reg) a,
4505 options(nostack)
4506 );
4507}
4508
4509/// Store packed 8-bit integers from a into memory using writemask k.
4510/// mem_addr does not need to be aligned on any particular boundary.
4511///
4512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_storeu_epi8)
4513#[inline]
4514#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
4515pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
4516 asm!(
4517 vps!("vmovdqu8", "{{{mask}}}, {a}"),
4518 p = in(reg) mem_addr,
4519 mask = in(kreg) mask,
4520 a = in(ymm_reg) a,
4521 options(nostack)
4522 );
4523}
4524
4525/// Store packed 16-bit integers from a into memory using writemask k.
4526/// mem_addr does not need to be aligned on any particular boundary.
4527///
4528/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_storeu_epi16)
4529#[inline]
4530#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4531pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
4532 asm!(
4533 vps!("vmovdqu16", "{{{mask}}}, {a}"),
4534 p = in(reg) mem_addr,
4535 mask = in(kreg) mask,
4536 a = in(xmm_reg) a,
4537 options(nostack)
4538 );
4539}
4540
4541/// Store packed 8-bit integers from a into memory using writemask k.
4542/// mem_addr does not need to be aligned on any particular boundary.
4543///
4544/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_storeu_epi8)
4545#[inline]
4546#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
4547pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
4548 asm!(
4549 vps!("vmovdqu8", "{{{mask}}}, {a}"),
4550 p = in(reg) mem_addr,
4551 mask = in(kreg) mask,
4552 a = in(xmm_reg) a,
4553 options(nostack)
4554 );
4555}
4556
4557/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
4558///
4559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_madd_epi16&expand=3511)
4560#[inline]
4561#[target_feature(enable = "avx512bw")]
4562#[cfg_attr(test, assert_instr(vpmaddwd))]
4563pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
4564 transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32()))
4565}
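
// A small worked example (not upstream code): each 32-bit output lane is
// a[2i]*b[2i] + a[2i+1]*b[2i+1] computed on the 16-bit inputs. The function name is
// hypothetical; set1/storeu helpers come from elsewhere in core_arch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn madd_epi16_example() {
    let a = _mm512_set1_epi32(0x0002_0001); // 16-bit lanes repeat as 1, 2
    let b = _mm512_set1_epi32(0x0004_0003); // 16-bit lanes repeat as 3, 4
    let r = _mm512_madd_epi16(a, b); // every 32-bit lane is 1*3 + 2*4 = 11
    let mut out = [0i32; 16];
    _mm512_storeu_epi32(out.as_mut_ptr(), r);
    assert!(out.iter().all(|&x| x == 11));
}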
4566
4567/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4568///
4569/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_madd_epi16&expand=3512)
4570#[inline]
4571#[target_feature(enable = "avx512bw")]
4572#[cfg_attr(test, assert_instr(vpmaddwd))]
4573pub unsafe fn _mm512_mask_madd_epi16(
4574 src: __m512i,
4575 k: __mmask16,
4576 a: __m512i,
4577 b: __m512i,
4578) -> __m512i {
4579 let madd = _mm512_madd_epi16(a, b).as_i32x16();
4580 transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
4581}
4582
4583/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4584///
4585/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_madd_epi16&expand=3513)
4586#[inline]
4587#[target_feature(enable = "avx512bw")]
4588#[cfg_attr(test, assert_instr(vpmaddwd))]
4589pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
4590 let madd = _mm512_madd_epi16(a, b).as_i32x16();
4591 let zero = _mm512_setzero_si512().as_i32x16();
4592 transmute(simd_select_bitmask(k, madd, zero))
4593}
4594
4595/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4596///
4597/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_madd_epi16&expand=3509)
4598#[inline]
4599#[target_feature(enable = "avx512bw,avx512vl")]
4600#[cfg_attr(test, assert_instr(vpmaddwd))]
4601pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4602 let madd = _mm256_madd_epi16(a, b).as_i32x8();
4603 transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
4604}
4605
4606/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4607///
4608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_madd_epi16&expand=3510)
4609#[inline]
4610#[target_feature(enable = "avx512bw,avx512vl")]
4611#[cfg_attr(test, assert_instr(vpmaddwd))]
4612pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
4613 let madd = _mm256_madd_epi16(a, b).as_i32x8();
4614 let zero = _mm256_setzero_si256().as_i32x8();
4615 transmute(simd_select_bitmask(k, madd, zero))
4616}
4617
4618/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4619///
4620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_madd_epi16&expand=3506)
4621#[inline]
4622#[target_feature(enable = "avx512bw,avx512vl")]
4623#[cfg_attr(test, assert_instr(vpmaddwd))]
4624pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4625 let madd = _mm_madd_epi16(a, b).as_i32x4();
4626 transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
4627}
4628
4629/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4630///
4631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_madd_epi16&expand=3507)
4632#[inline]
4633#[target_feature(enable = "avx512bw,avx512vl")]
4634#[cfg_attr(test, assert_instr(vpmaddwd))]
4635pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4636 let madd = _mm_madd_epi16(a, b).as_i32x4();
4637 let zero = _mm_setzero_si128().as_i32x4();
4638 transmute(simd_select_bitmask(k, madd, zero))
4639}
4640
4641/// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst.
4642///
4643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maddubs_epi16&expand=3539)
4644#[inline]
4645#[target_feature(enable = "avx512bw")]
4646#[cfg_attr(test, assert_instr(vpmaddubsw))]
4647pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
4648 transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64()))
4649}
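
// A sketch (not upstream code) of the unsigned-by-signed multiply-add above: adjacent
// byte products are summed into saturating 16-bit lanes. The function name is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn maddubs_epi16_example() {
    let a = _mm512_set1_epi16(0x0201); // unsigned byte lanes repeat as 1, 2
    let b = _mm512_set1_epi16(0x0403); // signed byte lanes repeat as 3, 4
    let r = _mm512_maddubs_epi16(a, b); // every 16-bit lane is 1*3 + 2*4 = 11
    let mut out = [0i16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr(), r);
    assert!(out.iter().all(|&x| x == 11));
}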
4650
4651/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4652///
4653/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_maddubs_epi16&expand=3540)
4654#[inline]
4655#[target_feature(enable = "avx512bw")]
4656#[cfg_attr(test, assert_instr(vpmaddubsw))]
4657pub unsafe fn _mm512_mask_maddubs_epi16(
4658 src: __m512i,
4659 k: __mmask32,
4660 a: __m512i,
4661 b: __m512i,
4662) -> __m512i {
4663 let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
4664 transmute(simd_select_bitmask(k, madd, src.as_i16x32()))
4665}
4666
4667/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4668///
4669/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_maddubs_epi16&expand=3541)
4670#[inline]
4671#[target_feature(enable = "avx512bw")]
4672#[cfg_attr(test, assert_instr(vpmaddubsw))]
4673pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
4674 let madd = _mm512_maddubs_epi16(a, b).as_i16x32();
4675 let zero = _mm512_setzero_si512().as_i16x32();
4676 transmute(simd_select_bitmask(k, madd, zero))
4677}
4678
4679/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_maddubs_epi16&expand=3537)
4682#[inline]
4683#[target_feature(enable = "avx512bw,avx512vl")]
4684#[cfg_attr(test, assert_instr(vpmaddubsw))]
4685pub unsafe fn _mm256_mask_maddubs_epi16(
4686 src: __m256i,
4687 k: __mmask16,
4688 a: __m256i,
4689 b: __m256i,
4690) -> __m256i {
4691 let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
4692 transmute(simd_select_bitmask(k, madd, src.as_i16x16()))
4693}
4694
4695/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4696///
4697/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_maddubs_epi16&expand=3538)
4698#[inline]
4699#[target_feature(enable = "avx512bw,avx512vl")]
4700#[cfg_attr(test, assert_instr(vpmaddubsw))]
4701pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
4702 let madd = _mm256_maddubs_epi16(a, b).as_i16x16();
4703 let zero = _mm256_setzero_si256().as_i16x16();
4704 transmute(simd_select_bitmask(k, madd, zero))
4705}
4706
4707/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4708///
4709/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_maddubs_epi16&expand=3534)
4710#[inline]
4711#[target_feature(enable = "avx512bw,avx512vl")]
4712#[cfg_attr(test, assert_instr(vpmaddubsw))]
4713pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4714 let madd = _mm_maddubs_epi16(a, b).as_i16x8();
4715 transmute(simd_select_bitmask(k, madd, src.as_i16x8()))
4716}
4717
4718/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4719///
4720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_maddubs_epi16&expand=3535)
4721#[inline]
4722#[target_feature(enable = "avx512bw,avx512vl")]
4723#[cfg_attr(test, assert_instr(vpmaddubsw))]
4724pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4725 let madd = _mm_maddubs_epi16(a, b).as_i16x8();
4726 let zero = _mm_setzero_si128().as_i16x8();
4727 transmute(simd_select_bitmask(k, madd, zero))
4728}
4729
4730/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
4731///
4732/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi32&expand=4091)
4733#[inline]
4734#[target_feature(enable = "avx512bw")]
4735#[cfg_attr(test, assert_instr(vpackssdw))]
4736pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
4737 transmute(vpackssdw(a.as_i32x16(), b.as_i32x16()))
4738}
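
// A sketch (not upstream code) of the signed saturation performed by the pack above:
// 32-bit inputs outside the i16 range clamp to i16::MIN / i16::MAX. The function name
// is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn packs_epi32_example() {
    let a = _mm512_set1_epi32(70_000); // above i16::MAX
    let b = _mm512_set1_epi32(-70_000); // below i16::MIN
    let r = _mm512_packs_epi32(a, b);
    let mut out = [0i16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr(), r);
    // Lanes taken from `a` saturate to 32767, lanes taken from `b` to -32768.
    assert!(out.iter().all(|&x| x == i16::MAX || x == i16::MIN));
    assert_eq!(out[0], i16::MAX);
}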
4739
4740/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4741///
4742/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi32&expand=4089)
4743#[inline]
4744#[target_feature(enable = "avx512bw")]
4745#[cfg_attr(test, assert_instr(vpackssdw))]
4746pub unsafe fn _mm512_mask_packs_epi32(
4747 src: __m512i,
4748 k: __mmask32,
4749 a: __m512i,
4750 b: __m512i,
4751) -> __m512i {
4752 let pack = _mm512_packs_epi32(a, b).as_i16x32();
4753 transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
4754}
4755
4756/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi32&expand=4090)
4759#[inline]
4760#[target_feature(enable = "avx512bw")]
4761#[cfg_attr(test, assert_instr(vpackssdw))]
4762pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
4763 let pack = _mm512_packs_epi32(a, b).as_i16x32();
4764 let zero = _mm512_setzero_si512().as_i16x32();
4765 transmute(simd_select_bitmask(k, pack, zero))
4766}
4767
4768/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4769///
4770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packs_epi32&expand=4086)
4771#[inline]
4772#[target_feature(enable = "avx512bw,avx512vl")]
4773#[cfg_attr(test, assert_instr(vpackssdw))]
4774pub unsafe fn _mm256_mask_packs_epi32(
4775 src: __m256i,
4776 k: __mmask16,
4777 a: __m256i,
4778 b: __m256i,
4779) -> __m256i {
4780 let pack = _mm256_packs_epi32(a, b).as_i16x16();
4781 transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
4782}
4783
4784/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4785///
4786/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packs_epi32&expand=4087)
4787#[inline]
4788#[target_feature(enable = "avx512bw,avx512vl")]
4789#[cfg_attr(test, assert_instr(vpackssdw))]
4790pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
4791 let pack = _mm256_packs_epi32(a, b).as_i16x16();
4792 let zero = _mm256_setzero_si256().as_i16x16();
4793 transmute(simd_select_bitmask(k, pack, zero))
4794}
4795
4796/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4797///
4798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packs_epi32&expand=4083)
4799#[inline]
4800#[target_feature(enable = "avx512bw,avx512vl")]
4801#[cfg_attr(test, assert_instr(vpackssdw))]
4802pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4803 let pack = _mm_packs_epi32(a, b).as_i16x8();
4804 transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
4805}
4806
4807/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4808///
4809/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packs_epi32&expand=4084)
4810#[inline]
4811#[target_feature(enable = "avx512bw,avx512vl")]
4812#[cfg_attr(test, assert_instr(vpackssdw))]
4813pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4814 let pack = _mm_packs_epi32(a, b).as_i16x8();
4815 let zero = _mm_setzero_si128().as_i16x8();
4816 transmute(simd_select_bitmask(k, pack, zero))
4817}
4818
4819/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
4820///
4821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi16&expand=4082)
4822#[inline]
4823#[target_feature(enable = "avx512bw")]
4824#[cfg_attr(test, assert_instr(vpacksswb))]
4825pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
4826 transmute(vpacksswb(a.as_i16x32(), b.as_i16x32()))
4827}
4828
4829/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4830///
4831/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi16&expand=4080)
4832#[inline]
4833#[target_feature(enable = "avx512bw")]
4834#[cfg_attr(test, assert_instr(vpacksswb))]
4835pub unsafe fn _mm512_mask_packs_epi16(
4836 src: __m512i,
4837 k: __mmask64,
4838 a: __m512i,
4839 b: __m512i,
4840) -> __m512i {
4841 let pack = _mm512_packs_epi16(a, b).as_i8x64();
4842 transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
4843}
4844
4845/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4846///
4847/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi16&expand=4081)
4848#[inline]
4849#[target_feature(enable = "avx512bw")]
4850#[cfg_attr(test, assert_instr(vpacksswb))]
4851pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
4852 let pack = _mm512_packs_epi16(a, b).as_i8x64();
4853 let zero = _mm512_setzero_si512().as_i8x64();
4854 transmute(simd_select_bitmask(k, pack, zero))
4855}
4856
4857/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4858///
4859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packs_epi16&expand=4077)
4860#[inline]
4861#[target_feature(enable = "avx512bw,avx512vl")]
4862#[cfg_attr(test, assert_instr(vpacksswb))]
4863pub unsafe fn _mm256_mask_packs_epi16(
4864 src: __m256i,
4865 k: __mmask32,
4866 a: __m256i,
4867 b: __m256i,
4868) -> __m256i {
4869 let pack = _mm256_packs_epi16(a, b).as_i8x32();
4870 transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
4871}
4872
4873/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4874///
4875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packs_epi16&expand=4078)
4876#[inline]
4877#[target_feature(enable = "avx512bw,avx512vl")]
4878#[cfg_attr(test, assert_instr(vpacksswb))]
4879pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
4880 let pack = _mm256_packs_epi16(a, b).as_i8x32();
4881 let zero = _mm256_setzero_si256().as_i8x32();
4882 transmute(simd_select_bitmask(k, pack, zero))
4883}
4884
4885/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4886///
4887/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packs_epi16&expand=4074)
4888#[inline]
4889#[target_feature(enable = "avx512bw,avx512vl")]
4890#[cfg_attr(test, assert_instr(vpacksswb))]
4891pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
4892 let pack = _mm_packs_epi16(a, b).as_i8x16();
4893 transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
4894}
4895
4896/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4897///
4898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packs_epi16&expand=4075)
4899#[inline]
4900#[target_feature(enable = "avx512bw,avx512vl")]
4901#[cfg_attr(test, assert_instr(vpacksswb))]
4902pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
4903 let pack = _mm_packs_epi16(a, b).as_i8x16();
4904 let zero = _mm_setzero_si128().as_i8x16();
4905 transmute(simd_select_bitmask(k, pack, zero))
4906}
4907
4908/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
4909///
4910/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi32&expand=4130)
4911#[inline]
4912#[target_feature(enable = "avx512bw")]
4913#[cfg_attr(test, assert_instr(vpackusdw))]
4914pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
4915 transmute(vpackusdw(a.as_i32x16(), b.as_i32x16()))
4916}
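
// A sketch (not upstream code) of the unsigned saturation performed by the pack above:
// negative inputs clamp to 0 and inputs above u16::MAX clamp to 65535. The function name
// is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn packus_epi32_example() {
    let a = _mm512_set1_epi32(-5); // negative: clamps to 0
    let b = _mm512_set1_epi32(70_000); // above u16::MAX: clamps to 65535
    let r = _mm512_packus_epi32(a, b);
    let mut out = [0u16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr() as *mut i16, r);
    assert_eq!(out[0], 0);
    assert_eq!(out[4], 65535); // within each 128-bit lane, lanes 4..8 come from `b`
}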
4917
4918/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4919///
4920/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi32&expand=4128)
4921#[inline]
4922#[target_feature(enable = "avx512bw")]
4923#[cfg_attr(test, assert_instr(vpackusdw))]
4924pub unsafe fn _mm512_mask_packus_epi32(
4925 src: __m512i,
4926 k: __mmask32,
4927 a: __m512i,
4928 b: __m512i,
4929) -> __m512i {
4930 let pack = _mm512_packus_epi32(a, b).as_i16x32();
4931 transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
4932}
4933
4934/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4935///
4936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi32&expand=4129)
4937#[inline]
4938#[target_feature(enable = "avx512bw")]
4939#[cfg_attr(test, assert_instr(vpackusdw))]
4940pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
4941 let pack = _mm512_packus_epi32(a, b).as_i16x32();
4942 let zero = _mm512_setzero_si512().as_i16x32();
4943 transmute(simd_select_bitmask(k, pack, zero))
4944}
4945
4946/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4947///
4948/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packus_epi32&expand=4125)
4949#[inline]
4950#[target_feature(enable = "avx512bw,avx512vl")]
4951#[cfg_attr(test, assert_instr(vpackusdw))]
4952pub unsafe fn _mm256_mask_packus_epi32(
4953 src: __m256i,
4954 k: __mmask16,
4955 a: __m256i,
4956 b: __m256i,
4957) -> __m256i {
4958 let pack = _mm256_packus_epi32(a, b).as_i16x16();
4959 transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
4960}
4961
4962/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4963///
4964/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packus_epi32&expand=4126)
4965#[inline]
4966#[target_feature(enable = "avx512bw,avx512vl")]
4967#[cfg_attr(test, assert_instr(vpackusdw))]
4968pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
4969 let pack = _mm256_packus_epi32(a, b).as_i16x16();
4970 let zero = _mm256_setzero_si256().as_i16x16();
4971 transmute(simd_select_bitmask(k, pack, zero))
4972}
4973
4974/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4975///
4976/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packus_epi32&expand=4122)
4977#[inline]
4978#[target_feature(enable = "avx512bw,avx512vl")]
4979#[cfg_attr(test, assert_instr(vpackusdw))]
4980pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4981 let pack = _mm_packus_epi32(a, b).as_i16x8();
4982 transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
4983}
4984
4985/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4986///
4987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packus_epi32&expand=4123)
4988#[inline]
4989#[target_feature(enable = "avx512bw,avx512vl")]
4990#[cfg_attr(test, assert_instr(vpackusdw))]
4991pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
4992 let pack = _mm_packus_epi32(a, b).as_i16x8();
4993 let zero = _mm_setzero_si128().as_i16x8();
4994 transmute(simd_select_bitmask(k, pack, zero))
4995}
4996
4997/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
4998///
4999/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi16&expand=4121)
5000#[inline]
5001#[target_feature(enable = "avx512bw")]
5002#[cfg_attr(test, assert_instr(vpackuswb))]
5003pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
5004 transmute(vpackuswb(a.as_i16x32(), b.as_i16x32()))
5005}
5006
5007/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5008///
5009/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi16&expand=4119)
5010#[inline]
5011#[target_feature(enable = "avx512bw")]
5012#[cfg_attr(test, assert_instr(vpackuswb))]
5013pub unsafe fn _mm512_mask_packus_epi16(
5014 src: __m512i,
5015 k: __mmask64,
5016 a: __m512i,
5017 b: __m512i,
5018) -> __m512i {
5019 let pack = _mm512_packus_epi16(a, b).as_i8x64();
5020 transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
5021}
5022
5023/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5024///
5025/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi16&expand=4120)
5026#[inline]
5027#[target_feature(enable = "avx512bw")]
5028#[cfg_attr(test, assert_instr(vpackuswb))]
5029pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
5030 let pack = _mm512_packus_epi16(a, b).as_i8x64();
5031 let zero = _mm512_setzero_si512().as_i8x64();
5032 transmute(simd_select_bitmask(k, pack, zero))
5033}
5034
5035/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5036///
5037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packus_epi16&expand=4116)
5038#[inline]
5039#[target_feature(enable = "avx512bw,avx512vl")]
5040#[cfg_attr(test, assert_instr(vpackuswb))]
5041pub unsafe fn _mm256_mask_packus_epi16(
5042 src: __m256i,
5043 k: __mmask32,
5044 a: __m256i,
5045 b: __m256i,
5046) -> __m256i {
5047 let pack = _mm256_packus_epi16(a, b).as_i8x32();
5048 transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
5049}
5050
5051/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5052///
5053/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packus_epi16&expand=4117)
5054#[inline]
5055#[target_feature(enable = "avx512bw,avx512vl")]
5056#[cfg_attr(test, assert_instr(vpackuswb))]
5057pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
5058 let pack = _mm256_packus_epi16(a, b).as_i8x32();
5059 let zero = _mm256_setzero_si256().as_i8x32();
5060 transmute(simd_select_bitmask(k, pack, zero))
5061}
5062
5063/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5064///
5065/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packus_epi16&expand=4113)
5066#[inline]
5067#[target_feature(enable = "avx512bw,avx512vl")]
5068#[cfg_attr(test, assert_instr(vpackuswb))]
5069pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
5070 let pack = _mm_packus_epi16(a, b).as_i8x16();
5071 transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
5072}
5073
5074/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5075///
5076/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packus_epi16&expand=4114)
5077#[inline]
5078#[target_feature(enable = "avx512bw,avx512vl")]
5079#[cfg_attr(test, assert_instr(vpackuswb))]
5080pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
5081 let pack = _mm_packus_epi16(a, b).as_i8x16();
5082 let zero = _mm_setzero_si128().as_i8x16();
5083 transmute(simd_select_bitmask(k, pack, zero))
5084}
5085
5086/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
5087///
5088/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu16&expand=388)
5089#[inline]
5090#[target_feature(enable = "avx512bw")]
5091#[cfg_attr(test, assert_instr(vpavgw))]
5092pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
5093 transmute(vpavgw(a.as_u16x32(), b.as_u16x32()))
5094}
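
// A sketch (not upstream code) of the rounding average above: each lane computes
// (a + b + 1) >> 1 on unsigned 16-bit values, so ties round up. The function name is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn avg_epu16_example() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    let r = _mm512_avg_epu16(a, b); // (1 + 2 + 1) >> 1 == 2
    let mut out = [0u16; 32];
    _mm512_storeu_epi16(out.as_mut_ptr() as *mut i16, r);
    assert!(out.iter().all(|&x| x == 2));
}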
5095
5096/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5097///
5098/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu16&expand=389)
5099#[inline]
5100#[target_feature(enable = "avx512bw")]
5101#[cfg_attr(test, assert_instr(vpavgw))]
5102pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
5103 let avg = _mm512_avg_epu16(a, b).as_u16x32();
5104 transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
5105}
5106
5107/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5108///
5109/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu16&expand=390)
5110#[inline]
5111#[target_feature(enable = "avx512bw")]
5112#[cfg_attr(test, assert_instr(vpavgw))]
5113pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
5114 let avg = _mm512_avg_epu16(a, b).as_u16x32();
5115 let zero = _mm512_setzero_si512().as_u16x32();
5116 transmute(simd_select_bitmask(k, avg, zero))
5117}
5118
5119/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5120///
5121/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_avg_epu16&expand=386)
5122#[inline]
5123#[target_feature(enable = "avx512bw,avx512vl")]
5124#[cfg_attr(test, assert_instr(vpavgw))]
5125pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
5126 let avg = _mm256_avg_epu16(a, b).as_u16x16();
5127 transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
5128}
5129
5130/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5131///
5132/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_avg_epu16&expand=387)
5133#[inline]
5134#[target_feature(enable = "avx512bw,avx512vl")]
5135#[cfg_attr(test, assert_instr(vpavgw))]
5136pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
5137 let avg = _mm256_avg_epu16(a, b).as_u16x16();
5138 let zero = _mm256_setzero_si256().as_u16x16();
5139 transmute(simd_select_bitmask(k, avg, zero))
5140}
5141
5142/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5143///
5144/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_avg_epu16&expand=383)
5145#[inline]
5146#[target_feature(enable = "avx512bw,avx512vl")]
5147#[cfg_attr(test, assert_instr(vpavgw))]
5148pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
5149 let avg = _mm_avg_epu16(a, b).as_u16x8();
5150 transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
5151}
5152
5153/// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5154///
5155/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_avg_epu16&expand=384)
5156#[inline]
5157#[target_feature(enable = "avx512bw,avx512vl")]
5158#[cfg_attr(test, assert_instr(vpavgw))]
5159pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
5160 let avg = _mm_avg_epu16(a, b).as_u16x8();
5161 let zero = _mm_setzero_si128().as_u16x8();
5162 transmute(simd_select_bitmask(k, avg, zero))
5163}
5164
5165/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
5166///
5167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu8&expand=397)
5168#[inline]
5169#[target_feature(enable = "avx512bw")]
5170#[cfg_attr(test, assert_instr(vpavgb))]
5171pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
5172 transmute(vpavgb(a.as_u8x64(), b.as_u8x64()))
5173}
5174
5175/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5176///
5177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu8&expand=398)
5178#[inline]
5179#[target_feature(enable = "avx512bw")]
5180#[cfg_attr(test, assert_instr(vpavgb))]
5181pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
5182 let avg = _mm512_avg_epu8(a, b).as_u8x64();
5183 transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
5184}
5185
5186/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5187///
5188/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu8&expand=399)
5189#[inline]
5190#[target_feature(enable = "avx512bw")]
5191#[cfg_attr(test, assert_instr(vpavgb))]
5192pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
5193 let avg = _mm512_avg_epu8(a, b).as_u8x64();
5194 let zero = _mm512_setzero_si512().as_u8x64();
5195 transmute(simd_select_bitmask(k, avg, zero))
5196}
5197
5198/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5199///
5200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_avg_epu8&expand=395)
5201#[inline]
5202#[target_feature(enable = "avx512bw,avx512vl")]
5203#[cfg_attr(test, assert_instr(vpavgb))]
5204pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
5205 let avg = _mm256_avg_epu8(a, b).as_u8x32();
5206 transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
5207}
5208
5209/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5210///
5211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_avg_epu8&expand=396)
5212#[inline]
5213#[target_feature(enable = "avx512bw,avx512vl")]
5214#[cfg_attr(test, assert_instr(vpavgb))]
5215pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
5216 let avg = _mm256_avg_epu8(a, b).as_u8x32();
5217 let zero = _mm256_setzero_si256().as_u8x32();
5218 transmute(simd_select_bitmask(k, avg, zero))
5219}
5220
5221/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5222///
5223/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_avg_epu8&expand=392)
5224#[inline]
5225#[target_feature(enable = "avx512bw,avx512vl")]
5226#[cfg_attr(test, assert_instr(vpavgb))]
5227pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
5228 let avg = _mm_avg_epu8(a, b).as_u8x16();
5229 transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
5230}
5231
5232/// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5233///
5234/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_avg_epu8&expand=393)
5235#[inline]
5236#[target_feature(enable = "avx512bw,avx512vl")]
5237#[cfg_attr(test, assert_instr(vpavgb))]
5238pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
5239 let avg = _mm_avg_epu8(a, b).as_u8x16();
5240 let zero = _mm_setzero_si128().as_u8x16();
5241 transmute(simd_select_bitmask(k, avg, zero))
5242}
5243
5244/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
5245///
5246/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi16&expand=5271)
5247#[inline]
5248#[target_feature(enable = "avx512bw")]
5249#[cfg_attr(test, assert_instr(vpsllw))]
5250pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
5251 transmute(vpsllw(a.as_i16x32(), count.as_i16x8()))
5252}
5253
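// Illustrative sketch (not part of the upstream source): `_mm512_sll_epi16` shifts every
// 16-bit lane by the same amount, taken from the low 64 bits of `count`; counts of 16 or
// more zero the result. The helper name is hypothetical; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sll_epi16() -> __m512i {
    let a = _mm512_set1_epi16(3);
    let count = _mm_cvtsi32_si128(4); // shift all lanes left by 4 bits
    // Every i16 lane of the result holds 3 << 4 == 48.
    _mm512_sll_epi16(a, count)
}
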
5254/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5255///
5256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi16&expand=5269)
5257#[inline]
5258#[target_feature(enable = "avx512bw")]
5259#[cfg_attr(test, assert_instr(vpsllw))]
5260pub unsafe fn _mm512_mask_sll_epi16(
5261 src: __m512i,
5262 k: __mmask32,
5263 a: __m512i,
5264 count: __m128i,
5265) -> __m512i {
5266 let shf = _mm512_sll_epi16(a, count).as_i16x32();
5267 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5268}
5269
5270/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5271///
5272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi16&expand=5270)
5273#[inline]
5274#[target_feature(enable = "avx512bw")]
5275#[cfg_attr(test, assert_instr(vpsllw))]
5276pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
5277 let shf = _mm512_sll_epi16(a, count).as_i16x32();
5278 let zero = _mm512_setzero_si512().as_i16x32();
5279 transmute(simd_select_bitmask(k, shf, zero))
5280}
5281
5282/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5283///
5284/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sll_epi16&expand=5266)
5285#[inline]
5286#[target_feature(enable = "avx512bw,avx512vl")]
5287#[cfg_attr(test, assert_instr(vpsllw))]
5288pub unsafe fn _mm256_mask_sll_epi16(
5289 src: __m256i,
5290 k: __mmask16,
5291 a: __m256i,
5292 count: __m128i,
5293) -> __m256i {
5294 let shf = _mm256_sll_epi16(a, count).as_i16x16();
5295 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5296}
5297
5298/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5299///
5300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sll_epi16&expand=5267)
5301#[inline]
5302#[target_feature(enable = "avx512bw,avx512vl")]
5303#[cfg_attr(test, assert_instr(vpsllw))]
5304pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
5305 let shf = _mm256_sll_epi16(a, count).as_i16x16();
5306 let zero = _mm256_setzero_si256().as_i16x16();
5307 transmute(simd_select_bitmask(k, shf, zero))
5308}
5309
5310/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5311///
5312/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sll_epi16&expand=5263)
5313#[inline]
5314#[target_feature(enable = "avx512bw,avx512vl")]
5315#[cfg_attr(test, assert_instr(vpsllw))]
5316pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5317 let shf = _mm_sll_epi16(a, count).as_i16x8();
5318 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5319}
5320
5321/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5322///
5323/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sll_epi16&expand=5264)
5324#[inline]
5325#[target_feature(enable = "avx512bw,avx512vl")]
5326#[cfg_attr(test, assert_instr(vpsllw))]
5327pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5328 let shf = _mm_sll_epi16(a, count).as_i16x8();
5329 let zero = _mm_setzero_si128().as_i16x8();
5330 transmute(simd_select_bitmask(k, shf, zero))
5331}
5332
5333/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
5334///
5335/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi16&expand=5301)
5336#[inline]
5337#[target_feature(enable = "avx512bw")]
5338#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5339#[rustc_legacy_const_generics(1)]
5340pub unsafe fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
5341 static_assert_imm_u8!(IMM8);
fc512014 5342 let a = a.as_i16x32();
17df50a5 5343 let r = vpslliw(a, IMM8);
5344 transmute(r)
5345}
5346
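// Illustrative sketch (not part of the upstream source): the immediate form takes the
// shift count as a const generic, so it must be known at compile time. Hypothetical
// helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_slli_epi16() -> __m512i {
    let a = _mm512_set1_epi16(3);
    // Every i16 lane of the result holds 3 << 5 == 96.
    _mm512_slli_epi16::<5>(a)
}
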
5347/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5348///
5349/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi16&expand=5299)
5350#[inline]
5351#[target_feature(enable = "avx512bw")]
5352#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5353#[rustc_legacy_const_generics(3)]
5354pub unsafe fn _mm512_mask_slli_epi16<const IMM8: u32>(
5355 src: __m512i,
5356 k: __mmask32,
5357 a: __m512i,
5358) -> __m512i {
5359 static_assert_imm_u8!(IMM8);
fc512014 5360 let a = a.as_i16x32();
17df50a5 5361 let shf = vpslliw(a, IMM8);
5362 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5363}
5364
5365/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5366///
5367/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi16&expand=5300)
5368#[inline]
5369#[target_feature(enable = "avx512bw")]
5370#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5371#[rustc_legacy_const_generics(2)]
5372pub unsafe fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
5373 static_assert_imm_u8!(IMM8);
fc512014 5374 let a = a.as_i16x32();
17df50a5 5375 let shf = vpslliw(a, IMM8);
5376 let zero = _mm512_setzero_si512().as_i16x32();
5377 transmute(simd_select_bitmask(k, shf, zero))
5378}
5379
5380/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5381///
5382/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_slli_epi16&expand=5296)
5383#[inline]
5384#[target_feature(enable = "avx512bw,avx512vl")]
5385#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5386#[rustc_legacy_const_generics(3)]
5387pub unsafe fn _mm256_mask_slli_epi16<const IMM8: u32>(
5388 src: __m256i,
5389 k: __mmask16,
5390 a: __m256i,
5391) -> __m256i {
5392 static_assert_imm_u8!(IMM8);
5393 let imm8 = IMM8 as i32;
5394 let r = pslliw256(a.as_i16x16(), imm8);
5395 transmute(simd_select_bitmask(k, r, src.as_i16x16()))
5396}
5397
5398/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5399///
5400/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_slli_epi16&expand=5297)
5401#[inline]
5402#[target_feature(enable = "avx512bw,avx512vl")]
5403#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5404#[rustc_legacy_const_generics(2)]
5405pub unsafe fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
5406 static_assert_imm_u8!(IMM8);
5407 let imm8 = IMM8 as i32;
5408 let r = pslliw256(a.as_i16x16(), imm8);
fc512014 5409 let zero = _mm256_setzero_si256().as_i16x16();
17df50a5 5410 transmute(simd_select_bitmask(k, r, zero))
5411}
5412
5413/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5414///
5415/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_slli_epi16&expand=5293)
5416#[inline]
5417#[target_feature(enable = "avx512bw,avx512vl")]
5418#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5419#[rustc_legacy_const_generics(3)]
5420pub unsafe fn _mm_mask_slli_epi16<const IMM8: u32>(
5421 src: __m128i,
5422 k: __mmask8,
5423 a: __m128i,
5424) -> __m128i {
5425 static_assert_imm_u8!(IMM8);
5426 let imm8 = IMM8 as i32;
5427 let r = pslliw128(a.as_i16x8(), imm8);
5428 transmute(simd_select_bitmask(k, r, src.as_i16x8()))
5429}
5430
5431/// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5432///
5433/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_slli_epi16&expand=5294)
5434#[inline]
5435#[target_feature(enable = "avx512bw,avx512vl")]
5436#[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
5437#[rustc_legacy_const_generics(2)]
5438pub unsafe fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
5439 static_assert_imm_u8!(IMM8);
5440 let imm8 = IMM8 as i32;
5441 let r = pslliw128(a.as_i16x8(), imm8);
fc512014 5442 let zero = _mm_setzero_si128().as_i16x8();
17df50a5 5443 transmute(simd_select_bitmask(k, r, zero))
5444}
5445
5446/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
5447///
5448/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi16&expand=5333)
5449#[inline]
5450#[target_feature(enable = "avx512bw")]
5451#[cfg_attr(test, assert_instr(vpsllvw))]
5452pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
5453 transmute(vpsllvw(a.as_i16x32(), count.as_i16x32()))
5454}
5455
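// Illustrative sketch (not part of the upstream source): unlike `_mm512_sll_epi16`, the
// `sllv` form reads a separate shift count from each 16-bit lane of `count`, so different
// lanes can be shifted by different amounts. Hypothetical helper; `avx512bw` is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sllv_epi16() -> __m512i {
    let a = _mm512_set1_epi16(1);
    // A uniform count vector is used here for brevity; in real code each lane may differ.
    let count = _mm512_set1_epi16(3);
    // Every i16 lane of the result holds 1 << 3 == 8.
    _mm512_sllv_epi16(a, count)
}
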
5456/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5457///
5458/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi16&expand=5331)
5459#[inline]
5460#[target_feature(enable = "avx512bw")]
5461#[cfg_attr(test, assert_instr(vpsllvw))]
5462pub unsafe fn _mm512_mask_sllv_epi16(
5463 src: __m512i,
5464 k: __mmask32,
5465 a: __m512i,
5466 count: __m512i,
5467) -> __m512i {
5468 let shf = _mm512_sllv_epi16(a, count).as_i16x32();
5469 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5470}
5471
5472/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5473///
5474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi16&expand=5332)
5475#[inline]
5476#[target_feature(enable = "avx512bw")]
5477#[cfg_attr(test, assert_instr(vpsllvw))]
5478pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
5479 let shf = _mm512_sllv_epi16(a, count).as_i16x32();
5480 let zero = _mm512_setzero_si512().as_i16x32();
5481 transmute(simd_select_bitmask(k, shf, zero))
5482}
5483
5484/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
5485///
5486/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi16&expand=5330)
5487#[inline]
5488#[target_feature(enable = "avx512bw,avx512vl")]
5489#[cfg_attr(test, assert_instr(vpsllvw))]
5490pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
5491 transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16()))
5492}
5493
5494/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5495///
5496/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sllv_epi16&expand=5328)
5497#[inline]
5498#[target_feature(enable = "avx512bw,avx512vl")]
5499#[cfg_attr(test, assert_instr(vpsllvw))]
5500pub unsafe fn _mm256_mask_sllv_epi16(
5501 src: __m256i,
5502 k: __mmask16,
5503 a: __m256i,
5504 count: __m256i,
5505) -> __m256i {
5506 let shf = _mm256_sllv_epi16(a, count).as_i16x16();
5507 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5508}
5509
5510/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5511///
5512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sllv_epi16&expand=5329)
5513#[inline]
5514#[target_feature(enable = "avx512bw,avx512vl")]
5515#[cfg_attr(test, assert_instr(vpsllvw))]
5516pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
5517 let shf = _mm256_sllv_epi16(a, count).as_i16x16();
5518 let zero = _mm256_setzero_si256().as_i16x16();
5519 transmute(simd_select_bitmask(k, shf, zero))
5520}
5521
5522/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
5523///
5524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi16&expand=5327)
5525#[inline]
5526#[target_feature(enable = "avx512bw,avx512vl")]
5527#[cfg_attr(test, assert_instr(vpsllvw))]
5528pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
5529 transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8()))
5530}
5531
5532/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5533///
5534/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sllv_epi16&expand=5325)
5535#[inline]
5536#[target_feature(enable = "avx512bw,avx512vl")]
5537#[cfg_attr(test, assert_instr(vpsllvw))]
5538pub unsafe fn _mm_mask_sllv_epi16(
5539 src: __m128i,
5540 k: __mmask8,
5541 a: __m128i,
5542 count: __m128i,
5543) -> __m128i {
5544 let shf = _mm_sllv_epi16(a, count).as_i16x8();
5545 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5546}
5547
5548/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5549///
5550/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sllv_epi16&expand=5326)
5551#[inline]
5552#[target_feature(enable = "avx512bw,avx512vl")]
5553#[cfg_attr(test, assert_instr(vpsllvw))]
5554pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5555 let shf = _mm_sllv_epi16(a, count).as_i16x8();
5556 let zero = _mm_setzero_si128().as_i16x8();
5557 transmute(simd_select_bitmask(k, shf, zero))
5558}
5559
5560/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst.
5561///
5562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi16&expand=5483)
5563#[inline]
5564#[target_feature(enable = "avx512bw")]
5565#[cfg_attr(test, assert_instr(vpsrlw))]
5566pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i {
5567 transmute(vpsrlw(a.as_i16x32(), count.as_i16x8()))
5568}
5569
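// Illustrative sketch (not part of the upstream source): the logical right shift fills
// vacated bits with zeros, so a lane holding -1 (0xFFFF) shifted right by 1 becomes
// 0x7FFF. Hypothetical helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srl_epi16() -> __m512i {
    let a = _mm512_set1_epi16(-1);
    let count = _mm_cvtsi32_si128(1); // shift all lanes right by 1 bit
    // Every i16 lane of the result holds 0x7FFF (32767) because zeros are shifted in.
    _mm512_srl_epi16(a, count)
}
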
5570/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5571///
5572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi16&expand=5481)
5573#[inline]
5574#[target_feature(enable = "avx512bw")]
5575#[cfg_attr(test, assert_instr(vpsrlw))]
5576pub unsafe fn _mm512_mask_srl_epi16(
5577 src: __m512i,
5578 k: __mmask32,
5579 a: __m512i,
5580 count: __m128i,
5581) -> __m512i {
5582 let shf = _mm512_srl_epi16(a, count).as_i16x32();
5583 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5584}
5585
5586/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5587///
5588/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi16&expand=5482)
5589#[inline]
5590#[target_feature(enable = "avx512bw")]
5591#[cfg_attr(test, assert_instr(vpsrlw))]
5592pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
5593 let shf = _mm512_srl_epi16(a, count).as_i16x32();
5594 let zero = _mm512_setzero_si512().as_i16x32();
5595 transmute(simd_select_bitmask(k, shf, zero))
5596}
5597
5598/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5599///
5600/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srl_epi16&expand=5478)
5601#[inline]
5602#[target_feature(enable = "avx512bw,avx512vl")]
5603#[cfg_attr(test, assert_instr(vpsrlw))]
5604pub unsafe fn _mm256_mask_srl_epi16(
5605 src: __m256i,
5606 k: __mmask16,
5607 a: __m256i,
5608 count: __m128i,
5609) -> __m256i {
5610 let shf = _mm256_srl_epi16(a, count).as_i16x16();
5611 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5612}
5613
5614/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5615///
5616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srl_epi16&expand=5479)
5617#[inline]
5618#[target_feature(enable = "avx512bw,avx512vl")]
5619#[cfg_attr(test, assert_instr(vpsrlw))]
5620pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
5621 let shf = _mm256_srl_epi16(a, count).as_i16x16();
5622 let zero = _mm256_setzero_si256().as_i16x16();
5623 transmute(simd_select_bitmask(k, shf, zero))
5624}
5625
5626/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5627///
5628/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srl_epi16&expand=5475)
5629#[inline]
5630#[target_feature(enable = "avx512bw,avx512vl")]
5631#[cfg_attr(test, assert_instr(vpsrlw))]
5632pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5633 let shf = _mm_srl_epi16(a, count).as_i16x8();
5634 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5635}
5636
5637/// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5638///
5639/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srl_epi16&expand=5476)
5640#[inline]
5641#[target_feature(enable = "avx512bw,avx512vl")]
5642#[cfg_attr(test, assert_instr(vpsrlw))]
5643pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5644 let shf = _mm_srl_epi16(a, count).as_i16x8();
5645 let zero = _mm_setzero_si128().as_i16x8();
5646 transmute(simd_select_bitmask(k, shf, zero))
5647}
5648
5649/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
5650///
5651/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi16&expand=5513)
5652#[inline]
5653#[target_feature(enable = "avx512bw")]
5654#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5655#[rustc_legacy_const_generics(1)]
5656pub unsafe fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
5657 static_assert_imm_u8!(IMM8);
fc512014 5658 let a = a.as_i16x32();
17df50a5 5659 let r = vpsrliw(a, IMM8);
5660 transmute(r)
5661}
5662
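// Illustrative sketch (not part of the upstream source): the immediate logical right
// shift, with the count supplied as a const generic. Hypothetical helper; `avx512bw`
// support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srli_epi16() -> __m512i {
    let a = _mm512_set1_epi16(0x0100);
    // Every i16 lane of the result holds 0x0100 >> 4 == 0x0010.
    _mm512_srli_epi16::<4>(a)
}
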
5663/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5664///
5665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi16&expand=5511)
5666#[inline]
5667#[target_feature(enable = "avx512bw")]
5668#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5669#[rustc_legacy_const_generics(3)]
5670pub unsafe fn _mm512_mask_srli_epi16<const IMM8: u32>(
5671 src: __m512i,
5672 k: __mmask32,
5673 a: __m512i,
5674) -> __m512i {
5675 static_assert_imm_u8!(IMM8);
fc512014 5676 let a = a.as_i16x32();
17df50a5 5677 let shf = vpsrliw(a, IMM8);
5678 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5679}
5680
5681/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5682///
5683/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi16&expand=5512)
5684#[inline]
5685#[target_feature(enable = "avx512bw")]
5686#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5687#[rustc_legacy_const_generics(2)]
5688pub unsafe fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
5689 static_assert_imm8!(IMM8);
 5690 // IMM8 should logically be u32, but the Intel documentation (used for signature verification) declares i32, so cast it before calling the backend intrinsic.
5691 let a = a.as_i16x32();
17df50a5 5692 let shf = vpsrliw(a, IMM8 as u32);
5693 let zero = _mm512_setzero_si512().as_i16x32();
5694 transmute(simd_select_bitmask(k, shf, zero))
5695}
5696
5697/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5698///
5699/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srli_epi16&expand=5508)
5700#[inline]
5701#[target_feature(enable = "avx512bw,avx512vl")]
5702#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5703#[rustc_legacy_const_generics(3)]
5704pub unsafe fn _mm256_mask_srli_epi16<const IMM8: i32>(
5705 src: __m256i,
5706 k: __mmask16,
5707 a: __m256i,
5708) -> __m256i {
5709 static_assert_imm8!(IMM8);
5710 let shf = _mm256_srli_epi16::<IMM8>(a);
cdc7bbd5 5711 transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16()))
5712}
5713
5714/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5715///
5716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srli_epi16&expand=5509)
5717#[inline]
5718#[target_feature(enable = "avx512bw,avx512vl")]
5719#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5720#[rustc_legacy_const_generics(2)]
5721pub unsafe fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
5722 static_assert_imm8!(IMM8);
5723 let shf = _mm256_srli_epi16::<IMM8>(a);
fc512014 5724 let zero = _mm256_setzero_si256().as_i16x16();
cdc7bbd5 5725 transmute(simd_select_bitmask(k, shf.as_i16x16(), zero))
5726}
5727
5728/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5729///
5730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srli_epi16&expand=5505)
5731#[inline]
5732#[target_feature(enable = "avx512bw,avx512vl")]
5733#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5734#[rustc_legacy_const_generics(3)]
5735pub unsafe fn _mm_mask_srli_epi16<const IMM8: i32>(
5736 src: __m128i,
5737 k: __mmask8,
5738 a: __m128i,
5739) -> __m128i {
5740 static_assert_imm8!(IMM8);
5741 let shf = _mm_srli_epi16::<IMM8>(a);
cdc7bbd5 5742 transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8()))
5743}
5744
5745/// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5746///
5747/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srli_epi16&expand=5506)
5748#[inline]
5749#[target_feature(enable = "avx512bw,avx512vl")]
5750#[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
5751#[rustc_legacy_const_generics(2)]
5752pub unsafe fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
5753 static_assert_imm8!(IMM8);
5754 let shf = _mm_srli_epi16::<IMM8>(a);
fc512014 5755 let zero = _mm_setzero_si128().as_i16x8();
cdc7bbd5 5756 transmute(simd_select_bitmask(k, shf.as_i16x8(), zero))
5757}
5758
5759/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
5760///
5761/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi16&expand=5545)
5762#[inline]
5763#[target_feature(enable = "avx512bw")]
5764#[cfg_attr(test, assert_instr(vpsrlvw))]
5765pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
5766 transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32()))
5767}
5768
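// Illustrative sketch (not part of the upstream source): the variable logical right shift
// takes a per-lane count from `count`, zero-filling each lane independently. Hypothetical
// helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srlv_epi16() -> __m512i {
    let a = _mm512_set1_epi16(64);
    // A uniform count is used for brevity; each lane may carry its own count in practice.
    let count = _mm512_set1_epi16(2);
    // Every i16 lane of the result holds 64 >> 2 == 16.
    _mm512_srlv_epi16(a, count)
}
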
5769/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5770///
5771/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi16&expand=5543)
5772#[inline]
5773#[target_feature(enable = "avx512bw")]
5774#[cfg_attr(test, assert_instr(vpsrlvw))]
5775pub unsafe fn _mm512_mask_srlv_epi16(
5776 src: __m512i,
5777 k: __mmask32,
5778 a: __m512i,
5779 count: __m512i,
5780) -> __m512i {
5781 let shf = _mm512_srlv_epi16(a, count).as_i16x32();
5782 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5783}
5784
5785/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5786///
5787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi16&expand=5544)
5788#[inline]
5789#[target_feature(enable = "avx512bw")]
5790#[cfg_attr(test, assert_instr(vpsrlvw))]
5791pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
5792 let shf = _mm512_srlv_epi16(a, count).as_i16x32();
5793 let zero = _mm512_setzero_si512().as_i16x32();
5794 transmute(simd_select_bitmask(k, shf, zero))
5795}
5796
5797/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
5798///
5799/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi16&expand=5542)
5800#[inline]
5801#[target_feature(enable = "avx512bw,avx512vl")]
5802#[cfg_attr(test, assert_instr(vpsrlvw))]
5803pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
5804 transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16()))
5805}
5806
5807/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5808///
5809/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srlv_epi16&expand=5540)
5810#[inline]
5811#[target_feature(enable = "avx512bw,avx512vl")]
5812#[cfg_attr(test, assert_instr(vpsrlvw))]
5813pub unsafe fn _mm256_mask_srlv_epi16(
5814 src: __m256i,
5815 k: __mmask16,
5816 a: __m256i,
5817 count: __m256i,
5818) -> __m256i {
5819 let shf = _mm256_srlv_epi16(a, count).as_i16x16();
5820 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5821}
5822
5823/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5824///
5825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srlv_epi16&expand=5541)
5826#[inline]
5827#[target_feature(enable = "avx512bw,avx512vl")]
5828#[cfg_attr(test, assert_instr(vpsrlvw))]
5829pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
5830 let shf = _mm256_srlv_epi16(a, count).as_i16x16();
5831 let zero = _mm256_setzero_si256().as_i16x16();
5832 transmute(simd_select_bitmask(k, shf, zero))
5833}
5834
5835/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
5836///
5837/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi16&expand=5539)
5838#[inline]
5839#[target_feature(enable = "avx512bw,avx512vl")]
5840#[cfg_attr(test, assert_instr(vpsrlvw))]
5841pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
5842 transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8()))
5843}
5844
5845/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5846///
5847/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srlv_epi16&expand=5537)
5848#[inline]
5849#[target_feature(enable = "avx512bw,avx512vl")]
5850#[cfg_attr(test, assert_instr(vpsrlvw))]
5851pub unsafe fn _mm_mask_srlv_epi16(
5852 src: __m128i,
5853 k: __mmask8,
5854 a: __m128i,
5855 count: __m128i,
5856) -> __m128i {
5857 let shf = _mm_srlv_epi16(a, count).as_i16x8();
5858 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5859}
5860
5861/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5862///
5863/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srlv_epi16&expand=5538)
5864#[inline]
5865#[target_feature(enable = "avx512bw,avx512vl")]
5866#[cfg_attr(test, assert_instr(vpsrlvw))]
5867pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5868 let shf = _mm_srlv_epi16(a, count).as_i16x8();
5869 let zero = _mm_setzero_si128().as_i16x8();
5870 transmute(simd_select_bitmask(k, shf, zero))
5871}
5872
5873/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst.
5874///
5875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi16&expand=5398)
5876#[inline]
5877#[target_feature(enable = "avx512bw")]
5878#[cfg_attr(test, assert_instr(vpsraw))]
5879pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i {
5880 transmute(vpsraw(a.as_i16x32(), count.as_i16x8()))
5881}
5882
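// Illustrative sketch (not part of the upstream source): the arithmetic right shift copies
// the sign bit into the vacated positions, so negative lanes stay negative. Hypothetical
// helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sra_epi16() -> __m512i {
    let a = _mm512_set1_epi16(-32);
    let count = _mm_cvtsi32_si128(2); // shift all lanes right by 2 bits
    // Every i16 lane of the result holds -32 >> 2 == -8 (sign bits shifted in).
    _mm512_sra_epi16(a, count)
}
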
5883/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5884///
5885/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi16&expand=5396)
5886#[inline]
5887#[target_feature(enable = "avx512bw")]
5888#[cfg_attr(test, assert_instr(vpsraw))]
5889pub unsafe fn _mm512_mask_sra_epi16(
5890 src: __m512i,
5891 k: __mmask32,
5892 a: __m512i,
5893 count: __m128i,
5894) -> __m512i {
5895 let shf = _mm512_sra_epi16(a, count).as_i16x32();
5896 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5897}
5898
5899/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5900///
5901/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi16&expand=5397)
5902#[inline]
5903#[target_feature(enable = "avx512bw")]
5904#[cfg_attr(test, assert_instr(vpsraw))]
5905pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i {
5906 let shf = _mm512_sra_epi16(a, count).as_i16x32();
5907 let zero = _mm512_setzero_si512().as_i16x32();
5908 transmute(simd_select_bitmask(k, shf, zero))
5909}
5910
5911/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5912///
5913/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sra_epi16&expand=5393)
5914#[inline]
5915#[target_feature(enable = "avx512bw,avx512vl")]
5916#[cfg_attr(test, assert_instr(vpsraw))]
5917pub unsafe fn _mm256_mask_sra_epi16(
5918 src: __m256i,
5919 k: __mmask16,
5920 a: __m256i,
5921 count: __m128i,
5922) -> __m256i {
5923 let shf = _mm256_sra_epi16(a, count).as_i16x16();
5924 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
5925}
5926
5927/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5928///
5929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sra_epi16&expand=5394)
5930#[inline]
5931#[target_feature(enable = "avx512bw,avx512vl")]
5932#[cfg_attr(test, assert_instr(vpsraw))]
5933pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i {
5934 let shf = _mm256_sra_epi16(a, count).as_i16x16();
5935 let zero = _mm256_setzero_si256().as_i16x16();
5936 transmute(simd_select_bitmask(k, shf, zero))
5937}
5938
5939/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5940///
5941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sra_epi16&expand=5390)
5942#[inline]
5943#[target_feature(enable = "avx512bw,avx512vl")]
5944#[cfg_attr(test, assert_instr(vpsraw))]
5945pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5946 let shf = _mm_sra_epi16(a, count).as_i16x8();
5947 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
5948}
5949
5950/// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5951///
5952/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sra_epi16&expand=5391)
5953#[inline]
5954#[target_feature(enable = "avx512bw,avx512vl")]
5955#[cfg_attr(test, assert_instr(vpsraw))]
5956pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
5957 let shf = _mm_sra_epi16(a, count).as_i16x8();
5958 let zero = _mm_setzero_si128().as_i16x8();
5959 transmute(simd_select_bitmask(k, shf, zero))
5960}
5961
5962/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
5963///
5964/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi16&expand=5427)
5965#[inline]
5966#[target_feature(enable = "avx512bw")]
5967#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
5968#[rustc_legacy_const_generics(1)]
5969pub unsafe fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
5970 static_assert_imm_u8!(IMM8);
fc512014 5971 let a = a.as_i16x32();
17df50a5 5972 let r = vpsraiw(a, IMM8);
5973 transmute(r)
5974}
5975
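// Illustrative sketch (not part of the upstream source): the immediate arithmetic right
// shift; counts above 15 leave each lane filled entirely with its sign bit (0 or -1).
// Hypothetical helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srai_epi16() -> __m512i {
    let a = _mm512_set1_epi16(-1024);
    // Every i16 lane of the result holds -1024 >> 3 == -128.
    _mm512_srai_epi16::<3>(a)
}
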
5976/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5977///
5978/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi16&expand=5425)
5979#[inline]
5980#[target_feature(enable = "avx512bw")]
5981#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
5982#[rustc_legacy_const_generics(3)]
5983pub unsafe fn _mm512_mask_srai_epi16<const IMM8: u32>(
5984 src: __m512i,
5985 k: __mmask32,
5986 a: __m512i,
5987) -> __m512i {
5988 static_assert_imm_u8!(IMM8);
fc512014 5989 let a = a.as_i16x32();
17df50a5 5990 let shf = vpsraiw(a, IMM8);
5991 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
5992}
5993
5994/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5995///
5996/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi16&expand=5426)
5997#[inline]
5998#[target_feature(enable = "avx512bw")]
5999#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
6000#[rustc_legacy_const_generics(2)]
6001pub unsafe fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
6002 static_assert_imm_u8!(IMM8);
fc512014 6003 let a = a.as_i16x32();
17df50a5 6004 let shf = vpsraiw(a, IMM8);
6005 let zero = _mm512_setzero_si512().as_i16x32();
6006 transmute(simd_select_bitmask(k, shf, zero))
6007}
6008
6009/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6010///
6011/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srai_epi16&expand=5422)
6012#[inline]
6013#[target_feature(enable = "avx512bw,avx512vl")]
6014#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
6015#[rustc_legacy_const_generics(3)]
6016pub unsafe fn _mm256_mask_srai_epi16<const IMM8: u32>(
6017 src: __m256i,
6018 k: __mmask16,
6019 a: __m256i,
6020) -> __m256i {
6021 static_assert_imm_u8!(IMM8);
6022 let imm8 = IMM8 as i32;
6023 let r = psraiw256(a.as_i16x16(), imm8);
6024 transmute(simd_select_bitmask(k, r, src.as_i16x16()))
6025}
6026
6027/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6028///
6029/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srai_epi16&expand=5423)
6030#[inline]
6031#[target_feature(enable = "avx512bw,avx512vl")]
6032#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
6033#[rustc_legacy_const_generics(2)]
6034pub unsafe fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
6035 static_assert_imm_u8!(IMM8);
6036 let imm8 = IMM8 as i32;
6037 let r = psraiw256(a.as_i16x16(), imm8);
fc512014 6038 let zero = _mm256_setzero_si256().as_i16x16();
17df50a5 6039 transmute(simd_select_bitmask(k, r, zero))
6040}
6041
6042/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6043///
6044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srai_epi16&expand=5419)
6045#[inline]
6046#[target_feature(enable = "avx512bw,avx512vl")]
6047#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
6048#[rustc_legacy_const_generics(3)]
6049pub unsafe fn _mm_mask_srai_epi16<const IMM8: u32>(
6050 src: __m128i,
6051 k: __mmask8,
6052 a: __m128i,
6053) -> __m128i {
6054 static_assert_imm_u8!(IMM8);
6055 let imm8 = IMM8 as i32;
6056 let r = psraiw128(a.as_i16x8(), imm8);
6057 transmute(simd_select_bitmask(k, r, src.as_i16x8()))
6058}
6059
6060/// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6061///
6062/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srai_epi16&expand=5420)
6063#[inline]
6064#[target_feature(enable = "avx512bw,avx512vl")]
6065#[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
6066#[rustc_legacy_const_generics(2)]
6067pub unsafe fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
6068 static_assert_imm_u8!(IMM8);
6069 let imm8 = IMM8 as i32;
6070 let r = psraiw128(a.as_i16x8(), imm8);
fc512014 6071 let zero = _mm_setzero_si128().as_i16x8();
17df50a5 6072 transmute(simd_select_bitmask(k, r, zero))
6073}
6074
6075/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
6076///
6077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi16&expand=5456)
6078#[inline]
6079#[target_feature(enable = "avx512bw")]
6080#[cfg_attr(test, assert_instr(vpsravw))]
6081pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
6082 transmute(vpsravw(a.as_i16x32(), count.as_i16x32()))
6083}
6084
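// Illustrative sketch (not part of the upstream source): the variable arithmetic right
// shift applies a per-lane count while propagating each lane's sign bit. Hypothetical
// helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_srav_epi16() -> __m512i {
    let a = _mm512_set1_epi16(-100);
    // A uniform count is used for brevity; each lane may carry its own count in practice.
    let count = _mm512_set1_epi16(2);
    // Every i16 lane of the result holds -100 >> 2 == -25.
    _mm512_srav_epi16(a, count)
}
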
6085/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6086///
6087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi16&expand=5454)
6088#[inline]
6089#[target_feature(enable = "avx512bw")]
6090#[cfg_attr(test, assert_instr(vpsravw))]
6091pub unsafe fn _mm512_mask_srav_epi16(
6092 src: __m512i,
6093 k: __mmask32,
6094 a: __m512i,
6095 count: __m512i,
6096) -> __m512i {
6097 let shf = _mm512_srav_epi16(a, count).as_i16x32();
6098 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
6099}
6100
6101/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6102///
6103/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi16&expand=5455)
6104#[inline]
6105#[target_feature(enable = "avx512bw")]
6106#[cfg_attr(test, assert_instr(vpsravw))]
6107pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
6108 let shf = _mm512_srav_epi16(a, count).as_i16x32();
6109 let zero = _mm512_setzero_si512().as_i16x32();
6110 transmute(simd_select_bitmask(k, shf, zero))
6111}
6112
6113/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
6114///
6115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi16&expand=5453)
6116#[inline]
6117#[target_feature(enable = "avx512bw,avx512vl")]
6118#[cfg_attr(test, assert_instr(vpsravw))]
6119pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
6120 transmute(vpsravw256(a.as_i16x16(), count.as_i16x16()))
6121}
6122
6123/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6124///
6125/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srav_epi16&expand=5451)
6126#[inline]
6127#[target_feature(enable = "avx512bw,avx512vl")]
6128#[cfg_attr(test, assert_instr(vpsravw))]
6129pub unsafe fn _mm256_mask_srav_epi16(
6130 src: __m256i,
6131 k: __mmask16,
6132 a: __m256i,
6133 count: __m256i,
6134) -> __m256i {
6135 let shf = _mm256_srav_epi16(a, count).as_i16x16();
6136 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
6137}
6138
6139/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6140///
6141/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srav_epi16&expand=5452)
6142#[inline]
6143#[target_feature(enable = "avx512bw,avx512vl")]
6144#[cfg_attr(test, assert_instr(vpsravw))]
6145pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
6146 let shf = _mm256_srav_epi16(a, count).as_i16x16();
6147 let zero = _mm256_setzero_si256().as_i16x16();
6148 transmute(simd_select_bitmask(k, shf, zero))
6149}
6150
6151/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
6152///
6153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi16&expand=5450)
6154#[inline]
6155#[target_feature(enable = "avx512bw,avx512vl")]
6156#[cfg_attr(test, assert_instr(vpsravw))]
6157pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
6158 transmute(vpsravw128(a.as_i16x8(), count.as_i16x8()))
6159}
6160
6161/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6162///
6163/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srav_epi16&expand=5448)
6164#[inline]
6165#[target_feature(enable = "avx512bw,avx512vl")]
6166#[cfg_attr(test, assert_instr(vpsravw))]
6167pub unsafe fn _mm_mask_srav_epi16(
6168 src: __m128i,
6169 k: __mmask8,
6170 a: __m128i,
6171 count: __m128i,
6172) -> __m128i {
6173 let shf = _mm_srav_epi16(a, count).as_i16x8();
6174 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
6175}
6176
6177/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6178///
6179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srav_epi16&expand=5449)
6180#[inline]
6181#[target_feature(enable = "avx512bw,avx512vl")]
6182#[cfg_attr(test, assert_instr(vpsravw))]
6183pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
6184 let shf = _mm_srav_epi16(a, count).as_i16x8();
6185 let zero = _mm_setzero_si128().as_i16x8();
6186 transmute(simd_select_bitmask(k, shf, zero))
6187}
6188
6189/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
6190///
6191/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi16&expand=4226)
6192#[inline]
6193#[target_feature(enable = "avx512bw")]
6194#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
6195pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
6196 transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32()))
6197}
6198
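// Illustrative sketch (not part of the upstream source): each 16-bit lane of `idx`
// selects one of the 64 words of the concatenation of `a` (indices 0..=31) and `b`
// (indices 32..=63). Hypothetical helper; `avx512bw` support is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_permutex2var_epi16() -> __m512i {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    // Index 32 selects word 0 of `b` for every destination lane.
    let idx = _mm512_set1_epi16(32);
    // Every i16 lane of the result holds 2.
    _mm512_permutex2var_epi16(a, idx, b)
}
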
6199/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
6200///
6201/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi16&expand=4223)
6202#[inline]
6203#[target_feature(enable = "avx512bw")]
6204#[cfg_attr(test, assert_instr(vpermt2w))]
6205pub unsafe fn _mm512_mask_permutex2var_epi16(
6206 a: __m512i,
6207 k: __mmask32,
6208 idx: __m512i,
6209 b: __m512i,
6210) -> __m512i {
6211 let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
6212 transmute(simd_select_bitmask(k, permute, a.as_i16x32()))
6213}
6214
6215/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6216///
6217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi16&expand=4225)
6218#[inline]
6219#[target_feature(enable = "avx512bw")]
6220#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
6221pub unsafe fn _mm512_maskz_permutex2var_epi16(
6222 k: __mmask32,
6223 a: __m512i,
6224 idx: __m512i,
6225 b: __m512i,
6226) -> __m512i {
6227 let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
6228 let zero = _mm512_setzero_si512().as_i16x32();
6229 transmute(simd_select_bitmask(k, permute, zero))
6230}
6231
6232/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
6233///
6234/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi16&expand=4224)
6235#[inline]
6236#[target_feature(enable = "avx512bw")]
6237#[cfg_attr(test, assert_instr(vpermi2w))]
6238pub unsafe fn _mm512_mask2_permutex2var_epi16(
6239 a: __m512i,
6240 idx: __m512i,
6241 k: __mmask32,
6242 b: __m512i,
6243) -> __m512i {
6244 let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32();
6245 transmute(simd_select_bitmask(k, permute, idx.as_i16x32()))
6246}
6247
6248/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
6249///
6250/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_epi16&expand=4222)
6251#[inline]
6252#[target_feature(enable = "avx512bw,avx512vl")]
6253#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
6254pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
6255 transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16()))
6256}
6257
6258/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
6259///
6260/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutex2var_epi16&expand=4219)
6261#[inline]
6262#[target_feature(enable = "avx512bw,avx512vl")]
6263#[cfg_attr(test, assert_instr(vpermt2w))]
6264pub unsafe fn _mm256_mask_permutex2var_epi16(
6265 a: __m256i,
6266 k: __mmask16,
6267 idx: __m256i,
6268 b: __m256i,
6269) -> __m256i {
6270 let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
6271 transmute(simd_select_bitmask(k, permute, a.as_i16x16()))
6272}
6273
6274/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6275///
6276/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutex2var_epi16&expand=4221)
6277#[inline]
6278#[target_feature(enable = "avx512bw,avx512vl")]
6279#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
6280pub unsafe fn _mm256_maskz_permutex2var_epi16(
6281 k: __mmask16,
6282 a: __m256i,
6283 idx: __m256i,
6284 b: __m256i,
6285) -> __m256i {
6286 let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
6287 let zero = _mm256_setzero_si256().as_i16x16();
6288 transmute(simd_select_bitmask(k, permute, zero))
6289}
6290
6291/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
6292///
6293/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask2_permutex2var_epi16&expand=4220)
6294#[inline]
6295#[target_feature(enable = "avx512bw,avx512vl")]
6296#[cfg_attr(test, assert_instr(vpermi2w))]
6297pub unsafe fn _mm256_mask2_permutex2var_epi16(
6298 a: __m256i,
6299 idx: __m256i,
6300 k: __mmask16,
6301 b: __m256i,
6302) -> __m256i {
6303 let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16();
6304 transmute(simd_select_bitmask(k, permute, idx.as_i16x16()))
6305}
6306
6307/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
6308///
6309/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_epi16&expand=4218)
6310#[inline]
6311#[target_feature(enable = "avx512bw,avx512vl")]
6312#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
6313pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
6314 transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8()))
6315}
6316
6317/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
6318///
6319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutex2var_epi16&expand=4215)
6320#[inline]
6321#[target_feature(enable = "avx512bw,avx512vl")]
6322#[cfg_attr(test, assert_instr(vpermt2w))]
6323pub unsafe fn _mm_mask_permutex2var_epi16(
6324 a: __m128i,
6325 k: __mmask8,
6326 idx: __m128i,
6327 b: __m128i,
6328) -> __m128i {
6329 let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
6330 transmute(simd_select_bitmask(k, permute, a.as_i16x8()))
6331}
6332
6333/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6334///
6335/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutex2var_epi16&expand=4217)
6336#[inline]
6337#[target_feature(enable = "avx512bw,avx512vl")]
6338#[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w
6339pub unsafe fn _mm_maskz_permutex2var_epi16(
6340 k: __mmask8,
6341 a: __m128i,
6342 idx: __m128i,
6343 b: __m128i,
6344) -> __m128i {
6345 let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
6346 let zero = _mm_setzero_si128().as_i16x8();
6347 transmute(simd_select_bitmask(k, permute, zero))
6348}
6349
6350/// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
6351///
6352/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask2_permutex2var_epi16&expand=4216)
6353#[inline]
6354#[target_feature(enable = "avx512bw,avx512vl")]
6355#[cfg_attr(test, assert_instr(vpermi2w))]
6356pub unsafe fn _mm_mask2_permutex2var_epi16(
6357 a: __m128i,
6358 idx: __m128i,
6359 k: __mmask8,
6360 b: __m128i,
6361) -> __m128i {
6362 let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8();
6363 transmute(simd_select_bitmask(k, permute, idx.as_i16x8()))
6364}
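
// Illustrative usage sketch for the permutex2var family (hypothetical values, not from
// the original source); it assumes a caller compiled with
// `#[target_feature(enable = "avx512bw,avx512vl")]` running on hardware with those features:
//
//     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);                 // a[i] == i
//     let b = _mm_set_epi16(107, 106, 105, 104, 103, 102, 101, 100); // b[i] == 100 + i
//     let idx = _mm_set_epi16(15, 14, 13, 12, 3, 2, 1, 0);           // 0..=7 pick from a, 8..=15 from b
//     let r = _mm_permutex2var_epi16(a, idx, b);
//     // r == [0, 1, 2, 3, 104, 105, 106, 107] from element 0 upward; the mask/maskz/mask2
//     // variants additionally blend that result with a, zero, or idx respectively.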
6365
6366/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
6367///
6368/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi16&expand=4295)
6369#[inline]
6370#[target_feature(enable = "avx512bw")]
6371#[cfg_attr(test, assert_instr(vpermw))]
6372pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i {
6373 transmute(vpermw(a.as_i16x32(), idx.as_i16x32()))
6374}
6375
6376/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6377///
6378/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi16&expand=4293)
6379#[inline]
6380#[target_feature(enable = "avx512bw")]
6381#[cfg_attr(test, assert_instr(vpermw))]
6382pub unsafe fn _mm512_mask_permutexvar_epi16(
6383 src: __m512i,
6384 k: __mmask32,
6385 idx: __m512i,
6386 a: __m512i,
6387) -> __m512i {
6388 let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
6389 transmute(simd_select_bitmask(k, permute, src.as_i16x32()))
6390}
6391
6392/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6393///
6394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi16&expand=4294)
6395#[inline]
6396#[target_feature(enable = "avx512bw")]
6397#[cfg_attr(test, assert_instr(vpermw))]
6398pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i {
6399 let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32();
6400 let zero = _mm512_setzero_si512().as_i16x32();
6401 transmute(simd_select_bitmask(k, permute, zero))
6402}
6403
6404/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
6405///
6406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_epi16&expand=4292)
6407#[inline]
6408#[target_feature(enable = "avx512bw,avx512vl")]
6409#[cfg_attr(test, assert_instr(vpermw))]
6410pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i {
6411 transmute(vpermw256(a.as_i16x16(), idx.as_i16x16()))
6412}
6413
6414/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6415///
6416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutexvar_epi16&expand=4290)
6417#[inline]
6418#[target_feature(enable = "avx512bw,avx512vl")]
6419#[cfg_attr(test, assert_instr(vpermw))]
6420pub unsafe fn _mm256_mask_permutexvar_epi16(
6421 src: __m256i,
6422 k: __mmask16,
6423 idx: __m256i,
6424 a: __m256i,
6425) -> __m256i {
6426 let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
6427 transmute(simd_select_bitmask(k, permute, src.as_i16x16()))
6428}
6429
6430/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6431///
6432/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutexvar_epi16&expand=4291)
6433#[inline]
6434#[target_feature(enable = "avx512bw,avx512vl")]
6435#[cfg_attr(test, assert_instr(vpermw))]
6436pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i {
6437 let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16();
6438 let zero = _mm256_setzero_si256().as_i16x16();
6439 transmute(simd_select_bitmask(k, permute, zero))
6440}
6441
6442/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
6443///
6444/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_epi16&expand=4289)
6445#[inline]
6446#[target_feature(enable = "avx512bw,avx512vl")]
6447#[cfg_attr(test, assert_instr(vpermw))]
6448pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i {
6449 transmute(vpermw128(a.as_i16x8(), idx.as_i16x8()))
6450}
6451
6452/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6453///
6454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutexvar_epi16&expand=4287)
6455#[inline]
6456#[target_feature(enable = "avx512bw,avx512vl")]
6457#[cfg_attr(test, assert_instr(vpermw))]
6458pub unsafe fn _mm_mask_permutexvar_epi16(
6459 src: __m128i,
6460 k: __mmask8,
6461 idx: __m128i,
6462 a: __m128i,
6463) -> __m128i {
6464 let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
6465 transmute(simd_select_bitmask(k, permute, src.as_i16x8()))
6466}
6467
6468/// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6469///
6470/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutexvar_epi16&expand=4288)
6471#[inline]
6472#[target_feature(enable = "avx512bw,avx512vl")]
6473#[cfg_attr(test, assert_instr(vpermw))]
6474pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i {
6475 let permute = _mm_permutexvar_epi16(idx, a).as_i16x8();
6476 let zero = _mm_setzero_si128().as_i16x8();
6477 transmute(simd_select_bitmask(k, permute, zero))
6478}
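
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl are enabled
// for the caller): permutexvar reorders a single vector with per-element indices.
//
//     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);   // a[i] == i
//     let idx = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); // idx[i] == 7 - i
//     let r = _mm_permutexvar_epi16(idx, a);           // r[i] == a[7 - i], i.e. a reversed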
6479
6480/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
6481///
6482/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi16&expand=430)
6483#[inline]
6484#[target_feature(enable = "avx512bw")]
6485#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
6486pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6487 transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32()))
6488}
6489
6490/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
6491///
6492/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_epi16&expand=429)
6493#[inline]
6494#[target_feature(enable = "avx512bw,avx512vl")]
6495#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
6496pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6497 transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16()))
6498}
6499
6500/// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst.
6501///
6502/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_epi16&expand=427)
6503#[inline]
6504#[target_feature(enable = "avx512bw,avx512vl")]
6505#[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
6506pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6507 transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8()))
6508}
6509
6510/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
6511///
6512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi8&expand=441)
6513#[inline]
6514#[target_feature(enable = "avx512bw")]
6515#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
6516pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6517 transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64()))
6518}
6519
6520/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
6521///
6522/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_epi8&expand=440)
6523#[inline]
6524#[target_feature(enable = "avx512bw,avx512vl")]
6525#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
6526pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6527 transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32()))
6528}
6529
6530/// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst.
6531///
6532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_epi8&expand=439)
6533#[inline]
6534#[target_feature(enable = "avx512bw,avx512vl")]
6535#[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
6536pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6537 transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16()))
6538}
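
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl): each mask
// bit picks the element from b when set and from a when clear.
//
//     let a = _mm_set1_epi16(1);
//     let b = _mm_set1_epi16(2);
//     let r = _mm_mask_blend_epi16(0b1111_0000, a, b);
//     // elements 0..=3 come from a (== 1), elements 4..=7 come from b (== 2)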
6539
6540/// Broadcast the low packed 16-bit integer from a to all elements of dst.
6541///
6542/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastw_epi16&expand=587)
6543#[inline]
6544#[target_feature(enable = "avx512bw")]
6545#[cfg_attr(test, assert_instr(vpbroadcastw))]
6546pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
6547 let a = _mm512_castsi128_si512(a).as_i16x32();
6548 let ret: i16x32 = simd_shuffle32!(
6549 a,
6550 a,
6551 [
6552 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6553 0, 0, 0,
6554 ],
6555 );
6556 transmute(ret)
6557}
6558
6559/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6560///
6561/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastw_epi16&expand=588)
6562#[inline]
6563#[target_feature(enable = "avx512bw")]
6564#[cfg_attr(test, assert_instr(vpbroadcastw))]
6565pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
6566 let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
6567 transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
6568}
6569
6570/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6571///
6572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastw_epi16&expand=589)
6573#[inline]
6574#[target_feature(enable = "avx512bw")]
6575#[cfg_attr(test, assert_instr(vpbroadcastw))]
6576pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
6577 let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
6578 let zero = _mm512_setzero_si512().as_i16x32();
6579 transmute(simd_select_bitmask(k, broadcast, zero))
6580}
6581
6582/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6583///
6584/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_broadcastw_epi16&expand=585)
6585#[inline]
6586#[target_feature(enable = "avx512bw,avx512vl")]
6587#[cfg_attr(test, assert_instr(vpbroadcastw))]
6588pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
6589 let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
6590 transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
6591}
6592
6593/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6594///
6595/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_broadcastw_epi16&expand=586)
6596#[inline]
6597#[target_feature(enable = "avx512bw,avx512vl")]
6598#[cfg_attr(test, assert_instr(vpbroadcastw))]
6599pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
6600 let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
6601 let zero = _mm256_setzero_si256().as_i16x16();
6602 transmute(simd_select_bitmask(k, broadcast, zero))
6603}
6604
6605/// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6606///
6607/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_broadcastw_epi16&expand=582)
6608#[inline]
6609#[target_feature(enable = "avx512bw,avx512vl")]
6610#[cfg_attr(test, assert_instr(vpbroadcastw))]
6611pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
6612 let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
6613 transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
6614}
6615
6616/// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6617///
6618/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_broadcastw_epi16&expand=583)
6619#[inline]
6620#[target_feature(enable = "avx512bw,avx512vl")]
6621#[cfg_attr(test, assert_instr(vpbroadcastw))]
6622pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
6623 let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
6624 let zero = _mm_setzero_si128().as_i16x8();
6625 transmute(simd_select_bitmask(k, broadcast, zero))
6626}
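
// Illustrative usage sketch (hypothetical values; assumes avx512bw is enabled for the
// caller): the low word of a 128-bit source is splatted, then masked against src or zero.
//
//     let a = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 42); // element 0 holds 42
//     let src = _mm512_set1_epi16(-1);
//     let r = _mm512_mask_broadcastw_epi16(src, 0x5555_5555, a);
//     // even elements become 42, odd elements keep -1 from src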
6627
6628/// Broadcast the low packed 8-bit integer from a to all elements of dst.
6629///
6630/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastb_epi8&expand=536)
6631#[inline]
6632#[target_feature(enable = "avx512bw")]
6633#[cfg_attr(test, assert_instr(vpbroadcastb))]
6634pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
6635 let a = _mm512_castsi128_si512(a).as_i8x64();
6636 let ret: i8x64 = simd_shuffle64!(
6637 a,
6638 a,
6639 [
6640 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6641 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
6642 0, 0, 0, 0, 0, 0,
6643 ],
6644 );
6645 transmute(ret)
6646}
6647
6648/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6649///
6650/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastb_epi8&expand=537)
6651#[inline]
6652#[target_feature(enable = "avx512bw")]
6653#[cfg_attr(test, assert_instr(vpbroadcastb))]
6654pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
6655 let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
6656 transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
6657}
6658
6659/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6660///
6661/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastb_epi8&expand=538)
6662#[inline]
6663#[target_feature(enable = "avx512bw")]
6664#[cfg_attr(test, assert_instr(vpbroadcastb))]
6665pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
6666 let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
6667 let zero = _mm512_setzero_si512().as_i8x64();
6668 transmute(simd_select_bitmask(k, broadcast, zero))
6669}
6670
6671/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6672///
6673/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_broadcastb_epi8&expand=534)
6674#[inline]
6675#[target_feature(enable = "avx512bw,avx512vl")]
6676#[cfg_attr(test, assert_instr(vpbroadcastb))]
6677pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
6678 let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
6679 transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
6680}
6681
6682/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6683///
6684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_broadcastb_epi8&expand=535)
6685#[inline]
6686#[target_feature(enable = "avx512bw,avx512vl")]
6687#[cfg_attr(test, assert_instr(vpbroadcastb))]
6688pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
6689 let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
6690 let zero = _mm256_setzero_si256().as_i8x32();
6691 transmute(simd_select_bitmask(k, broadcast, zero))
6692}
6693
6694/// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6695///
6696/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_broadcastb_epi8&expand=531)
6697#[inline]
6698#[target_feature(enable = "avx512bw,avx512vl")]
6699#[cfg_attr(test, assert_instr(vpbroadcastb))]
6700pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
6701 let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
6702 transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
6703}
6704
6705/// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6706///
6707/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_broadcastb_epi8&expand=532)
6708#[inline]
6709#[target_feature(enable = "avx512bw,avx512vl")]
6710#[cfg_attr(test, assert_instr(vpbroadcastb))]
6711pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
6712 let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
6713 let zero = _mm_setzero_si128().as_i8x16();
6714 transmute(simd_select_bitmask(k, broadcast, zero))
6715}
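
// Illustrative usage sketch (hypothetical values; assumes avx512bw): the same pattern at
// byte granularity, with a 64-bit mask covering the 64 lanes.
//
//     let a = _mm_set1_epi8(9);
//     let all = _mm512_maskz_broadcastb_epi8(!0, a); // every byte of the result is 9
//     let none = _mm512_maskz_broadcastb_epi8(0, a); // every byte of the result is 0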
6716
6717/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
6718///
6719/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi16&expand=6012)
6720#[inline]
6721#[target_feature(enable = "avx512bw")]
6722#[cfg_attr(test, assert_instr(vpunpckhwd))]
6723pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
6724 let a = a.as_i16x32();
6725 let b = b.as_i16x32();
6726 #[rustfmt::skip]
6727 let r: i16x32 = simd_shuffle32!(
6728 a,
6729 b,
6730 [
6731 4, 32 + 4, 5, 32 + 5,
6732 6, 32 + 6, 7, 32 + 7,
6733 12, 32 + 12, 13, 32 + 13,
6734 14, 32 + 14, 15, 32 + 15,
6735 20, 32 + 20, 21, 32 + 21,
6736 22, 32 + 22, 23, 32 + 23,
6737 28, 32 + 28, 29, 32 + 29,
6738 30, 32 + 30, 31, 32 + 31,
6739 ],
6740 );
6741 transmute(r)
6742}
6743
6744/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi16&expand=6010)
6747#[inline]
6748#[target_feature(enable = "avx512bw")]
6749#[cfg_attr(test, assert_instr(vpunpckhwd))]
6750pub unsafe fn _mm512_mask_unpackhi_epi16(
6751 src: __m512i,
6752 k: __mmask32,
6753 a: __m512i,
6754 b: __m512i,
6755) -> __m512i {
6756 let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
6757 transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
6758}
6759
6760/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6761///
6762/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi16&expand=6011)
6763#[inline]
6764#[target_feature(enable = "avx512bw")]
6765#[cfg_attr(test, assert_instr(vpunpckhwd))]
6766pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6767 let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
6768 let zero = _mm512_setzero_si512().as_i16x32();
6769 transmute(simd_select_bitmask(k, unpackhi, zero))
6770}
6771
6772/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6773///
6774/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpackhi_epi16&expand=6007)
6775#[inline]
6776#[target_feature(enable = "avx512bw,avx512vl")]
6777#[cfg_attr(test, assert_instr(vpunpckhwd))]
6778pub unsafe fn _mm256_mask_unpackhi_epi16(
6779 src: __m256i,
6780 k: __mmask16,
6781 a: __m256i,
6782 b: __m256i,
6783) -> __m256i {
6784 let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
6785 transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
6786}
6787
6788/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6789///
6790/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpackhi_epi16&expand=6008)
6791#[inline]
6792#[target_feature(enable = "avx512bw,avx512vl")]
6793#[cfg_attr(test, assert_instr(vpunpckhwd))]
6794pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
6795 let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
6796 let zero = _mm256_setzero_si256().as_i16x16();
6797 transmute(simd_select_bitmask(k, unpackhi, zero))
6798}
6799
6800/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6801///
6802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpackhi_epi16&expand=6004)
6803#[inline]
6804#[target_feature(enable = "avx512bw,avx512vl")]
6805#[cfg_attr(test, assert_instr(vpunpckhwd))]
6806pub unsafe fn _mm_mask_unpackhi_epi16(
6807 src: __m128i,
6808 k: __mmask8,
6809 a: __m128i,
6810 b: __m128i,
6811) -> __m128i {
6812 let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
6813 transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
6814}
6815
6816/// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6817///
6818/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpackhi_epi16&expand=6005)
6819#[inline]
6820#[target_feature(enable = "avx512bw,avx512vl")]
6821#[cfg_attr(test, assert_instr(vpunpckhwd))]
6822pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6823 let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
6824 let zero = _mm_setzero_si128().as_i16x8();
6825 transmute(simd_select_bitmask(k, unpackhi, zero))
6826}
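
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl): the high
// words of each 128-bit lane of a and b are interleaved, then the mask is applied.
//
//     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);         // a[i] == i
//     let b = _mm_set_epi16(17, 16, 15, 14, 13, 12, 11, 10); // b[i] == 10 + i
//     let r = _mm_maskz_unpackhi_epi16(0b0000_1111, a, b);
//     // the full interleave would be [4, 14, 5, 15, 6, 16, 7, 17]; with this mask the
//     // upper four elements are zeroed, giving [4, 14, 5, 15, 0, 0, 0, 0]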
6827
6828/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
6829///
6830/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi8&expand=6039)
6831#[inline]
6832#[target_feature(enable = "avx512bw")]
6833#[cfg_attr(test, assert_instr(vpunpckhbw))]
6834pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
6835 let a = a.as_i8x64();
6836 let b = b.as_i8x64();
6837 #[rustfmt::skip]
6838 let r: i8x64 = simd_shuffle64!(
6839 a,
6840 b,
6841 [
6842 8, 64+8, 9, 64+9,
6843 10, 64+10, 11, 64+11,
6844 12, 64+12, 13, 64+13,
6845 14, 64+14, 15, 64+15,
6846 24, 64+24, 25, 64+25,
6847 26, 64+26, 27, 64+27,
6848 28, 64+28, 29, 64+29,
6849 30, 64+30, 31, 64+31,
6850 40, 64+40, 41, 64+41,
6851 42, 64+42, 43, 64+43,
6852 44, 64+44, 45, 64+45,
6853 46, 64+46, 47, 64+47,
6854 56, 64+56, 57, 64+57,
6855 58, 64+58, 59, 64+59,
6856 60, 64+60, 61, 64+61,
6857 62, 64+62, 63, 64+63,
6858 ],
6859 );
6860 transmute(r)
6861}
6862
6863/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6864///
6865/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi8&expand=6037)
6866#[inline]
6867#[target_feature(enable = "avx512bw")]
6868#[cfg_attr(test, assert_instr(vpunpckhbw))]
6869pub unsafe fn _mm512_mask_unpackhi_epi8(
6870 src: __m512i,
6871 k: __mmask64,
6872 a: __m512i,
6873 b: __m512i,
6874) -> __m512i {
6875 let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
6876 transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
6877}
6878
6879/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6880///
6881/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi8&expand=6038)
6882#[inline]
6883#[target_feature(enable = "avx512bw")]
6884#[cfg_attr(test, assert_instr(vpunpckhbw))]
6885pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
6886 let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
6887 let zero = _mm512_setzero_si512().as_i8x64();
6888 transmute(simd_select_bitmask(k, unpackhi, zero))
6889}
6890
6891/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6892///
6893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpackhi_epi8&expand=6034)
6894#[inline]
6895#[target_feature(enable = "avx512bw,avx512vl")]
6896#[cfg_attr(test, assert_instr(vpunpckhbw))]
6897pub unsafe fn _mm256_mask_unpackhi_epi8(
6898 src: __m256i,
6899 k: __mmask32,
6900 a: __m256i,
6901 b: __m256i,
6902) -> __m256i {
6903 let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
6904 transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
6905}
6906
6907/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6908///
6909/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpackhi_epi8&expand=6035)
6910#[inline]
6911#[target_feature(enable = "avx512bw,avx512vl")]
6912#[cfg_attr(test, assert_instr(vpunpckhbw))]
6913pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
6914 let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
6915 let zero = _mm256_setzero_si256().as_i8x32();
6916 transmute(simd_select_bitmask(k, unpackhi, zero))
6917}
6918
6919/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6920///
6921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpackhi_epi8&expand=6031)
6922#[inline]
6923#[target_feature(enable = "avx512bw,avx512vl")]
6924#[cfg_attr(test, assert_instr(vpunpckhbw))]
6925pub unsafe fn _mm_mask_unpackhi_epi8(
6926 src: __m128i,
6927 k: __mmask16,
6928 a: __m128i,
6929 b: __m128i,
6930) -> __m128i {
6931 let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
6932 transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
6933}
6934
6935/// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6936///
6937/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpackhi_epi8&expand=6032)
6938#[inline]
6939#[target_feature(enable = "avx512bw,avx512vl")]
6940#[cfg_attr(test, assert_instr(vpunpckhbw))]
6941pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
6942 let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
6943 let zero = _mm_setzero_si128().as_i8x16();
6944 transmute(simd_select_bitmask(k, unpackhi, zero))
6945}
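
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl): bytes from
// the high half of each 128-bit lane are interleaved; clear mask bits keep src.
//
//     let a = _mm_set1_epi8(1);
//     let b = _mm_set1_epi8(2);
//     let r = _mm_mask_unpackhi_epi8(a, 0xFF00, a, b);
//     // bytes 0..=7 keep src (== 1); bytes 8..=15 hold the interleaved 1, 2, 1, 2, ...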
6946
6947/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
6948///
6949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi16&expand=6069)
6950#[inline]
6951#[target_feature(enable = "avx512bw")]
6952#[cfg_attr(test, assert_instr(vpunpcklwd))]
6953pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
6954 let a = a.as_i16x32();
6955 let b = b.as_i16x32();
6956 #[rustfmt::skip]
6957 let r: i16x32 = simd_shuffle32!(
6958 a,
6959 b,
6960 [
6961 0, 32+0, 1, 32+1,
6962 2, 32+2, 3, 32+3,
6963 8, 32+8, 9, 32+9,
6964 10, 32+10, 11, 32+11,
6965 16, 32+16, 17, 32+17,
6966 18, 32+18, 19, 32+19,
6967 24, 32+24, 25, 32+25,
6968 26, 32+26, 27, 32+27
6969 ],
6970 );
6971 transmute(r)
6972}
6973
6974/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6975///
6976/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi16&expand=6067)
6977#[inline]
6978#[target_feature(enable = "avx512bw")]
6979#[cfg_attr(test, assert_instr(vpunpcklwd))]
6980pub unsafe fn _mm512_mask_unpacklo_epi16(
6981 src: __m512i,
6982 k: __mmask32,
6983 a: __m512i,
6984 b: __m512i,
6985) -> __m512i {
6986 let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
6987 transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
6988}
6989
6990/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6991///
6992/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi16&expand=6068)
6993#[inline]
6994#[target_feature(enable = "avx512bw")]
6995#[cfg_attr(test, assert_instr(vpunpcklwd))]
6996pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
6997 let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
6998 let zero = _mm512_setzero_si512().as_i16x32();
6999 transmute(simd_select_bitmask(k, unpacklo, zero))
7000}
7001
7002/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7003///
7004/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpacklo_epi16&expand=6064)
7005#[inline]
7006#[target_feature(enable = "avx512bw,avx512vl")]
7007#[cfg_attr(test, assert_instr(vpunpcklwd))]
7008pub unsafe fn _mm256_mask_unpacklo_epi16(
7009 src: __m256i,
7010 k: __mmask16,
7011 a: __m256i,
7012 b: __m256i,
7013) -> __m256i {
7014 let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
7015 transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
7016}
7017
7018/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7019///
7020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpacklo_epi16&expand=6065)
7021#[inline]
7022#[target_feature(enable = "avx512bw,avx512vl")]
7023#[cfg_attr(test, assert_instr(vpunpcklwd))]
7024pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
7025 let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
7026 let zero = _mm256_setzero_si256().as_i16x16();
7027 transmute(simd_select_bitmask(k, unpacklo, zero))
7028}
7029
7030/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7031///
7032/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpacklo_epi16&expand=6061)
7033#[inline]
7034#[target_feature(enable = "avx512bw,avx512vl")]
7035#[cfg_attr(test, assert_instr(vpunpcklwd))]
7036pub unsafe fn _mm_mask_unpacklo_epi16(
7037 src: __m128i,
7038 k: __mmask8,
7039 a: __m128i,
7040 b: __m128i,
7041) -> __m128i {
7042 let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
7043 transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
7044}
7045
7046/// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7047///
7048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpacklo_epi16&expand=6062)
7049#[inline]
7050#[target_feature(enable = "avx512bw,avx512vl")]
7051#[cfg_attr(test, assert_instr(vpunpcklwd))]
7052pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
7053 let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
7054 let zero = _mm_setzero_si128().as_i16x8();
7055 transmute(simd_select_bitmask(k, unpacklo, zero))
7056}
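
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl): identical
// to the unpackhi forms but drawing from the low words of each 128-bit lane.
//
//     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);         // a[i] == i
//     let b = _mm_set_epi16(17, 16, 15, 14, 13, 12, 11, 10); // b[i] == 10 + i
//     let r = _mm_maskz_unpacklo_epi16(0xFF, a, b);          // [0, 10, 1, 11, 2, 12, 3, 13]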
7057
7058/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
7059///
7060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi8&expand=6096)
7061#[inline]
7062#[target_feature(enable = "avx512bw")]
7063#[cfg_attr(test, assert_instr(vpunpcklbw))]
7064pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
7065 let a = a.as_i8x64();
7066 let b = b.as_i8x64();
7067 #[rustfmt::skip]
7068 let r: i8x64 = simd_shuffle64!(
7069 a,
7070 b,
7071 [
7072 0, 64+0, 1, 64+1,
7073 2, 64+2, 3, 64+3,
7074 4, 64+4, 5, 64+5,
7075 6, 64+6, 7, 64+7,
7076 16, 64+16, 17, 64+17,
7077 18, 64+18, 19, 64+19,
7078 20, 64+20, 21, 64+21,
7079 22, 64+22, 23, 64+23,
7080 32, 64+32, 33, 64+33,
7081 34, 64+34, 35, 64+35,
7082 36, 64+36, 37, 64+37,
7083 38, 64+38, 39, 64+39,
7084 48, 64+48, 49, 64+49,
7085 50, 64+50, 51, 64+51,
7086 52, 64+52, 53, 64+53,
7087 54, 64+54, 55, 64+55,
7088 ],
7089 );
7090 transmute(r)
7091}
7092
7093/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7094///
7095/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi8&expand=6094)
7096#[inline]
7097#[target_feature(enable = "avx512bw")]
7098#[cfg_attr(test, assert_instr(vpunpcklbw))]
7099pub unsafe fn _mm512_mask_unpacklo_epi8(
7100 src: __m512i,
7101 k: __mmask64,
7102 a: __m512i,
7103 b: __m512i,
7104) -> __m512i {
7105 let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
7106 transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
7107}
7108
7109/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7110///
7111/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi8&expand=6095)
7112#[inline]
7113#[target_feature(enable = "avx512bw")]
7114#[cfg_attr(test, assert_instr(vpunpcklbw))]
7115pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7116 let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
7117 let zero = _mm512_setzero_si512().as_i8x64();
7118 transmute(simd_select_bitmask(k, unpacklo, zero))
7119}
7120
7121/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7122///
7123/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpacklo_epi8&expand=6091)
7124#[inline]
7125#[target_feature(enable = "avx512bw,avx512vl")]
7126#[cfg_attr(test, assert_instr(vpunpcklbw))]
7127pub unsafe fn _mm256_mask_unpacklo_epi8(
7128 src: __m256i,
7129 k: __mmask32,
7130 a: __m256i,
7131 b: __m256i,
7132) -> __m256i {
7133 let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
7134 transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
7135}
7136
7137/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7138///
7139/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpacklo_epi8&expand=6092)
7140#[inline]
7141#[target_feature(enable = "avx512bw,avx512vl")]
7142#[cfg_attr(test, assert_instr(vpunpcklbw))]
7143pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7144 let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
7145 let zero = _mm256_setzero_si256().as_i8x32();
7146 transmute(simd_select_bitmask(k, unpacklo, zero))
7147}
7148
7149/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7150///
7151/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpacklo_epi8&expand=6088)
7152#[inline]
7153#[target_feature(enable = "avx512bw,avx512vl")]
7154#[cfg_attr(test, assert_instr(vpunpcklbw))]
7155pub unsafe fn _mm_mask_unpacklo_epi8(
7156 src: __m128i,
7157 k: __mmask16,
7158 a: __m128i,
7159 b: __m128i,
7160) -> __m128i {
7161 let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
7162 transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
7163}
7164
7165/// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7166///
7167/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpacklo_epi8&expand=6089)
7168#[inline]
7169#[target_feature(enable = "avx512bw,avx512vl")]
7170#[cfg_attr(test, assert_instr(vpunpcklbw))]
7171pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7172 let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
7173 let zero = _mm_setzero_si128().as_i8x16();
7174 transmute(simd_select_bitmask(k, unpacklo, zero))
7175}
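
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl):
//
//     let a = _mm_set1_epi8(1);
//     let b = _mm_set1_epi8(2);
//     let r = _mm_maskz_unpacklo_epi8(0x000F, a, b);
//     // bytes 0..=3 hold 1, 2, 1, 2; the remaining bytes are zeroed by the mask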
7176
7177/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7178///
7179/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi16&expand=3795)
7180#[inline]
7181#[target_feature(enable = "avx512bw")]
7182#[cfg_attr(test, assert_instr(vmovdqu16))]
7183pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
7184 let mov = a.as_i16x32();
7185 transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
7186}
7187
7188/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7189///
7190/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi16&expand=3796)
7191#[inline]
7192#[target_feature(enable = "avx512bw")]
7193#[cfg_attr(test, assert_instr(vmovdqu16))]
7194pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
7195 let mov = a.as_i16x32();
7196 let zero = _mm512_setzero_si512().as_i16x32();
7197 transmute(simd_select_bitmask(k, mov, zero))
7198}
7199
7200/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7201///
7202/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mov_epi16&expand=3793)
7203#[inline]
7204#[target_feature(enable = "avx512bw,avx512vl")]
7205#[cfg_attr(test, assert_instr(vmovdqu16))]
7206pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
7207 let mov = a.as_i16x16();
7208 transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
7209}
7210
7211/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7212///
7213/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mov_epi16&expand=3794)
7214#[inline]
7215#[target_feature(enable = "avx512bw,avx512vl")]
7216#[cfg_attr(test, assert_instr(vmovdqu16))]
7217pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
7218 let mov = a.as_i16x16();
7219 let zero = _mm256_setzero_si256().as_i16x16();
7220 transmute(simd_select_bitmask(k, mov, zero))
7221}
7222
7223/// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7224///
7225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mov_epi16&expand=3791)
7226#[inline]
7227#[target_feature(enable = "avx512bw,avx512vl")]
7228#[cfg_attr(test, assert_instr(vmovdqu16))]
7229pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
7230 let mov = a.as_i16x8();
7231 transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
7232}
7233
7234/// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7235///
7236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mov_epi16&expand=3792)
7237#[inline]
7238#[target_feature(enable = "avx512bw,avx512vl")]
7239#[cfg_attr(test, assert_instr(vmovdqu16))]
7240pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
7241 let mov = a.as_i16x8();
7242 let zero = _mm_setzero_si128().as_i16x8();
7243 transmute(simd_select_bitmask(k, mov, zero))
7244}
7245
7246/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7247///
7248/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi8&expand=3813)
7249#[inline]
7250#[target_feature(enable = "avx512bw")]
7251#[cfg_attr(test, assert_instr(vmovdqu8))]
7252pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
7253 let mov = a.as_i8x64();
7254 transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
7255}
7256
7257/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7258///
7259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi8&expand=3814)
7260#[inline]
7261#[target_feature(enable = "avx512bw")]
7262#[cfg_attr(test, assert_instr(vmovdqu8))]
7263pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
7264 let mov = a.as_i8x64();
7265 let zero = _mm512_setzero_si512().as_i8x64();
7266 transmute(simd_select_bitmask(k, mov, zero))
7267}
7268
7269/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7270///
7271/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mov_epi8&expand=3811)
7272#[inline]
7273#[target_feature(enable = "avx512bw,avx512vl")]
7274#[cfg_attr(test, assert_instr(vmovdqu8))]
7275pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
7276 let mov = a.as_i8x32();
7277 transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
7278}
7279
7280/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7281///
7282/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mov_epi8&expand=3812)
7283#[inline]
7284#[target_feature(enable = "avx512bw,avx512vl")]
7285#[cfg_attr(test, assert_instr(vmovdqu8))]
7286pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
7287 let mov = a.as_i8x32();
7288 let zero = _mm256_setzero_si256().as_i8x32();
7289 transmute(simd_select_bitmask(k, mov, zero))
7290}
7291
7292/// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7293///
7294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mov_epi8&expand=3809)
7295#[inline]
7296#[target_feature(enable = "avx512bw,avx512vl")]
7297#[cfg_attr(test, assert_instr(vmovdqu8))]
7298pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
7299 let mov = a.as_i8x16();
7300 transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
7301}
7302
7303/// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7304///
7305/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mov_epi8&expand=3810)
7306#[inline]
7307#[target_feature(enable = "avx512bw,avx512vl")]
7308#[cfg_attr(test, assert_instr(vmovdqu8))]
7309pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
7310 let mov = a.as_i8x16();
7311 let zero = _mm_setzero_si128().as_i8x16();
7312 transmute(simd_select_bitmask(k, mov, zero))
7313}
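
// Illustrative usage sketch (hypothetical values; assumes avx512bw+avx512vl): mask_mov
// and maskz_mov are the building blocks for merge- and zero-masking a plain copy.
//
//     let src = _mm_set1_epi16(-1);
//     let a = _mm_set1_epi16(7);
//     let merged = _mm_mask_mov_epi16(src, 0b0000_0011, a); // elements 0 and 1 become 7; others keep -1
//     let zeroed = _mm_maskz_mov_epi16(0b0000_0011, a);     // elements 0 and 1 become 7; others become 0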
7314
7315/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7316///
7317/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi16&expand=4942)
7318#[inline]
7319#[target_feature(enable = "avx512bw")]
7320#[cfg_attr(test, assert_instr(vpbroadcastw))]
7321pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
7322 let r = _mm512_set1_epi16(a).as_i16x32();
7323 transmute(simd_select_bitmask(k, r, src.as_i16x32()))
7324}
7325
7326/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7327///
7328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi16&expand=4943)
7329#[inline]
7330#[target_feature(enable = "avx512bw")]
7331#[cfg_attr(test, assert_instr(vpbroadcastw))]
7332pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
7333 let r = _mm512_set1_epi16(a).as_i16x32();
7334 let zero = _mm512_setzero_si512().as_i16x32();
7335 transmute(simd_select_bitmask(k, r, zero))
7336}
7337
7338/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7339///
7340/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_set1_epi16&expand=4939)
7341#[inline]
7342#[target_feature(enable = "avx512bw,avx512vl")]
7343#[cfg_attr(test, assert_instr(vpbroadcastw))]
7344pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
7345 let r = _mm256_set1_epi16(a).as_i16x16();
7346 transmute(simd_select_bitmask(k, r, src.as_i16x16()))
7347}
7348
7349/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7350///
7351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_set1_epi16&expand=4940)
7352#[inline]
7353#[target_feature(enable = "avx512bw,avx512vl")]
7354#[cfg_attr(test, assert_instr(vpbroadcastw))]
7355pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
7356 let r = _mm256_set1_epi16(a).as_i16x16();
7357 let zero = _mm256_setzero_si256().as_i16x16();
7358 transmute(simd_select_bitmask(k, r, zero))
7359}
7360
7361/// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7362///
7363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_set1_epi16&expand=4936)
7364#[inline]
7365#[target_feature(enable = "avx512bw,avx512vl")]
7366#[cfg_attr(test, assert_instr(vpbroadcastw))]
7367pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
7368 let r = _mm_set1_epi16(a).as_i16x8();
7369 transmute(simd_select_bitmask(k, r, src.as_i16x8()))
7370}
7371
7372/// Broadcast 16-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7373///
7374/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_set1_epi16&expand=4937)
7375#[inline]
7376#[target_feature(enable = "avx512bw,avx512vl")]
7377#[cfg_attr(test, assert_instr(vpbroadcastw))]
7378pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
7379 let r = _mm_set1_epi16(a).as_i16x8();
7380 let zero = _mm_setzero_si128().as_i16x8();
7381 transmute(simd_select_bitmask(k, r, zero))
7382}
7383
7384/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7385///
7386/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi8&expand=4970)
7387#[inline]
7388#[target_feature(enable = "avx512bw")]
7389#[cfg_attr(test, assert_instr(vpbroadcastb))]
7390pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
7391 let r = _mm512_set1_epi8(a).as_i8x64();
7392 transmute(simd_select_bitmask(k, r, src.as_i8x64()))
7393}
7394
7395/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7396///
7397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi8&expand=4971)
7398#[inline]
7399#[target_feature(enable = "avx512bw")]
7400#[cfg_attr(test, assert_instr(vpbroadcastb))]
7401pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
7402 let r = _mm512_set1_epi8(a).as_i8x64();
7403 let zero = _mm512_setzero_si512().as_i8x64();
7404 transmute(simd_select_bitmask(k, r, zero))
7405}
7406
7407/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7408///
7409/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_set1_epi8&expand=4967)
7410#[inline]
7411#[target_feature(enable = "avx512bw,avx512vl")]
7412#[cfg_attr(test, assert_instr(vpbroadcastb))]
7413pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
7414 let r = _mm256_set1_epi8(a).as_i8x32();
7415 transmute(simd_select_bitmask(k, r, src.as_i8x32()))
7416}
7417
7418/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7419///
7420/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_set1_epi8&expand=4968)
7421#[inline]
7422#[target_feature(enable = "avx512bw,avx512vl")]
7423#[cfg_attr(test, assert_instr(vpbroadcastb))]
7424pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
7425 let r = _mm256_set1_epi8(a).as_i8x32();
7426 let zero = _mm256_setzero_si256().as_i8x32();
7427 transmute(simd_select_bitmask(k, r, zero))
7428}
7429
7430/// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7431///
7432/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_set1_epi8&expand=4964)
7433#[inline]
7434#[target_feature(enable = "avx512bw,avx512vl")]
7435#[cfg_attr(test, assert_instr(vpbroadcastb))]
7436pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
7437 let r = _mm_set1_epi8(a).as_i8x16();
7438 transmute(simd_select_bitmask(k, r, src.as_i8x16()))
7439}
7440
7441/// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7442///
7443/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_set1_epi8&expand=4965)
7444#[inline]
7445#[target_feature(enable = "avx512bw,avx512vl")]
7446#[cfg_attr(test, assert_instr(vpbroadcastb))]
7447pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
7448 let r = _mm_set1_epi8(a).as_i8x16();
7449 let zero = _mm_setzero_si128().as_i8x16();
7450 transmute(simd_select_bitmask(k, r, zero))
7451}
7452
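// --- Illustrative usage sketch (not part of the original source) ---
// A minimal example of writemask vs. zeromask broadcast, assuming the code is
// built for and run on an AVX-512BW/VL-capable CPU. The helper name is
// hypothetical and exists only for illustration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_masked_broadcast_epi16() {
    let src = _mm_set1_epi16(-1);
    // Bit i of the mask controls lane i: set bits take the broadcast value,
    // clear bits fall back to `src` (mask variant) or to zero (maskz variant).
    let merged = _mm_mask_set1_epi16(src, 0b0000_1111, 7);
    let zeroed = _mm_maskz_set1_epi16(0b0000_1111, 7);
    let m: [i16; 8] = transmute(merged);
    let z: [i16; 8] = transmute(zeroed);
    assert_eq!(m, [7, 7, 7, 7, -1, -1, -1, -1]);
    assert_eq!(z, [7, 7, 7, 7, 0, 0, 0, 0]);
}
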
7453/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
7454///
7455/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221)
7456#[inline]
7457#[target_feature(enable = "avx512bw")]
17df50a5
XL
7458#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
7459#[rustc_legacy_const_generics(1)]
7460pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
7461 static_assert_imm8!(IMM8);
fc512014 7462 let a = a.as_i16x32();
17df50a5
XL
7463 let r: i16x32 = simd_shuffle32!(
7464 a,
7465 a,
7466 <const IMM8: i32> [
7467 IMM8 as u32 & 0b11,
7468 (IMM8 as u32 >> 2) & 0b11,
7469 (IMM8 as u32 >> 4) & 0b11,
7470 (IMM8 as u32 >> 6) & 0b11,
7471 4,
7472 5,
7473 6,
7474 7,
7475 (IMM8 as u32 & 0b11) + 8,
7476 ((IMM8 as u32 >> 2) & 0b11) + 8,
7477 ((IMM8 as u32 >> 4) & 0b11) + 8,
7478 ((IMM8 as u32 >> 6) & 0b11) + 8,
7479 12,
7480 13,
7481 14,
7482 15,
7483 (IMM8 as u32 & 0b11) + 16,
7484 ((IMM8 as u32 >> 2) & 0b11) + 16,
7485 ((IMM8 as u32 >> 4) & 0b11) + 16,
7486 ((IMM8 as u32 >> 6) & 0b11) + 16,
7487 20,
7488 21,
7489 22,
7490 23,
7491 (IMM8 as u32 & 0b11) + 24,
7492 ((IMM8 as u32 >> 2) & 0b11) + 24,
7493 ((IMM8 as u32 >> 4) & 0b11) + 24,
7494 ((IMM8 as u32 >> 6) & 0b11) + 24,
7495 28,
7496 29,
7497 30,
7498 31,
7499 ],
7500 );
fc512014
XL
7501 transmute(r)
7502}
7503
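// --- Illustrative usage sketch (not part of the original source) ---
// IMM8 is split into four 2-bit fields; field n selects which of the four low
// 16-bit lanes of each 128-bit lane becomes low lane n, while lanes 4..8 of
// each 128-bit lane pass through unchanged. A hypothetical example with
// IMM8 = 0b00_01_10_11, which reverses each low quad:
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_shufflelo_epi16() {
    let mut vals = [0i16; 32];
    for i in 0..32 {
        vals[i] = i as i16;
    }
    let a: __m512i = transmute(vals);
    let r = _mm512_shufflelo_epi16::<0b00_01_10_11>(a);
    let lanes: [i16; 32] = transmute(r);
    assert_eq!(&lanes[0..8], &[3, 2, 1, 0, 4, 5, 6, 7]);
    assert_eq!(&lanes[8..16], &[11, 10, 9, 8, 12, 13, 14, 15]);
}
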
7504/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
7505///
7506/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219)
7507#[inline]
7508#[target_feature(enable = "avx512bw")]
17df50a5
XL
7509#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
7510#[rustc_legacy_const_generics(3)]
7511pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
fc512014
XL
7512 src: __m512i,
7513 k: __mmask32,
7514 a: __m512i,
fc512014 7515) -> __m512i {
17df50a5
XL
7516 static_assert_imm8!(IMM8);
7517 let r = _mm512_shufflelo_epi16::<IMM8>(a);
cdc7bbd5 7518 transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
fc512014
XL
7519}
7520
7521/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7522///
7523/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220)
7524#[inline]
7525#[target_feature(enable = "avx512bw")]
17df50a5
XL
7526#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
7527#[rustc_legacy_const_generics(2)]
7528pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
7529 static_assert_imm8!(IMM8);
7530 let r = _mm512_shufflelo_epi16::<IMM8>(a);
cdc7bbd5
XL
7531 let zero = _mm512_setzero_si512().as_i16x32();
7532 transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
7533}
7534
7535/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
7536///
7537/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflelo_epi16&expand=5216)
7538#[inline]
7539#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7540#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
7541#[rustc_legacy_const_generics(3)]
7542pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
cdc7bbd5
XL
7543 src: __m256i,
7544 k: __mmask16,
7545 a: __m256i,
cdc7bbd5 7546) -> __m256i {
17df50a5
XL
7547 static_assert_imm8!(IMM8);
7548 let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
cdc7bbd5
XL
7549 transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
7550}
7551
7552/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7553///
7554/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflelo_epi16&expand=5217)
7555#[inline]
7556#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7557#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
7558#[rustc_legacy_const_generics(2)]
7559pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7560 static_assert_imm8!(IMM8);
7561 let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
cdc7bbd5
XL
7562 let zero = _mm256_setzero_si256().as_i16x16();
7563 transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
7564}
7565
7566/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
7567///
7568/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflelo_epi16&expand=5213)
7569#[inline]
7570#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7571#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
7572#[rustc_legacy_const_generics(3)]
7573pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
cdc7bbd5
XL
7574 src: __m128i,
7575 k: __mmask8,
7576 a: __m128i,
cdc7bbd5 7577) -> __m128i {
17df50a5
XL
7578 static_assert_imm8!(IMM8);
7579 let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
cdc7bbd5
XL
7580 transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
7581}
7582
7583/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7584///
7585/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflelo_epi16&expand=5214)
7586#[inline]
7587#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7588#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
7589#[rustc_legacy_const_generics(2)]
7590pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7591 static_assert_imm8!(IMM8);
7592 let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
cdc7bbd5
XL
7593 let zero = _mm_setzero_si128().as_i16x8();
7594 transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
fc512014
XL
7595}
7596
7597/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
7598///
7599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212)
7600#[inline]
7601#[target_feature(enable = "avx512bw")]
17df50a5
XL
7602#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
7603#[rustc_legacy_const_generics(1)]
7604pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
7605 static_assert_imm8!(IMM8);
fc512014 7606 let a = a.as_i16x32();
17df50a5
XL
7607 let r: i16x32 = simd_shuffle32!(
7608 a,
7609 a,
7610 <const IMM8: i32> [
7611 0,
7612 1,
7613 2,
7614 3,
7615 (IMM8 as u32 & 0b11) + 4,
7616 ((IMM8 as u32 >> 2) & 0b11) + 4,
7617 ((IMM8 as u32 >> 4) & 0b11) + 4,
7618 ((IMM8 as u32 >> 6) & 0b11) + 4,
7619 8,
7620 9,
7621 10,
7622 11,
7623 (IMM8 as u32 & 0b11) + 12,
7624 ((IMM8 as u32 >> 2) & 0b11) + 12,
7625 ((IMM8 as u32 >> 4) & 0b11) + 12,
7626 ((IMM8 as u32 >> 6) & 0b11) + 12,
7627 16,
7628 17,
7629 18,
7630 19,
7631 (IMM8 as u32 & 0b11) + 20,
7632 ((IMM8 as u32 >> 2) & 0b11) + 20,
7633 ((IMM8 as u32 >> 4) & 0b11) + 20,
7634 ((IMM8 as u32 >> 6) & 0b11) + 20,
7635 24,
7636 25,
7637 26,
7638 27,
7639 (IMM8 as u32 & 0b11) + 28,
7640 ((IMM8 as u32 >> 2) & 0b11) + 28,
7641 ((IMM8 as u32 >> 4) & 0b11) + 28,
7642 ((IMM8 as u32 >> 6) & 0b11) + 28,
7643 ],
7644 );
fc512014
XL
7645 transmute(r)
7646}
7647
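// --- Illustrative usage sketch (not part of the original source) ---
// The high counterpart of the example above: with IMM8 = 0 every 2-bit field
// selects lane 4 of the 128-bit lane, so lanes 4..8 of each lane are all
// replaced by lane 4 while lanes 0..4 pass through. Hypothetical helper,
// for illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_shufflehi_epi16() {
    let mut vals = [0i16; 32];
    for i in 0..32 {
        vals[i] = i as i16;
    }
    let a: __m512i = transmute(vals);
    let r = _mm512_shufflehi_epi16::<0>(a);
    let lanes: [i16; 32] = transmute(r);
    assert_eq!(&lanes[0..8], &[0, 1, 2, 3, 4, 4, 4, 4]);
}
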
7648/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
7649///
7650/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210)
7651#[inline]
7652#[target_feature(enable = "avx512bw")]
17df50a5
XL
7653#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
7654#[rustc_legacy_const_generics(3)]
7655pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
fc512014
XL
7656 src: __m512i,
7657 k: __mmask32,
7658 a: __m512i,
fc512014 7659) -> __m512i {
17df50a5
XL
7660 static_assert_imm8!(IMM8);
7661 let r = _mm512_shufflehi_epi16::<IMM8>(a);
cdc7bbd5 7662 transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
fc512014
XL
7663}
7664
7665/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7666///
7667/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211)
7668#[inline]
7669#[target_feature(enable = "avx512bw")]
17df50a5
XL
7670#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
7671#[rustc_legacy_const_generics(2)]
7672pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
7673 static_assert_imm8!(IMM8);
7674 let r = _mm512_shufflehi_epi16::<IMM8>(a);
cdc7bbd5
XL
7675 let zero = _mm512_setzero_si512().as_i16x32();
7676 transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
7677}
7678
7679/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
7680///
7681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflehi_epi16&expand=5207)
7682#[inline]
7683#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7684#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
7685#[rustc_legacy_const_generics(3)]
7686pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
cdc7bbd5
XL
7687 src: __m256i,
7688 k: __mmask16,
7689 a: __m256i,
cdc7bbd5 7690) -> __m256i {
17df50a5
XL
7691 static_assert_imm8!(IMM8);
7692 let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
cdc7bbd5
XL
7693 transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
7694}
7695
7696/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7697///
7698/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflehi_epi16&expand=5208)
7699#[inline]
7700#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7701#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
7702#[rustc_legacy_const_generics(2)]
7703pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
7704 static_assert_imm8!(IMM8);
7705 let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
cdc7bbd5
XL
7706 let zero = _mm256_setzero_si256().as_i16x16();
7707 transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
7708}
7709
7710/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
7711///
7712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflehi_epi16&expand=5204)
7713#[inline]
7714#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7715#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
7716#[rustc_legacy_const_generics(3)]
7717pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
cdc7bbd5
XL
7718 src: __m128i,
7719 k: __mmask8,
7720 a: __m128i,
cdc7bbd5 7721) -> __m128i {
17df50a5
XL
7722 static_assert_imm8!(IMM8);
7723 let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
cdc7bbd5
XL
7724 transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
7725}
7726
7727/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7728///
7729/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflehi_epi16&expand=5205)
7730#[inline]
7731#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
7732#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
7733#[rustc_legacy_const_generics(2)]
7734pub unsafe fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
7735 static_assert_imm8!(IMM8);
7736 let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
cdc7bbd5
XL
7737 let zero = _mm_setzero_si128().as_i16x8();
7738 transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
7739}
7740
7741/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst.
7742///
7743/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_epi8&expand=5159)
7744#[inline]
7745#[target_feature(enable = "avx512bw")]
7746#[cfg_attr(test, assert_instr(vpshufb))]
7747pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i {
7748 transmute(vpshufb(a.as_i8x64(), b.as_i8x64()))
7749}
7750
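// --- Illustrative usage sketch (not part of the original source) ---
// vpshufb picks bytes within each 128-bit lane: the low four bits of each
// control byte index into the same lane of `a`, and a set sign bit zeroes the
// result byte. A minimal sketch assuming an AVX-512BW target; the helper name
// is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_shuffle_epi8() {
    let mut bytes = [0i8; 64];
    for i in 0..64 {
        bytes[i] = i as i8;
    }
    let a: __m512i = transmute(bytes);
    // Control: byte 0 selects in-lane index 3, byte 1 is zeroed (sign bit set),
    // all remaining control bytes select in-lane index 0.
    let mut ctrl = [0i8; 64];
    ctrl[0] = 3;
    ctrl[1] = -128;
    let b: __m512i = transmute(ctrl);
    let r: [i8; 64] = transmute(_mm512_shuffle_epi8(a, b));
    assert_eq!(r[0], 3); // in-lane index 3 of the first 128-bit lane
    assert_eq!(r[1], 0); // zeroed because the control sign bit is set
    assert_eq!(r[16], 16); // in-lane index 0 of the second 128-bit lane
}
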
7751/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7752///
7753/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_epi8&expand=5157)
7754#[inline]
7755#[target_feature(enable = "avx512bw")]
7756#[cfg_attr(test, assert_instr(vpshufb))]
7757pub unsafe fn _mm512_mask_shuffle_epi8(
7758 src: __m512i,
7759 k: __mmask64,
7760 a: __m512i,
7761 b: __m512i,
7762) -> __m512i {
7763 let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
7764 transmute(simd_select_bitmask(k, shuffle, src.as_i8x64()))
7765}
7766
7767/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7768///
7769/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_epi8&expand=5158)
7770#[inline]
7771#[target_feature(enable = "avx512bw")]
7772#[cfg_attr(test, assert_instr(vpshufb))]
7773pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
7774 let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64();
7775 let zero = _mm512_setzero_si512().as_i8x64();
7776 transmute(simd_select_bitmask(k, shuffle, zero))
7777}
7778
7779/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7780///
7781/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_epi8&expand=5154)
7782#[inline]
7783#[target_feature(enable = "avx512bw,avx512vl")]
7784#[cfg_attr(test, assert_instr(vpshufb))]
7785pub unsafe fn _mm256_mask_shuffle_epi8(
7786 src: __m256i,
7787 k: __mmask32,
7788 a: __m256i,
7789 b: __m256i,
7790) -> __m256i {
7791 let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
7792 transmute(simd_select_bitmask(k, shuffle, src.as_i8x32()))
fc512014
XL
7793}
7794
cdc7bbd5 7795/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
fc512014 7796///
cdc7bbd5 7797/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_epi8&expand=5155)
fc512014 7798#[inline]
cdc7bbd5 7799#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 7800#[cfg_attr(test, assert_instr(vpshufb))]
cdc7bbd5
XL
7801pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
7802 let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32();
7803 let zero = _mm256_setzero_si256().as_i8x32();
7804 transmute(simd_select_bitmask(k, shuffle, zero))
fc512014
XL
7805}
7806
7807/// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7808///
cdc7bbd5 7809/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_epi8&expand=5151)
fc512014 7810#[inline]
cdc7bbd5 7811#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 7812#[cfg_attr(test, assert_instr(vpshufb))]
cdc7bbd5
XL
7813pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7814 let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
7815 transmute(simd_select_bitmask(k, shuffle, src.as_i8x16()))
fc512014
XL
7816}
7817
7818/// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7819///
cdc7bbd5 7820/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_epi8&expand=5152)
fc512014 7821#[inline]
cdc7bbd5 7822#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 7823#[cfg_attr(test, assert_instr(vpshufb))]
cdc7bbd5
XL
7824pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
7825 let shuffle = _mm_shuffle_epi8(a, b).as_i8x16();
7826 let zero = _mm_setzero_si128().as_i8x16();
fc512014
XL
7827 transmute(simd_select_bitmask(k, shuffle, zero))
7828}
7829
7830/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7831///
7832/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi16_mask&expand=5884)
7833#[inline]
7834#[target_feature(enable = "avx512bw")]
7835#[cfg_attr(test, assert_instr(vptestmw))]
7836pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
7837 let and = _mm512_and_si512(a, b);
7838 let zero = _mm512_setzero_si512();
7839 _mm512_cmpneq_epi16_mask(and, zero)
7840}
7841
7842/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
7843///
7844/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi16_mask&expand=5883)
7845#[inline]
7846#[target_feature(enable = "avx512bw")]
7847#[cfg_attr(test, assert_instr(vptestmw))]
7848pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
7849 let and = _mm512_and_si512(a, b);
7850 let zero = _mm512_setzero_si512();
7851 _mm512_mask_cmpneq_epi16_mask(k, and, zero)
7852}
7853
cdc7bbd5
XL
7854/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7855///
7856/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_test_epi16_mask&expand=5882)
7857#[inline]
7858#[target_feature(enable = "avx512bw,avx512vl")]
7859#[cfg_attr(test, assert_instr(vptestmw))]
7860pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
7861 let and = _mm256_and_si256(a, b);
7862 let zero = _mm256_setzero_si256();
7863 _mm256_cmpneq_epi16_mask(and, zero)
7864}
7865
7866/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
7867///
7868/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_test_epi16_mask&expand=5881)
7869#[inline]
7870#[target_feature(enable = "avx512bw,avx512vl")]
7871#[cfg_attr(test, assert_instr(vptestmw))]
7872pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
7873 let and = _mm256_and_si256(a, b);
7874 let zero = _mm256_setzero_si256();
7875 _mm256_mask_cmpneq_epi16_mask(k, and, zero)
7876}
7877
7878/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7879///
7880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_epi16_mask&expand=5880)
7881#[inline]
7882#[target_feature(enable = "avx512bw,avx512vl")]
7883#[cfg_attr(test, assert_instr(vptestmw))]
7884pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
7885 let and = _mm_and_si128(a, b);
7886 let zero = _mm_setzero_si128();
7887 _mm_cmpneq_epi16_mask(and, zero)
7888}
7889
7890/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
7891///
7892/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_test_epi16_mask&expand=5879)
7893#[inline]
7894#[target_feature(enable = "avx512bw,avx512vl")]
7895#[cfg_attr(test, assert_instr(vptestmw))]
7896pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
7897 let and = _mm_and_si128(a, b);
7898 let zero = _mm_setzero_si128();
7899 _mm_mask_cmpneq_epi16_mask(k, and, zero)
7900}
7901
fc512014
XL
7902/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7903///
7904/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi8_mask&expand=5902)
7905#[inline]
7906#[target_feature(enable = "avx512bw")]
7907#[cfg_attr(test, assert_instr(vptestmb))]
7908pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
7909 let and = _mm512_and_si512(a, b);
7910 let zero = _mm512_setzero_si512();
7911 _mm512_cmpneq_epi8_mask(and, zero)
7912}
7913
7914/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
7915///
7916/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi8_mask&expand=5901)
7917#[inline]
7918#[target_feature(enable = "avx512bw")]
7919#[cfg_attr(test, assert_instr(vptestmb))]
7920pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
7921 let and = _mm512_and_si512(a, b);
7922 let zero = _mm512_setzero_si512();
7923 _mm512_mask_cmpneq_epi8_mask(k, and, zero)
7924}
7925
cdc7bbd5
XL
7926/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7927///
7928/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_test_epi8_mask&expand=5900)
7929#[inline]
7930#[target_feature(enable = "avx512bw,avx512vl")]
7931#[cfg_attr(test, assert_instr(vptestmb))]
7932pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
7933 let and = _mm256_and_si256(a, b);
7934 let zero = _mm256_setzero_si256();
7935 _mm256_cmpneq_epi8_mask(and, zero)
7936}
7937
7938/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
7939///
7940/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_test_epi8_mask&expand=5899)
7941#[inline]
7942#[target_feature(enable = "avx512bw,avx512vl")]
7943#[cfg_attr(test, assert_instr(vptestmb))]
7944pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
7945 let and = _mm256_and_si256(a, b);
7946 let zero = _mm256_setzero_si256();
7947 _mm256_mask_cmpneq_epi8_mask(k, and, zero)
7948}
7949
7950/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
7951///
7952/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_epi8_mask&expand=5898)
7953#[inline]
7954#[target_feature(enable = "avx512bw,avx512vl")]
7955#[cfg_attr(test, assert_instr(vptestmb))]
7956pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
7957 let and = _mm_and_si128(a, b);
7958 let zero = _mm_setzero_si128();
7959 _mm_cmpneq_epi8_mask(and, zero)
7960}
7961
7962/// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
7963///
7964/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_test_epi8_mask&expand=5897)
7965#[inline]
7966#[target_feature(enable = "avx512bw,avx512vl")]
7967#[cfg_attr(test, assert_instr(vptestmb))]
7968pub unsafe fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
7969 let and = _mm_and_si128(a, b);
7970 let zero = _mm_setzero_si128();
7971 _mm_mask_cmpneq_epi8_mask(k, and, zero)
7972}
7973
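// --- Illustrative usage sketch (not part of the original source) ---
// `test` sets a mask bit where `a & b` is non-zero in that element; the masked
// form additionally requires the corresponding bit of `k` to be set.
// Hypothetical helper, assuming an AVX-512BW target.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_test_epi16_mask() {
    let a = _mm512_set1_epi16(0b0101);
    let b = _mm512_set1_epi16(0b0100);
    // Every element of a & b is 0b0100 != 0, so all 32 mask bits are set.
    assert_eq!(_mm512_test_epi16_mask(a, b), u32::MAX);
    // With a zero writemask nothing survives.
    assert_eq!(_mm512_mask_test_epi16_mask(0, a, b), 0);
}
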
fc512014
XL
7974/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
7975///
7976/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi16_mask&expand=5915)
7977#[inline]
7978#[target_feature(enable = "avx512bw")]
7979#[cfg_attr(test, assert_instr(vptestnmw))]
7980pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
7981 let and = _mm512_and_si512(a, b);
7982 let zero = _mm512_setzero_si512();
7983 _mm512_cmpeq_epi16_mask(and, zero)
7984}
7985
7986/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
7987///
7988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi16_mask&expand=5914)
7989#[inline]
7990#[target_feature(enable = "avx512bw")]
7991#[cfg_attr(test, assert_instr(vptestnmw))]
7992pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
7993 let and = _mm512_and_si512(a, b);
7994 let zero = _mm512_setzero_si512();
7995 _mm512_mask_cmpeq_epi16_mask(k, and, zero)
7996}
7997
cdc7bbd5
XL
7998/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
7999///
8000/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testn_epi16_mask&expand=5913)
8001#[inline]
8002#[target_feature(enable = "avx512bw,avx512vl")]
8003#[cfg_attr(test, assert_instr(vptestnmw))]
8004pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
8005 let and = _mm256_and_si256(a, b);
8006 let zero = _mm256_setzero_si256();
8007 _mm256_cmpeq_epi16_mask(and, zero)
8008}
8009
8010/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
8011///
8012/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_testn_epi16_mask&expand=5912)
8013#[inline]
8014#[target_feature(enable = "avx512bw,avx512vl")]
8015#[cfg_attr(test, assert_instr(vptestnmw))]
8016pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
8017 let and = _mm256_and_si256(a, b);
8018 let zero = _mm256_setzero_si256();
8019 _mm256_mask_cmpeq_epi16_mask(k, and, zero)
8020}
8021
8022/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
8023///
8024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testn_epi16_mask&expand=5911)
8025#[inline]
8026#[target_feature(enable = "avx512bw,avx512vl")]
8027#[cfg_attr(test, assert_instr(vptestnmw))]
8028pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
8029 let and = _mm_and_si128(a, b);
8030 let zero = _mm_setzero_si128();
8031 _mm_cmpeq_epi16_mask(and, zero)
8032}
8033
8034/// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
8035///
8036/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_testn_epi16_mask&expand=5910)
8037#[inline]
8038#[target_feature(enable = "avx512bw,avx512vl")]
8039#[cfg_attr(test, assert_instr(vptestnmw))]
8040pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
8041 let and = _mm_and_si128(a, b);
8042 let zero = _mm_setzero_si128();
8043 _mm_mask_cmpeq_epi16_mask(k, and, zero)
8044}
8045
fc512014
XL
8046/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
8047///
8048/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi8_mask&expand=5933)
8049#[inline]
8050#[target_feature(enable = "avx512bw")]
8051#[cfg_attr(test, assert_instr(vptestnmb))]
8052pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
8053 let and = _mm512_and_si512(a, b);
8054 let zero = _mm512_setzero_si512();
8055 _mm512_cmpeq_epi8_mask(and, zero)
8056}
8057
8058/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
8059///
8060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi8_mask&expand=5932)
8061#[inline]
8062#[target_feature(enable = "avx512bw")]
8063#[cfg_attr(test, assert_instr(vptestnmb))]
8064pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
8065 let and = _mm512_and_si512(a, b);
8066 let zero = _mm512_setzero_si512();
8067 _mm512_mask_cmpeq_epi8_mask(k, and, zero)
8068}
8069
cdc7bbd5
XL
8070/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
8071///
8072/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testn_epi8_mask&expand=5931)
8073#[inline]
8074#[target_feature(enable = "avx512bw,avx512vl")]
8075#[cfg_attr(test, assert_instr(vptestnmb))]
8076pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
8077 let and = _mm256_and_si256(a, b);
8078 let zero = _mm256_setzero_si256();
8079 _mm256_cmpeq_epi8_mask(and, zero)
8080}
8081
8082/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
8083///
8084/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_testn_epi8_mask&expand=5930)
8085#[inline]
8086#[target_feature(enable = "avx512bw,avx512vl")]
8087#[cfg_attr(test, assert_instr(vptestnmb))]
8088pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
8089 let and = _mm256_and_si256(a, b);
8090 let zero = _mm256_setzero_si256();
8091 _mm256_mask_cmpeq_epi8_mask(k, and, zero)
8092}
8093
8094/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
8095///
8096/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testn_epi8_mask&expand=5929)
8097#[inline]
8098#[target_feature(enable = "avx512bw,avx512vl")]
8099#[cfg_attr(test, assert_instr(vptestnmb))]
8100pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
8101 let and = _mm_and_si128(a, b);
8102 let zero = _mm_setzero_si128();
8103 _mm_cmpeq_epi8_mask(and, zero)
8104}
8105
8106/// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
8107///
8108/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_testn_epi8_mask&expand=5928)
8109#[inline]
8110#[target_feature(enable = "avx512bw,avx512vl")]
8111#[cfg_attr(test, assert_instr(vptestnmb))]
8112pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
8113 let and = _mm_and_si128(a, b);
8114 let zero = _mm_setzero_si128();
8115 _mm_mask_cmpeq_epi8_mask(k, and, zero)
8116}
8117
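// --- Illustrative usage sketch (not part of the original source) ---
// `testn` is the complement of `test`: the mask bit is set where `a & b` is
// zero in that element. Hypothetical helper, AVX-512BW assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_testn_epi8_mask() {
    let a = _mm512_set1_epi8(0b0011);
    let b = _mm512_set1_epi8(0b1100);
    // a & b == 0 in every byte, so every bit of the 64-bit mask is set.
    assert_eq!(_mm512_testn_epi8_mask(a, b), u64::MAX);
    assert_eq!(_mm512_test_epi8_mask(a, b), 0);
}
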
fc512014
XL
8118/// Store 64-bit mask from a into memory.
8119///
8120/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask64&expand=5578)
8121#[inline]
8122#[target_feature(enable = "avx512bw")]
8123#[cfg_attr(test, assert_instr(mov))] //should be kmovq
8124pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
8125 ptr::write(mem_addr as *mut __mmask64, a);
8126}
8127
8128/// Store 32-bit mask from a into memory.
8129///
8130/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask32&expand=5577)
8131#[inline]
8132#[target_feature(enable = "avx512bw")]
8133#[cfg_attr(test, assert_instr(mov))] //should be kmovd
8134pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
8135 ptr::write(mem_addr as *mut __mmask32, a);
8136}
8137
8138/// Load 64-bit mask from memory into k.
8139///
8140/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask64&expand=3318)
8141#[inline]
8142#[target_feature(enable = "avx512bw")]
8143#[cfg_attr(test, assert_instr(mov))] //should be kmovq
8144pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
8145 ptr::read(mem_addr as *const __mmask64)
8146}
8147
8148/// Load 32-bit mask from memory into k.
8149///
8150/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask32&expand=3317)
8151#[inline]
8152#[target_feature(enable = "avx512bw")]
8153#[cfg_attr(test, assert_instr(mov))] //should be kmovd
8154pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 {
8155 ptr::read(mem_addr as *const __mmask32)
8156}
8157
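// --- Illustrative usage sketch (not part of the original source) ---
// Round-trip a 64-bit mask through memory; the mask types are plain integers,
// so ordinary locals work as the backing storage. Hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_mask_store_load() {
    let k: __mmask64 = 0xDEAD_BEEF_0123_4567;
    let mut slot: u64 = 0;
    _store_mask64(&mut slot, k);
    assert_eq!(_load_mask64(&slot), k);
}
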
8158/// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst.
8159///
8160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sad_epu8&expand=4855)
8161#[inline]
8162#[target_feature(enable = "avx512bw")]
8163#[cfg_attr(test, assert_instr(vpsadbw))]
8164pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i {
8165 transmute(vpsadbw(a.as_u8x64(), b.as_u8x64()))
8166}
8167
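// --- Illustrative usage sketch (not part of the original source) ---
// Each 64-bit element receives the sum of the eight byte-wise absolute
// differences of that element, placed in its low 16 bits. Hypothetical helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_sad_epu8() {
    let a = _mm512_set1_epi8(3);
    let b = _mm512_set1_epi8(1);
    // |3 - 1| summed over 8 bytes per 64-bit element = 16.
    let sums: [u64; 8] = transmute(_mm512_sad_epu8(a, b));
    assert_eq!(sums, [16u64; 8]);
}
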
8168/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8169///
8170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dbsad_epu8&expand=2114)
8171#[inline]
8172#[target_feature(enable = "avx512bw")]
17df50a5
XL
8173#[rustc_legacy_const_generics(2)]
8174#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8175pub unsafe fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
8176 static_assert_imm8!(IMM8);
fc512014
XL
8177 let a = a.as_u8x64();
8178 let b = b.as_u8x64();
17df50a5 8179 let r = vdbpsadbw(a, b, IMM8);
fc512014
XL
8180 transmute(r)
8181}
8182
8183/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8184///
8185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dbsad_epu8&expand=2115)
8186#[inline]
8187#[target_feature(enable = "avx512bw")]
17df50a5
XL
8188#[rustc_legacy_const_generics(4)]
8189#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8190pub unsafe fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
fc512014
XL
8191 src: __m512i,
8192 k: __mmask32,
8193 a: __m512i,
8194 b: __m512i,
fc512014 8195) -> __m512i {
17df50a5 8196 static_assert_imm8!(IMM8);
fc512014
XL
8197 let a = a.as_u8x64();
8198 let b = b.as_u8x64();
17df50a5 8199 let r = vdbpsadbw(a, b, IMM8);
fc512014
XL
8200 transmute(simd_select_bitmask(k, r, src.as_u16x32()))
8201}
8202
8203/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8204///
8205/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dbsad_epu8&expand=2116)
8206#[inline]
8207#[target_feature(enable = "avx512bw")]
17df50a5
XL
8208#[rustc_legacy_const_generics(3)]
8209#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8210pub unsafe fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(
8211 k: __mmask32,
8212 a: __m512i,
8213 b: __m512i,
8214) -> __m512i {
8215 static_assert_imm8!(IMM8);
fc512014
XL
8216 let a = a.as_u8x64();
8217 let b = b.as_u8x64();
17df50a5 8218 let r = vdbpsadbw(a, b, IMM8);
fc512014
XL
8219 transmute(simd_select_bitmask(
8220 k,
8221 r,
8222 _mm512_setzero_si512().as_u16x32(),
8223 ))
8224}
8225
cdc7bbd5
XL
8226/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8227///
8228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dbsad_epu8&expand=2111)
8229#[inline]
8230#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
8231#[rustc_legacy_const_generics(2)]
8232#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8233pub unsafe fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
8234 static_assert_imm8!(IMM8);
cdc7bbd5
XL
8235 let a = a.as_u8x32();
8236 let b = b.as_u8x32();
17df50a5 8237 let r = vdbpsadbw256(a, b, IMM8);
cdc7bbd5
XL
8238 transmute(r)
8239}
8240
8241/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8242///
8243/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dbsad_epu8&expand=2112)
8244#[inline]
8245#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
8246#[rustc_legacy_const_generics(4)]
8247#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8248pub unsafe fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
cdc7bbd5
XL
8249 src: __m256i,
8250 k: __mmask16,
8251 a: __m256i,
8252 b: __m256i,
cdc7bbd5 8253) -> __m256i {
17df50a5 8254 static_assert_imm8!(IMM8);
cdc7bbd5
XL
8255 let a = a.as_u8x32();
8256 let b = b.as_u8x32();
17df50a5 8257 let r = vdbpsadbw256(a, b, IMM8);
cdc7bbd5
XL
8258 transmute(simd_select_bitmask(k, r, src.as_u16x16()))
8259}
8260
8261/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8262///
8263/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dbsad_epu8&expand=2113)
8264#[inline]
8265#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
8266#[rustc_legacy_const_generics(3)]
8267#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8268pub unsafe fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(
8269 k: __mmask16,
8270 a: __m256i,
8271 b: __m256i,
8272) -> __m256i {
8273 static_assert_imm8!(IMM8);
cdc7bbd5
XL
8274 let a = a.as_u8x32();
8275 let b = b.as_u8x32();
17df50a5 8276 let r = vdbpsadbw256(a, b, IMM8);
cdc7bbd5
XL
8277 transmute(simd_select_bitmask(
8278 k,
8279 r,
8280 _mm256_setzero_si256().as_u16x16(),
8281 ))
8282}
8283
8284/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8285///
8286/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dbsad_epu8&expand=2108)
8287#[inline]
8288#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
8289#[rustc_legacy_const_generics(2)]
8290#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8291pub unsafe fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
8292 static_assert_imm8!(IMM8);
cdc7bbd5
XL
8293 let a = a.as_u8x16();
8294 let b = b.as_u8x16();
17df50a5 8295 let r = vdbpsadbw128(a, b, IMM8);
cdc7bbd5
XL
8296 transmute(r)
8297}
8298
8299/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8300///
8301/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dbsad_epu8&expand=2109)
8302#[inline]
8303#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
8304#[rustc_legacy_const_generics(4)]
8305#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8306pub unsafe fn _mm_mask_dbsad_epu8<const IMM8: i32>(
cdc7bbd5
XL
8307 src: __m128i,
8308 k: __mmask8,
8309 a: __m128i,
8310 b: __m128i,
cdc7bbd5 8311) -> __m128i {
17df50a5 8312 static_assert_imm8!(IMM8);
cdc7bbd5
XL
8313 let a = a.as_u8x16();
8314 let b = b.as_u8x16();
17df50a5 8315 let r = vdbpsadbw128(a, b, IMM8);
cdc7bbd5
XL
8316 transmute(simd_select_bitmask(k, r, src.as_u16x8()))
8317}
8318
8319/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
8320///
8321/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dbsad_epu8&expand=2110)
8322#[inline]
8323#[target_feature(enable = "avx512bw,avx512vl")]
17df50a5
XL
8324#[rustc_legacy_const_generics(3)]
8325#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
8326pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>(
8327 k: __mmask8,
8328 a: __m128i,
8329 b: __m128i,
8330) -> __m128i {
8331 static_assert_imm8!(IMM8);
cdc7bbd5
XL
8332 let a = a.as_u8x16();
8333 let b = b.as_u8x16();
17df50a5 8334 let r = vdbpsadbw128(a, b, IMM8);
cdc7bbd5
XL
8335 transmute(simd_select_bitmask(k, r, _mm_setzero_si128().as_u16x8()))
8336}
8337
fc512014
XL
8338/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
8339///
8340/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi16_mask&expand=3873)
8341#[inline]
8342#[target_feature(enable = "avx512bw")]
c295e0f8 8343#[cfg_attr(test, assert_instr(vpmovw2m))]
cdc7bbd5
XL
8344pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
8345 let filter = _mm512_set1_epi16(1 << 15);
8346 let a = _mm512_and_si512(a, filter);
8347 _mm512_cmpeq_epi16_mask(a, filter)
8348}
8349
8350/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
8351///
8352/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi16_mask&expand=3872)
8353#[inline]
8354#[target_feature(enable = "avx512bw,avx512vl")]
c295e0f8 8355#[cfg_attr(test, assert_instr(vpmovw2m))]
cdc7bbd5
XL
8356pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
8357 let filter = _mm256_set1_epi16(1 << 15);
8358 let a = _mm256_and_si256(a, filter);
8359 _mm256_cmpeq_epi16_mask(a, filter)
8360}
8361
8362/// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a.
8363///
8364/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi16_mask&expand=3871)
8365#[inline]
8366#[target_feature(enable = "avx512bw,avx512vl")]
c295e0f8 8367#[cfg_attr(test, assert_instr(vpmovw2m))]
cdc7bbd5
XL
8368pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
8369 let filter = _mm_set1_epi16(1 << 15);
8370 let a = _mm_and_si128(a, filter);
8371 _mm_cmpeq_epi16_mask(a, filter)
fc512014
XL
8372}
8373
8374/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
8375///
8376/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi8_mask&expand=3883)
8377#[inline]
8378#[target_feature(enable = "avx512bw")]
c295e0f8 8379#[cfg_attr(test, assert_instr(vpmovb2m))]
fc512014
XL
8380pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
8381 let filter = _mm512_set1_epi8(1 << 7);
8382 let a = _mm512_and_si512(a, filter);
8383 _mm512_cmpeq_epi8_mask(a, filter)
8384}
8385
cdc7bbd5
XL
8386/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
8387///
8388/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi8_mask&expand=3882)
8389#[inline]
8390#[target_feature(enable = "avx512bw,avx512vl")]
c295e0f8
XL
8391#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
8392 // using vpmovb2m plus converting the mask register to a standard register.
cdc7bbd5
XL
8393pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
8394 let filter = _mm256_set1_epi8(1 << 7);
8395 let a = _mm256_and_si256(a, filter);
8396 _mm256_cmpeq_epi8_mask(a, filter)
8397}
8398
8399/// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a.
8400///
8401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi8_mask&expand=3881)
8402#[inline]
8403#[target_feature(enable = "avx512bw,avx512vl")]
c295e0f8
XL
8404#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes fewer cycles than
8405 // using vpmovb2m plus converting the mask register to a standard register.
cdc7bbd5
XL
8406pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
8407 let filter = _mm_set1_epi8(1 << 7);
8408 let a = _mm_and_si128(a, filter);
8409 _mm_cmpeq_epi8_mask(a, filter)
8410}
8411
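// --- Illustrative usage sketch (not part of the original source) ---
// movepi8_mask gathers the sign bit of every byte into a mask register, so
// negative bytes produce set bits. Hypothetical helper, AVX-512BW/VL assumed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_movepi8_mask() {
    let negatives = _mm_set1_epi8(-1);
    let positives = _mm_set1_epi8(1);
    assert_eq!(_mm_movepi8_mask(negatives), 0xFFFF);
    assert_eq!(_mm_movepi8_mask(positives), 0);
}
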
fc512014
XL
8412/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
8413///
8414/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movm_epi16&expand=3886)
8415#[inline]
8416#[target_feature(enable = "avx512bw")]
8417#[cfg_attr(test, assert_instr(vpmovm2w))]
8418pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
8419 let one = _mm512_set1_epi16(
8420 1 << 15
8421 | 1 << 14
8422 | 1 << 13
8423 | 1 << 12
8424 | 1 << 11
8425 | 1 << 10
8426 | 1 << 9
8427 | 1 << 8
8428 | 1 << 7
8429 | 1 << 6
8430 | 1 << 5
8431 | 1 << 4
8432 | 1 << 3
8433 | 1 << 2
8434 | 1 << 1
8435 | 1 << 0,
8436 )
8437 .as_i16x32();
8438 let zero = _mm512_setzero_si512().as_i16x32();
8439 transmute(simd_select_bitmask(k, one, zero))
8440}
8441
cdc7bbd5
XL
8442/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
8443///
8444/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movm_epi16&expand=3885)
8445#[inline]
8446#[target_feature(enable = "avx512bw,avx512vl")]
8447#[cfg_attr(test, assert_instr(vpmovm2w))]
8448pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
8449 let one = _mm256_set1_epi16(
8450 1 << 15
8451 | 1 << 14
8452 | 1 << 13
8453 | 1 << 12
8454 | 1 << 11
8455 | 1 << 10
8456 | 1 << 9
8457 | 1 << 8
8458 | 1 << 7
8459 | 1 << 6
8460 | 1 << 5
8461 | 1 << 4
8462 | 1 << 3
8463 | 1 << 2
8464 | 1 << 1
8465 | 1 << 0,
8466 )
8467 .as_i16x16();
8468 let zero = _mm256_setzero_si256().as_i16x16();
8469 transmute(simd_select_bitmask(k, one, zero))
8470}
8471
8472/// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
8473///
8474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movm_epi16&expand=3884)
8475#[inline]
8476#[target_feature(enable = "avx512bw,avx512vl")]
8477#[cfg_attr(test, assert_instr(vpmovm2w))]
8478pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i {
8479 let one = _mm_set1_epi16(
8480 1 << 15
8481 | 1 << 14
8482 | 1 << 13
8483 | 1 << 12
8484 | 1 << 11
8485 | 1 << 10
8486 | 1 << 9
8487 | 1 << 8
8488 | 1 << 7
8489 | 1 << 6
8490 | 1 << 5
8491 | 1 << 4
8492 | 1 << 3
8493 | 1 << 2
8494 | 1 << 1
8495 | 1 << 0,
8496 )
8497 .as_i16x8();
8498 let zero = _mm_setzero_si128().as_i16x8();
8499 transmute(simd_select_bitmask(k, one, zero))
8500}
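// Illustrative usage sketch (an added example, not upstream code): each set
// mask bit expands to an all-ones 16-bit lane, each clear bit to zero.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_movm_epi16() {
    let v = _mm_movm_epi16(0b0000_0101);
    let expected = _mm_set_epi16(0, 0, 0, 0, 0, -1, 0, -1);
    assert_eq!(_mm_cmpeq_epi16_mask(v, expected), 0xFF);
}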
8501
8502/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
8503///
8504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movm_epi8&expand=3895)
8505#[inline]
8506#[target_feature(enable = "avx512bw")]
8507#[cfg_attr(test, assert_instr(vpmovm2b))]
8508pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
8509 let one =
8510 _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
8511 .as_i8x64();
8512 let zero = _mm512_setzero_si512().as_i8x64();
8513 transmute(simd_select_bitmask(k, one, zero))
8514}
8515
8516/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
8517///
8518/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movm_epi8&expand=3894)
8519#[inline]
8520#[target_feature(enable = "avx512bw,avx512vl")]
8521#[cfg_attr(test, assert_instr(vpmovm2b))]
8522pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
8523 let one =
8524 _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
8525 .as_i8x32();
8526 let zero = _mm256_setzero_si256().as_i8x32();
8527 transmute(simd_select_bitmask(k, one, zero))
8528}
8529
8530/// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k.
8531///
8532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movm_epi8&expand=3893)
8533#[inline]
8534#[target_feature(enable = "avx512bw,avx512vl")]
8535#[cfg_attr(test, assert_instr(vpmovm2b))]
8536pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i {
8537 let one = _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
8538 .as_i8x16();
8539 let zero = _mm_setzero_si128().as_i8x16();
8540 transmute(simd_select_bitmask(k, one, zero))
8541}
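// Illustrative usage sketch (an added example, not upstream code): expanding a
// mask to bytes and reading the sign bits back is a lossless round trip.
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_movm_epi8_round_trip() {
    let k: __mmask64 = 0xDEAD_BEEF_0123_4567;
    assert_eq!(_mm512_movepi8_mask(_mm512_movm_epi8(k)), k);
}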
8542
8543/// Add 32-bit masks in a and b, and store the result in k.
8544///
8545/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207)
8546#[inline]
8547#[target_feature(enable = "avx512bw")]
8548pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
8549 transmute(a + b)
8550}
8551
8552/// Add 64-bit masks in a and b, and store the result in k.
8553///
8554/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208)
8555#[inline]
8556#[target_feature(enable = "avx512bw")]
8557pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
8558 transmute(a + b)
8559}
8560
8561/// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
8562///
8563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask32&expand=3213)
8564#[inline]
8565#[target_feature(enable = "avx512bw")]
8566pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
8567 transmute(a & b)
8568}
8569
8570/// Compute the bitwise AND of 64-bit masks a and b, and store the result in k.
8571///
8572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask64&expand=3214)
8573#[inline]
8574#[target_feature(enable = "avx512bw")]
8575pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
8576 transmute(a & b)
8577}
8578
8579/// Compute the bitwise NOT of 32-bit mask a, and store the result in k.
8580///
8581/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_knot_mask32&expand=3234)
8582#[inline]
8583#[target_feature(enable = "avx512bw")]
8584pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 {
8585 transmute(a ^ 0b11111111_11111111_11111111_11111111)
8586}
8587
8588/// Compute the bitwise NOT of 64-bit mask a, and store the result in k.
8589///
8590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_knot_mask64&expand=3235)
8591#[inline]
8592#[target_feature(enable = "avx512bw")]
8593pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 {
8594 transmute(a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111)
8595}
8596
8597/// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k.
8598///
8599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask32&expand=3219)
8600#[inline]
8601#[target_feature(enable = "avx512bw")]
8602pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
8603 transmute(_knot_mask32(a) & b)
8604}
8605
8606/// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k.
8607///
8608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask64&expand=3220)
8609#[inline]
8610#[target_feature(enable = "avx512bw")]
8611pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
8612 transmute(_knot_mask64(a) & b)
8613}
8614
8615/// Compute the bitwise OR of 32-bit masks a and b, and store the result in k.
8616///
8617/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask32&expand=3240)
8618#[inline]
8619#[target_feature(enable = "avx512bw")]
8620pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
8621 transmute(a | b)
8622}
8623
8624/// Compute the bitwise OR of 64-bit masks a and b, and store the result in k.
8625///
8626/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask64&expand=3241)
8627#[inline]
8628#[target_feature(enable = "avx512bw")]
8629pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
8630 transmute(a | b)
8631}
8632
8633/// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k.
8634///
8635/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask32&expand=3292)
8636#[inline]
8637#[target_feature(enable = "avx512bw")]
8638pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
8639 transmute(a ^ b)
8640}
8641
8642/// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k.
8643///
8644/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask64&expand=3293)
8645#[inline]
8646#[target_feature(enable = "avx512bw")]
8647pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
8648 transmute(a ^ b)
8649}
8650
8651/// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k.
8652///
8653/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask32&expand=3286)
8654#[inline]
8655#[target_feature(enable = "avx512bw")]
8656pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
8657 transmute(_knot_mask32(a ^ b))
8658}
8659
8660/// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k.
8661///
8662/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask64&expand=3287)
8663#[inline]
8664#[target_feature(enable = "avx512bw")]
8665pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
8666 transmute(_knot_mask64(a ^ b))
8667}
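// Illustrative usage sketch (an added example, not upstream code): the 32-bit
// mask operations behave exactly like the corresponding scalar bit operations.
#[target_feature(enable = "avx512bw")]
unsafe fn example_mask32_ops() {
    let a: __mmask32 = 0b1111_0000_1111_0000_1111_0000_1111_0000;
    let b: __mmask32 = 0b1010_1010_1010_1010_1010_1010_1010_1010;
    assert_eq!(_kand_mask32(a, b), a & b);
    assert_eq!(_kandn_mask32(a, b), !a & b);
    assert_eq!(_kxnor_mask32(a, b), !(a ^ b));
}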
8668
8669/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
8670///
8671/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi8&expand=1407)
8672#[inline]
8673#[target_feature(enable = "avx512bw")]
8674#[cfg_attr(test, assert_instr(vpmovwb))]
8675pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
8676 let a = a.as_i16x32();
8677 transmute::<i8x32, _>(simd_cast(a))
8678}
8679
8680/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8681///
8682/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_epi8&expand=1408)
8683#[inline]
8684#[target_feature(enable = "avx512bw")]
8685#[cfg_attr(test, assert_instr(vpmovwb))]
8686pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
8687 let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
8688 transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
8689}
8690
8691/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8692///
8693/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_epi8&expand=1409)
8694#[inline]
8695#[target_feature(enable = "avx512bw")]
8696#[cfg_attr(test, assert_instr(vpmovwb))]
8697pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
8698 let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
8699 transmute(simd_select_bitmask(
8700 k,
8701 convert,
8702 _mm256_setzero_si256().as_i8x32(),
8703 ))
8704}
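// Illustrative usage sketch (an added example, not upstream code): truncation
// keeps only the low byte of each 16-bit element, here 0x80 (-128 as i8).
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm512_cvtepi16_epi8() {
    let r = _mm512_cvtepi16_epi8(_mm512_set1_epi16(0x0180));
    let expected = _mm256_set1_epi8(0x80u8 as i8);
    assert_eq!(_mm256_cmpeq_epi8_mask(r, expected), u32::MAX);
}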
8705
8706/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
8707///
8708/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi8&expand=1404)
8709#[inline]
8710#[target_feature(enable = "avx512bw,avx512vl")]
8711#[cfg_attr(test, assert_instr(vpmovwb))]
8712pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
8713 let a = a.as_i16x16();
8714 transmute::<i8x16, _>(simd_cast(a))
8715}
8716
8717/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8718///
8719/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_epi8&expand=1405)
8720#[inline]
8721#[target_feature(enable = "avx512bw,avx512vl")]
8722#[cfg_attr(test, assert_instr(vpmovwb))]
8723pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
8724 let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
8725 transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
8726}
8727
8728/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8729///
8730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_epi8&expand=1406)
8731#[inline]
8732#[target_feature(enable = "avx512bw,avx512vl")]
8733#[cfg_attr(test, assert_instr(vpmovwb))]
8734pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
8735 let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
8736 transmute(simd_select_bitmask(
8737 k,
8738 convert,
8739 _mm_setzero_si128().as_i8x16(),
8740 ))
8741}
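// Illustrative usage sketch (an added example, not upstream code): with a
// writemask, unselected lanes keep the bytes of `src`.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm256_mask_cvtepi16_epi8() {
    let src = _mm_set1_epi8(9);
    let a = _mm256_set1_epi16(300); // the low byte of 300 is 44
    let r = _mm256_mask_cvtepi16_epi8(src, 0x00FF, a);
    let expected = _mm_set_epi8(9, 9, 9, 9, 9, 9, 9, 9, 44, 44, 44, 44, 44, 44, 44, 44);
    assert_eq!(_mm_cmpeq_epi8_mask(r, expected), 0xFFFF);
}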
8742
8743/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
8744///
8745/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi8&expand=1401)
8746#[inline]
8747#[target_feature(enable = "avx512bw,avx512vl")]
8748#[cfg_attr(test, assert_instr(vpmovwb))]
8749pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
8750 let a = a.as_i16x8();
8751 let zero = _mm_setzero_si128().as_i16x8();
    // Indices 8..15 of the shuffle select from `zero`, so the widened i16x16
    // has a zeroed upper half and the cast below produces a full i8x16.
17df50a5 8752 let v256: i16x16 = simd_shuffle16!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]);
8753 transmute::<i8x16, _>(simd_cast(v256))
8754}
8755
8756/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8757///
8758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_epi8&expand=1402)
8759#[inline]
8760#[target_feature(enable = "avx512bw,avx512vl")]
8761#[cfg_attr(test, assert_instr(vpmovwb))]
8762pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8763 let convert = _mm_cvtepi16_epi8(a).as_i8x16();
8764 let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
8765 transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
8766}
8767
8768/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8769///
8770/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_epi8&expand=1403)
8771#[inline]
8772#[target_feature(enable = "avx512bw,avx512vl")]
8773#[cfg_attr(test, assert_instr(vpmovwb))]
8774pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
8775 let convert = _mm_cvtepi16_epi8(a).as_i8x16();
8776 let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
8777 let zero = _mm_setzero_si128().as_i8x16();
8778 transmute(simd_select_bitmask(k, convert, zero))
8779}
8780
8781/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
8782///
8783/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi16_epi8&expand=1807)
8784#[inline]
8785#[target_feature(enable = "avx512bw")]
8786#[cfg_attr(test, assert_instr(vpmovswb))]
8787pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
8788 transmute(vpmovswb(
8789 a.as_i16x32(),
8790 _mm256_setzero_si256().as_i8x32(),
8791 0b11111111_11111111_11111111_11111111,
8792 ))
8793}
8794
8795/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8796///
8797/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_epi8&expand=1808)
8798#[inline]
8799#[target_feature(enable = "avx512bw")]
8800#[cfg_attr(test, assert_instr(vpmovswb))]
8801pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
8802 transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k))
8803}
8804
8805/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8806///
8807/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi16_epi8&expand=1809)
8808#[inline]
8809#[target_feature(enable = "avx512bw")]
8810#[cfg_attr(test, assert_instr(vpmovswb))]
8811pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
8812 transmute(vpmovswb(
8813 a.as_i16x32(),
8814 _mm256_setzero_si256().as_i8x32(),
8815 k,
8816 ))
8817}
8818
8819/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
8820///
8821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi16_epi8&expand=1804)
8822#[inline]
8823#[target_feature(enable = "avx512bw,avx512vl")]
8824#[cfg_attr(test, assert_instr(vpmovswb))]
8825pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
8826 transmute(vpmovswb256(
8827 a.as_i16x16(),
8828 _mm_setzero_si128().as_i8x16(),
8829 0b11111111_11111111,
8830 ))
8831}
8832
8833/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8834///
8835/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_epi8&expand=1805)
8836#[inline]
8837#[target_feature(enable = "avx512bw,avx512vl")]
8838#[cfg_attr(test, assert_instr(vpmovswb))]
8839pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
8840 transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k))
8841}
8842
8843/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8844///
8845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi16_epi8&expand=1806)
8846#[inline]
8847#[target_feature(enable = "avx512bw,avx512vl")]
8848#[cfg_attr(test, assert_instr(vpmovswb))]
8849pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
8850 transmute(vpmovswb256(
8851 a.as_i16x16(),
8852 _mm_setzero_si128().as_i8x16(),
8853 k,
8854 ))
8855}
8856
8857/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
8858///
8859/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi16_epi8&expand=1801)
8860#[inline]
8861#[target_feature(enable = "avx512bw,avx512vl")]
8862#[cfg_attr(test, assert_instr(vpmovswb))]
8863pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i {
8864 transmute(vpmovswb128(
8865 a.as_i16x8(),
8866 _mm_setzero_si128().as_i8x16(),
8867 0b11111111,
8868 ))
8869}
8870
8871/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8872///
8873/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_epi8&expand=1802)
8874#[inline]
8875#[target_feature(enable = "avx512bw,avx512vl")]
8876#[cfg_attr(test, assert_instr(vpmovswb))]
8877pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8878 transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k))
8879}
8880
8881/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8882///
8883/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi16_epi8&expand=1803)
8884#[inline]
8885#[target_feature(enable = "avx512bw,avx512vl")]
8886#[cfg_attr(test, assert_instr(vpmovswb))]
8887pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
8888 transmute(vpmovswb128(a.as_i16x8(), _mm_setzero_si128().as_i8x16(), k))
8889}
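// Illustrative usage sketch (an added example, not upstream code): signed
// saturation clamps out-of-range values instead of truncating them.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_cvtsepi16_epi8() {
    let a = _mm_set1_epi16(300);
    assert_eq!(_mm_extract_epi8::<0>(_mm_cvtsepi16_epi8(a)), 127); // i8::MAX
    assert_eq!(_mm_extract_epi8::<0>(_mm_cvtepi16_epi8(a)), 44); // plain truncation
}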
8890
8891/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
8892///
8893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi16_epi8&expand=2042)
8894#[inline]
8895#[target_feature(enable = "avx512bw")]
8896#[cfg_attr(test, assert_instr(vpmovuswb))]
8897pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
8898 transmute(vpmovuswb(
8899 a.as_u16x32(),
8900 _mm256_setzero_si256().as_u8x32(),
8901 0b11111111_11111111_11111111_11111111,
8902 ))
8903}
8904
8905/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8906///
8907/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_epi8&expand=2043)
8908#[inline]
8909#[target_feature(enable = "avx512bw")]
8910#[cfg_attr(test, assert_instr(vpmovuswb))]
8911pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
8912 transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k))
8913}
8914
8915/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8916///
8917/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi16_epi8&expand=2044)
8918#[inline]
8919#[target_feature(enable = "avx512bw")]
8920#[cfg_attr(test, assert_instr(vpmovuswb))]
8921pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
8922 transmute(vpmovuswb(
8923 a.as_u16x32(),
8924 _mm256_setzero_si256().as_u8x32(),
8925 k,
8926 ))
8927}
8928
8929/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
8930///
8931/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi16_epi8&expand=2039)
8932#[inline]
8933#[target_feature(enable = "avx512bw,avx512vl")]
8934#[cfg_attr(test, assert_instr(vpmovuswb))]
8935pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
8936 transmute(vpmovuswb256(
8937 a.as_u16x16(),
8938 _mm_setzero_si128().as_u8x16(),
8939 0b11111111_11111111,
8940 ))
8941}
8942
8943/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8944///
8945/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_epi8&expand=2040)
8946#[inline]
8947#[target_feature(enable = "avx512bw,avx512vl")]
8948#[cfg_attr(test, assert_instr(vpmovuswb))]
8949pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
8950 transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k))
8951}
8952
8953/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8954///
8955/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi16_epi8&expand=2041)
8956#[inline]
8957#[target_feature(enable = "avx512bw,avx512vl")]
8958#[cfg_attr(test, assert_instr(vpmovuswb))]
8959pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
8960 transmute(vpmovuswb256(
8961 a.as_u16x16(),
8962 _mm_setzero_si128().as_u8x16(),
8963 k,
8964 ))
8965}
8966
8967/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
8968///
cdc7bbd5 8969/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi16_epi8&expand=2036)
fc512014 8970#[inline]
cdc7bbd5 8971#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 8972#[cfg_attr(test, assert_instr(vpmovuswb))]
8973pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i {
8974 transmute(vpmovuswb128(
8975 a.as_u16x8(),
8976 _mm_setzero_si128().as_u8x16(),
8977 0b11111111,
8978 ))
8979}
8980
8981/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8982///
cdc7bbd5 8983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_epi8&expand=2037)
fc512014 8984#[inline]
cdc7bbd5 8985#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 8986#[cfg_attr(test, assert_instr(vpmovuswb))]
8987pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
8988 transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k))
8989}
8990
8991/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8992///
cdc7bbd5 8993/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi16_epi8&expand=2038)
fc512014 8994#[inline]
cdc7bbd5 8995#[target_feature(enable = "avx512bw,avx512vl")]
fc512014 8996#[cfg_attr(test, assert_instr(vpmovuswb))]
8997pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
8998 transmute(vpmovuswb128(
8999 a.as_u16x8(),
9000 _mm_setzero_si128().as_u8x16(),
9001 k,
9002 ))
9003}
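// Illustrative usage sketch (an added example, not upstream code): -1 is
// 0xFFFF when reinterpreted as unsigned, so unsigned saturation yields 255.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_cvtusepi16_epi8() {
    let r = _mm_cvtusepi16_epi8(_mm_set1_epi16(-1));
    assert_eq!(_mm_extract_epi8::<0>(r), 255); // u8::MAX
}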
9004
9005/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst.
9006///
9007/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi8_epi16&expand=1526)
9008#[inline]
9009#[target_feature(enable = "avx512bw")]
9010#[cfg_attr(test, assert_instr(vpmovsxbw))]
9011pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
9012 let a = a.as_i8x32();
9013 transmute::<i16x32, _>(simd_cast(a))
9014}
9015
9016/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9017///
9018/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi8_epi16&expand=1527)
9019#[inline]
9020#[target_feature(enable = "avx512bw")]
9021#[cfg_attr(test, assert_instr(vpmovsxbw))]
9022pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
9023 let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
9024 transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
9025}
9026
9027/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9028///
9029/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi8_epi16&expand=1528)
9030#[inline]
9031#[target_feature(enable = "avx512bw")]
9032#[cfg_attr(test, assert_instr(vpmovsxbw))]
9033pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
9034 let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
9035 transmute(simd_select_bitmask(
9036 k,
9037 convert,
9038 _mm512_setzero_si512().as_i16x32(),
9039 ))
9040}
9041
9042/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9043///
9044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi8_epi16&expand=1524)
9045#[inline]
9046#[target_feature(enable = "avx512bw,avx512vl")]
9047#[cfg_attr(test, assert_instr(vpmovsxbw))]
9048pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
9049 let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
9050 transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
9051}
9052
9053/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9054///
9055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi8_epi16&expand=1525)
9056#[inline]
9057#[target_feature(enable = "avx512bw,avx512vl")]
9058#[cfg_attr(test, assert_instr(vpmovsxbw))]
9059pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
9060 let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
9061 transmute(simd_select_bitmask(
9062 k,
9063 convert,
9064 _mm256_setzero_si256().as_i16x16(),
9065 ))
9066}
9067
9068/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9069///
9070/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi8_epi16&expand=1521)
9071#[inline]
9072#[target_feature(enable = "avx512bw,avx512vl")]
9073#[cfg_attr(test, assert_instr(vpmovsxbw))]
9074pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9075 let convert = _mm_cvtepi8_epi16(a).as_i16x8();
9076 transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
9077}
9078
9079/// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9080///
9081/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi8_epi16&expand=1522)
9082#[inline]
9083#[target_feature(enable = "avx512bw,avx512vl")]
9084#[cfg_attr(test, assert_instr(vpmovsxbw))]
9085pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
9086 let convert = _mm_cvtepi8_epi16(a).as_i16x8();
9087 transmute(simd_select_bitmask(
9088 k,
9089 convert,
9090 _mm_setzero_si128().as_i16x8(),
9091 ))
9092}
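// Illustrative usage sketch (an added example, not upstream code): selected
// lanes are sign extended, the rest are copied from `src`.
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_mm_mask_cvtepi8_epi16() {
    let r = _mm_mask_cvtepi8_epi16(_mm_set1_epi16(100), 0b0000_0011, _mm_set1_epi8(-5));
    let expected = _mm_set_epi16(100, 100, 100, 100, 100, 100, -5, -5);
    assert_eq!(_mm_cmpeq_epi16_mask(r, expected), 0xFF);
}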
9093
9094/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst.
9095///
9096/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi16&expand=1612)
9097#[inline]
9098#[target_feature(enable = "avx512bw")]
9099#[cfg_attr(test, assert_instr(vpmovzxbw))]
9100pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
9101 let a = a.as_u8x32();
9102 transmute::<i16x32, _>(simd_cast(a))
9103}
9104
9105/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9106///
9107/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu8_epi16&expand=1613)
9108#[inline]
9109#[target_feature(enable = "avx512bw")]
9110#[cfg_attr(test, assert_instr(vpmovzxbw))]
9111pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
9112 let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
9113 transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
9114}
9115
9116/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9117///
9118/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu8_epi16&expand=1614)
9119#[inline]
9120#[target_feature(enable = "avx512bw")]
9121#[cfg_attr(test, assert_instr(vpmovzxbw))]
9122pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
9123 let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
9124 transmute(simd_select_bitmask(
9125 k,
9126 convert,
9127 _mm512_setzero_si512().as_i16x32(),
9128 ))
9129}
9130
9131/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9132///
9133/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi16&expand=1610)
9134#[inline]
9135#[target_feature(enable = "avx512bw,avx512vl")]
9136#[cfg_attr(test, assert_instr(vpmovzxbw))]
9137pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
9138 let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
9139 transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
9140}
9141
9142/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9143///
9144/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi16&expand=1611)
9145#[inline]
9146#[target_feature(enable = "avx512bw,avx512vl")]
9147#[cfg_attr(test, assert_instr(vpmovzxbw))]
9148pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
9149 let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
9150 transmute(simd_select_bitmask(
9151 k,
9152 convert,
9153 _mm256_setzero_si256().as_i16x16(),
9154 ))
9155}
9156
9157/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9158///
9159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi16&expand=1607)
9160#[inline]
9161#[target_feature(enable = "avx512bw,avx512vl")]
9162#[cfg_attr(test, assert_instr(vpmovzxbw))]
9163pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
9164 let convert = _mm_cvtepu8_epi16(a).as_i16x8();
9165 transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
9166}
9167
9168/// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9169///
9170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi16&expand=1608)
9171#[inline]
9172#[target_feature(enable = "avx512bw,avx512vl")]
9173#[cfg_attr(test, assert_instr(vpmovzxbw))]
9174pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
9175 let convert = _mm_cvtepu8_epi16(a).as_i16x8();
9176 transmute(simd_select_bitmask(
9177 k,
9178 convert,
9179 _mm_setzero_si128().as_i16x8(),
9180 ))
9181}
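// Illustrative usage sketch (an added example, not upstream code): zero
// extension treats the byte 0xFF as 255 rather than -1.
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_cvtepu8_epi16() {
    let r = _mm512_cvtepu8_epi16(_mm256_set1_epi8(-1));
    assert_eq!(_mm512_cmpeq_epi16_mask(r, _mm512_set1_epi16(255)), u32::MAX);
}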
9182
9183/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
9184///
9185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bslli_epi128&expand=591)
9186#[inline]
9187#[target_feature(enable = "avx512bw")]
9188#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
9189#[rustc_legacy_const_generics(1)]
9190pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
9191 static_assert_imm8!(IMM8);
9192 const fn mask(shift: i32, i: u32) -> u32 {
9193 let shift = shift as u32 & 0xff;
9194 if shift > 15 || i % 16 < shift {
9195 0
9196 } else {
9197 64 + (i - shift)
9198 }
9199 }
9200 let a = a.as_i8x64();
9201 let zero = _mm512_setzero_si512().as_i8x64();
9202 let r: i8x64 = simd_shuffle64!(
9203 zero,
9204 a,
9205 <const IMM8: i32> [
9206 mask(IMM8, 0),
9207 mask(IMM8, 1),
9208 mask(IMM8, 2),
9209 mask(IMM8, 3),
9210 mask(IMM8, 4),
9211 mask(IMM8, 5),
9212 mask(IMM8, 6),
9213 mask(IMM8, 7),
9214 mask(IMM8, 8),
9215 mask(IMM8, 9),
9216 mask(IMM8, 10),
9217 mask(IMM8, 11),
9218 mask(IMM8, 12),
9219 mask(IMM8, 13),
9220 mask(IMM8, 14),
9221 mask(IMM8, 15),
9222 mask(IMM8, 16),
9223 mask(IMM8, 17),
9224 mask(IMM8, 18),
9225 mask(IMM8, 19),
9226 mask(IMM8, 20),
9227 mask(IMM8, 21),
9228 mask(IMM8, 22),
9229 mask(IMM8, 23),
9230 mask(IMM8, 24),
9231 mask(IMM8, 25),
9232 mask(IMM8, 26),
9233 mask(IMM8, 27),
9234 mask(IMM8, 28),
9235 mask(IMM8, 29),
9236 mask(IMM8, 30),
9237 mask(IMM8, 31),
9238 mask(IMM8, 32),
9239 mask(IMM8, 33),
9240 mask(IMM8, 34),
9241 mask(IMM8, 35),
9242 mask(IMM8, 36),
9243 mask(IMM8, 37),
9244 mask(IMM8, 38),
9245 mask(IMM8, 39),
9246 mask(IMM8, 40),
9247 mask(IMM8, 41),
9248 mask(IMM8, 42),
9249 mask(IMM8, 43),
9250 mask(IMM8, 44),
9251 mask(IMM8, 45),
9252 mask(IMM8, 46),
9253 mask(IMM8, 47),
9254 mask(IMM8, 48),
9255 mask(IMM8, 49),
9256 mask(IMM8, 50),
9257 mask(IMM8, 51),
9258 mask(IMM8, 52),
9259 mask(IMM8, 53),
9260 mask(IMM8, 54),
9261 mask(IMM8, 55),
9262 mask(IMM8, 56),
9263 mask(IMM8, 57),
9264 mask(IMM8, 58),
9265 mask(IMM8, 59),
9266 mask(IMM8, 60),
9267 mask(IMM8, 61),
9268 mask(IMM8, 62),
9269 mask(IMM8, 63),
9270 ],
9271 );
9272 transmute(r)
9273}
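// Illustrative usage sketch (an added example, not upstream code): a one-byte
// left shift zeroes byte 0 of every 16-byte lane and moves the rest up.
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_bslli_epi128() {
    let r = _mm512_bslli_epi128::<1>(_mm512_set1_epi8(1));
    // Only byte 0 of each of the four lanes (mask bits 0, 16, 32 and 48) differs from 1.
    assert_eq!(
        _mm512_cmpeq_epi8_mask(r, _mm512_set1_epi8(1)),
        !0x0001_0001_0001_0001u64
    );
}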
9274
9275/// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst.
9276///
9277/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bsrli_epi128&expand=594)
9278#[inline]
9279#[target_feature(enable = "avx512bw")]
9280#[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
9281#[rustc_legacy_const_generics(1)]
9282pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
9283 static_assert_imm8!(IMM8);
9284 let a = a.as_i8x64();
9285 let zero = _mm512_setzero_si512().as_i8x64();
9286 let r: i8x64 = match IMM8 {
9287 0 => simd_shuffle64!(
9288 a,
9289 zero,
9290 [
9291 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
9292 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
9293 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
9294 ],
9295 ),
9296 1 => simd_shuffle64!(
9297 a,
9298 zero,
9299 [
9300 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23,
9301 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
9302 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
9303 ],
9304 ),
9305 2 => simd_shuffle64!(
9306 a,
9307 zero,
9308 [
9309 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24,
9310 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
9311 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
9312 ],
9313 ),
9314 3 => simd_shuffle64!(
9315 a,
9316 zero,
9317 [
9318 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24,
9319 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
9320 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
9321 114,
9322 ],
9323 ),
9324 4 => simd_shuffle64!(
9325 a,
9326 zero,
9327 [
9328 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25,
9329 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
9330 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
9331 115,
9332 ],
9333 ),
9334 5 => simd_shuffle64!(
9335 a,
9336 zero,
9337 [
9338 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26,
9339 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
9340 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
9341 115, 116,
9342 ],
9343 ),
9344 6 => simd_shuffle64!(
9345 a,
9346 zero,
9347 [
9348 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27,
9349 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
9350 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
9351 116, 117,
9352 ],
9353 ),
9354 7 => simd_shuffle64!(
9355 a,
9356 zero,
9357 [
9358 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27,
9359 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
9360 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
9361 116, 117, 118,
9362 ],
9363 ),
9364 8 => simd_shuffle64!(
9365 a,
9366 zero,
9367 [
9368 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28,
9369 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97,
9370 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
9371 116, 117, 118, 119,
9372 ],
9373 ),
9374 9 => simd_shuffle64!(
9375 a,
9376 zero,
9377 [
9378 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29,
9379 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98,
9380 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
9381 117, 118, 119, 120,
9382 ],
9383 ),
9384 10 => simd_shuffle64!(
9385 a,
9386 zero,
9387 [
9388 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30,
9389 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99,
9390 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
9391 118, 119, 120, 121,
9392 ],
9393 ),
9394 11 => simd_shuffle64!(
9395 a,
9396 zero,
9397 [
9398 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31,
9399 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99,
9400 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
9401 117, 118, 119, 120, 121, 122,
9402 ],
9403 ),
9404 12 => simd_shuffle64!(
9405 a,
9406 zero,
9407 [
9408 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80,
9409 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100,
9410 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
9411 118, 119, 120, 121, 122, 123,
9412 ],
9413 ),
9414 13 => simd_shuffle64!(
9415 a,
9416 zero,
9417 [
9418 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81,
9419 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101,
9420 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118,
9421 119, 120, 121, 122, 123, 124,
9422 ],
9423 ),
9424 14 => simd_shuffle64!(
9425 a,
9426 zero,
9427 [
9428 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82,
9429 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102,
9430 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119,
9431 120, 121, 122, 123, 124, 125,
9432 ],
9433 ),
9434 15 => simd_shuffle64!(
9435 a,
9436 zero,
9437 [
9438 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83,
9439 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103,
9440 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120,
9441 121, 122, 123, 124, 125, 126,
9442 ],
9443 ),
9444 _ => zero, // a byte shift of 16 or more clears every lane
9445 };
9446 transmute(r)
9447}
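// Illustrative usage sketch (an added example, not upstream code): shifting
// right by 15 bytes leaves only byte 15 of each lane, now sitting in byte 0.
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_bsrli_epi128() {
    let r = _mm512_bsrli_epi128::<15>(_mm512_set1_epi8(1));
    // Bytes 1..=15 of every lane compare equal to zero; byte 0 still holds a 1.
    assert_eq!(
        _mm512_cmpeq_epi8_mask(r, _mm512_setzero_si512()),
        !0x0001_0001_0001_0001u64
    );
}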
9448
9449/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst.
9450///
9451/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi8&expand=263)
9452#[inline]
9453#[target_feature(enable = "avx512bw")]
9454#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
9455#[rustc_legacy_const_generics(2)]
9456pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
9457 // If palignr is shifting the pair of vectors by at least the size of two
9458 // lanes (32 bytes), every byte of the result is shifted out, so emit zero.
17df50a5 9459 if IMM8 >= 32 {
9460 return _mm512_set1_epi8(0);
9461 }
9462 // If palignr is shifting the pair of input vectors by at least one lane
9463 // but less than two lanes, convert to shifting in zeroes.
9464 let (a, b) = if IMM8 >= 16 {
9465 (_mm512_set1_epi8(0), a)
fc512014 9466 } else {
17df50a5 9467 (a, b)
9468 };
9469 let a = a.as_i8x64();
9470 let b = b.as_i8x64();
9471
9472 let r: i8x64 = match IMM8 % 16 {
9473 0 => simd_shuffle64!(
9474 b,
9475 a,
9476 [
9477 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
9478 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
9479 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
9480 ],
9481 ),
9482 1 => simd_shuffle64!(
9483 b,
9484 a,
9485 [
9486 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23,
9487 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
9488 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112,
9489 ],
9490 ),
9491 2 => simd_shuffle64!(
9492 b,
9493 a,
9494 [
9495 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24,
9496 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
9497 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
9498 ],
9499 ),
9500 3 => simd_shuffle64!(
9501 b,
9502 a,
9503 [
9504 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24,
9505 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
9506 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113,
9507 114,
9508 ],
9509 ),
9510 4 => simd_shuffle64!(
9511 b,
9512 a,
9513 [
9514 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25,
9515 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
9516 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
9517 115,
9518 ],
9519 ),
9520 5 => simd_shuffle64!(
9521 b,
9522 a,
9523 [
9524 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26,
9525 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
9526 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114,
9527 115, 116,
9528 ],
9529 ),
9530 6 => simd_shuffle64!(
9531 b,
9532 a,
9533 [
9534 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27,
9535 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
9536 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
9537 116, 117,
9538 ],
9539 ),
9540 7 => simd_shuffle64!(
9541 b,
9542 a,
9543 [
9544 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27,
9545 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96,
9546 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
9547 116, 117, 118,
9548 ],
9549 ),
9550 8 => simd_shuffle64!(
9551 b,
9552 a,
9553 [
9554 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28,
9555 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97,
9556 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115,
9557 116, 117, 118, 119,
9558 ],
9559 ),
9560 9 => simd_shuffle64!(
9561 b,
9562 a,
9563 [
9564 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29,
9565 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98,
9566 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
9567 117, 118, 119, 120,
9568 ],
9569 ),
9570 10 => simd_shuffle64!(
9571 b,
9572 a,
9573 [
9574 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30,
9575 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99,
9576 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
9577 118, 119, 120, 121,
9578 ],
9579 ),
9580 11 => simd_shuffle64!(
9581 b,
9582 a,
9583 [
9584 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31,
9585 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99,
9586 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116,
9587 117, 118, 119, 120, 121, 122,
9588 ],
9589 ),
9590 12 => simd_shuffle64!(
9591 b,
9592 a,
9593 [
9594 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80,
9595 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100,
9596 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117,
9597 118, 119, 120, 121, 122, 123,
9598 ],
9599 ),
9600 13 => simd_shuffle64!(
9601 b,
9602 a,
9603 [
9604 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81,
9605 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101,
9606 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118,
9607 119, 120, 121, 122, 123, 124,
9608 ],
9609 ),
9610 14 => simd_shuffle64!(
9611 b,
9612 a,
9613 [
9614 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82,
9615 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102,
9616 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119,
9617 120, 121, 122, 123, 124, 125,
9618 ],
9619 ),
9620 15 => simd_shuffle64!(
9621 b,
9622 a,
9623 [
9624 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83,
9625 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103,
9626 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120,
9627 121, 122, 123, 124, 125, 126,
9628 ],
9629 ),
9630 _ => b,
9631 };
9632 transmute(r)
9633}
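// Illustrative usage sketch (an added example, not upstream code): with a
// shift of 4, bytes 0..11 of each 16-byte block come from `b` and bytes
// 12..15 from `a`; the blend below builds the same pattern for comparison.
#[target_feature(enable = "avx512bw")]
unsafe fn example_mm512_alignr_epi8() {
    let a = _mm512_set1_epi8(7);
    let b = _mm512_set1_epi8(3);
    let r = _mm512_alignr_epi8::<4>(a, b);
    let expected = _mm512_mask_blend_epi8(0xF000_F000_F000_F000, b, a);
    assert_eq!(_mm512_cmpeq_epi8_mask(r, expected), u64::MAX);
}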
9634
9635/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9636///
9637/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi8&expand=264)
9638#[inline]
9639#[target_feature(enable = "avx512bw")]
9640#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
9641#[rustc_legacy_const_generics(4)]
9642pub unsafe fn _mm512_mask_alignr_epi8<const IMM8: i32>(
9643 src: __m512i,
9644 k: __mmask64,
9645 a: __m512i,
9646 b: __m512i,
fc512014 9647) -> __m512i {
9648 static_assert_imm8!(IMM8);
9649 let r = _mm512_alignr_epi8::<IMM8>(a, b);
9650 transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64()))
9651}
9652
9653/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9654///
9655/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi8&expand=265)
9656#[inline]
9657#[target_feature(enable = "avx512bw")]
9658#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
9659#[rustc_legacy_const_generics(3)]
9660pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
9661 k: __mmask64,
9662 a: __m512i,
9663 b: __m512i,
9664) -> __m512i {
9665 static_assert_imm8!(IMM8);
9666 let r = _mm512_alignr_epi8::<IMM8>(a, b);
9667 let zero = _mm512_setzero_si512().as_i8x64();
9668 transmute(simd_select_bitmask(k, r.as_i8x64(), zero))
9669}
9670
9671/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9672///
9673/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_alignr_epi8&expand=261)
9674#[inline]
9675#[target_feature(enable = "avx512bw,avx512vl")]
9676#[rustc_legacy_const_generics(4)]
9677#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
9678pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>(
9679 src: __m256i,
9680 k: __mmask32,
9681 a: __m256i,
9682 b: __m256i,
cdc7bbd5 9683) -> __m256i {
9684 static_assert_imm8!(IMM8);
9685 let r = _mm256_alignr_epi8::<IMM8>(a, b);
9686 transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32()))
9687}
9688
9689/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9690///
9691/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_alignr_epi8&expand=262)
9692#[inline]
9693#[target_feature(enable = "avx512bw,avx512vl")]
9694#[rustc_legacy_const_generics(3)]
9695#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
9696pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
9697 k: __mmask32,
9698 a: __m256i,
9699 b: __m256i,
9700) -> __m256i {
9701 static_assert_imm8!(IMM8);
9702 let r = _mm256_alignr_epi8::<IMM8>(a, b);
9703 transmute(simd_select_bitmask(
9704 k,
9705 r.as_i8x32(),
9706 _mm256_setzero_si256().as_i8x32(),
9707 ))
9708}
9709
9710/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9711///
9712/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_alignr_epi8&expand=258)
9713#[inline]
9714#[target_feature(enable = "avx512bw,avx512vl")]
9715#[rustc_legacy_const_generics(4)]
9716#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
9717pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>(
9718 src: __m128i,
9719 k: __mmask16,
9720 a: __m128i,
9721 b: __m128i,
cdc7bbd5 9722) -> __m128i {
9723 static_assert_imm8!(IMM8);
9724 let r = _mm_alignr_epi8::<IMM8>(a, b);
9725 transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
9726}
9727
9728/// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9729///
9730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_alignr_epi8&expand=259)
9731#[inline]
9732#[target_feature(enable = "avx512bw,avx512vl")]
9733#[rustc_legacy_const_generics(3)]
9734#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
9735pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>(
9736 k: __mmask16,
9737 a: __m128i,
9738 b: __m128i,
9739) -> __m128i {
9740 static_assert_imm8!(IMM8);
9741 let r = _mm_alignr_epi8::<IMM8>(a, b);
9742 let zero = _mm_setzero_si128().as_i8x16();
9743 transmute(simd_select_bitmask(k, r.as_i8x16(), zero))
9744}
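// Illustrative sketch of the zero-masking 128-bit form (hypothetical helper,
// not part of stdarch; assumes avx512bw+avx512vl). Bytes not selected by `k`
// come out as zero instead of being taken from a source vector.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_maskz_alignr_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    _mm_maskz_alignr_epi8::<3>(k, a, b)
}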
9745
9746/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9747///
9748/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812)
9749#[inline]
9750#[target_feature(enable = "avx512bw")]
9751#[cfg_attr(test, assert_instr(vpmovswb))]
9752pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
9753 vpmovswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
9754}
9755
9756/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9757///
9758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811)
9759#[inline]
9760#[target_feature(enable = "avx512bw,avx512vl")]
9761#[cfg_attr(test, assert_instr(vpmovswb))]
9762pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
9763 vpmovswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
9764}
9765
9766/// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9767///
9768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810)
9769#[inline]
9770#[target_feature(enable = "avx512bw,avx512vl")]
9771#[cfg_attr(test, assert_instr(vpmovswb))]
9772pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
9773 vpmovswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
9774}
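// Illustrative usage sketch (hypothetical helper, not part of stdarch; assumes
// avx512bw is available and `out` points to at least 32 writable bytes). Each
// 16-bit lane is clamped to the i8 range [-128, 127], and only the bytes whose
// mask bit is set are written; the rest of the buffer is left untouched.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_masked_saturating_narrow_store(out: *mut i8, k: __mmask32, a: __m512i) {
    _mm512_mask_cvtsepi16_storeu_epi8(out, k, a);
}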
9775
9776/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9777///
9778/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412)
9779#[inline]
9780#[target_feature(enable = "avx512bw")]
9781#[cfg_attr(test, assert_instr(vpmovwb))]
9782pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
9783 vpmovwbmem(mem_addr as *mut i8, a.as_i16x32(), k);
9784}
9785
9786/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9787///
9788/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411)
9789#[inline]
9790#[target_feature(enable = "avx512bw,avx512vl")]
9791#[cfg_attr(test, assert_instr(vpmovwb))]
9792pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
9793 vpmovwbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
9794}
9795
9796/// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9797///
9798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410)
9799#[inline]
9800#[target_feature(enable = "avx512bw,avx512vl")]
9801#[cfg_attr(test, assert_instr(vpmovwb))]
9802pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
9803 vpmovwbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
9804}
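// Illustrative sketch of the truncating form (hypothetical helper, not part of
// stdarch; assumes avx512bw+avx512vl). Truncation keeps only the low byte of
// each 16-bit lane, so a lane holding 0x0180 is stored as 0x80, whereas the
// signed-saturating form above would store 0x7F.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw,avx512vl")]
unsafe fn example_masked_truncating_narrow_store(out: *mut i8, k: __mmask8, a: __m128i) {
    _mm_mask_cvtepi16_storeu_epi8(out, k, a);
}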
9805
9806/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9807///
9808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047)
9809#[inline]
9810#[target_feature(enable = "avx512bw")]
9811#[cfg_attr(test, assert_instr(vpmovuswb))]
9812pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
9813 vpmovuswbmem(mem_addr as *mut i8, a.as_i16x32(), k);
9814}
9815
9816/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9817///
9818/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046)
9819#[inline]
9820#[target_feature(enable = "avx512bw,avx512vl")]
9821#[cfg_attr(test, assert_instr(vpmovuswb))]
9822pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
9823 vpmovuswbmem256(mem_addr as *mut i8, a.as_i16x16(), k);
9824}
9825
9826/// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
9827///
9828/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045)
9829#[inline]
9830#[target_feature(enable = "avx512bw,avx512vl")]
9831#[cfg_attr(test, assert_instr(vpmovuswb))]
9832pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
9833 vpmovuswbmem128(mem_addr as *mut i8, a.as_i16x8(), k);
9834}
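// Illustrative sketch of the unsigned-saturating form (hypothetical helper,
// not part of stdarch; assumes avx512bw). Lanes above 255 clamp to 0xFF, and
// only the bytes selected by `k` are written to memory.
#[allow(dead_code)]
#[target_feature(enable = "avx512bw")]
unsafe fn example_masked_unsigned_saturating_store(out: *mut i8, k: __mmask32, a: __m512i) {
    _mm512_mask_cvtusepi16_storeu_epi8(out, k, a);
}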
9835
9836#[allow(improper_ctypes)]
9837extern "C" {
9838 #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
9839 fn vpaddusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
9840 #[link_name = "llvm.x86.avx512.mask.paddus.w.256"]
9841 fn vpaddusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
9842 #[link_name = "llvm.x86.avx512.mask.paddus.w.128"]
9843 fn vpaddusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;
9844
9845 #[link_name = "llvm.x86.avx512.mask.paddus.b.512"]
9846 fn vpaddusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
9847 #[link_name = "llvm.x86.avx512.mask.paddus.b.256"]
9848 fn vpaddusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
9849 #[link_name = "llvm.x86.avx512.mask.paddus.b.128"]
9850 fn vpaddusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;
9851
9852 #[link_name = "llvm.x86.avx512.mask.padds.w.512"]
9853 fn vpaddsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
9854 #[link_name = "llvm.x86.avx512.mask.padds.w.256"]
9855 fn vpaddsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
9856 #[link_name = "llvm.x86.avx512.mask.padds.w.128"]
9857 fn vpaddsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;
9858
9859 #[link_name = "llvm.x86.avx512.mask.padds.b.512"]
9860 fn vpaddsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
9861 #[link_name = "llvm.x86.avx512.mask.padds.b.256"]
9862 fn vpaddsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
9863 #[link_name = "llvm.x86.avx512.mask.padds.b.128"]
9864 fn vpaddsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;
9865
9866 #[link_name = "llvm.x86.avx512.mask.psubus.w.512"]
9867 fn vpsubusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
9868 #[link_name = "llvm.x86.avx512.mask.psubus.w.256"]
9869 fn vpsubusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
9870 #[link_name = "llvm.x86.avx512.mask.psubus.w.128"]
9871 fn vpsubusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;
9872
9873 #[link_name = "llvm.x86.avx512.mask.psubus.b.512"]
9874 fn vpsubusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
9875 #[link_name = "llvm.x86.avx512.mask.psubus.b.256"]
9876 fn vpsubusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
9877 #[link_name = "llvm.x86.avx512.mask.psubus.b.128"]
9878 fn vpsubusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;
9879
9880 #[link_name = "llvm.x86.avx512.mask.psubs.w.512"]
9881 fn vpsubsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
9882 #[link_name = "llvm.x86.avx512.mask.psubs.w.256"]
9883 fn vpsubsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
9884 #[link_name = "llvm.x86.avx512.mask.psubs.w.128"]
9885 fn vpsubsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;
9886
9887 #[link_name = "llvm.x86.avx512.mask.psubs.b.512"]
9888 fn vpsubsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
9889 #[link_name = "llvm.x86.avx512.mask.psubs.b.256"]
9890 fn vpsubsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
9891 #[link_name = "llvm.x86.avx512.mask.psubs.b.128"]
9892 fn vpsubsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;
9893
9894 #[link_name = "llvm.x86.avx512.pmulhu.w.512"]
9895 fn vpmulhuw(a: u16x32, b: u16x32) -> u16x32;
9896 #[link_name = "llvm.x86.avx512.pmulh.w.512"]
9897 fn vpmulhw(a: i16x32, b: i16x32) -> i16x32;
9898 #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
9899 fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
9900
9901 #[link_name = "llvm.x86.avx512.mask.ucmp.w.512"]
9902 fn vpcmpuw(a: u16x32, b: u16x32, op: i32, mask: u32) -> u32;
9903 #[link_name = "llvm.x86.avx512.mask.ucmp.w.256"]
9904 fn vpcmpuw256(a: u16x16, b: u16x16, op: i32, mask: u16) -> u16;
9905 #[link_name = "llvm.x86.avx512.mask.ucmp.w.128"]
9906 fn vpcmpuw128(a: u16x8, b: u16x8, op: i32, mask: u8) -> u8;
9907
9908 #[link_name = "llvm.x86.avx512.mask.ucmp.b.512"]
9909 fn vpcmpub(a: u8x64, b: u8x64, op: i32, mask: u64) -> u64;
9910 #[link_name = "llvm.x86.avx512.mask.ucmp.b.256"]
9911 fn vpcmpub256(a: u8x32, b: u8x32, op: i32, mask: u32) -> u32;
9912 #[link_name = "llvm.x86.avx512.mask.ucmp.b.128"]
9913 fn vpcmpub128(a: u8x16, b: u8x16, op: i32, mask: u16) -> u16;
9914
9915 #[link_name = "llvm.x86.avx512.mask.cmp.w.512"]
9916 fn vpcmpw(a: i16x32, b: i16x32, op: i32, mask: u32) -> u32;
9917 #[link_name = "llvm.x86.avx512.mask.cmp.w.256"]
9918 fn vpcmpw256(a: i16x16, b: i16x16, op: i32, mask: u16) -> u16;
9919 #[link_name = "llvm.x86.avx512.mask.cmp.w.128"]
9920 fn vpcmpw128(a: i16x8, b: i16x8, op: i32, mask: u8) -> u8;
9921
9922 #[link_name = "llvm.x86.avx512.mask.cmp.b.512"]
9923 fn vpcmpb(a: i8x64, b: i8x64, op: i32, mask: u64) -> u64;
9924 #[link_name = "llvm.x86.avx512.mask.cmp.b.256"]
9925 fn vpcmpb256(a: i8x32, b: i8x32, op: i32, mask: u32) -> u32;
9926 #[link_name = "llvm.x86.avx512.mask.cmp.b.128"]
9927 fn vpcmpb128(a: i8x16, b: i8x16, op: i32, mask: u16) -> u16;
9928
9929 #[link_name = "llvm.x86.avx512.mask.pmaxu.w.512"]
9930 fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32;
9931 #[link_name = "llvm.x86.avx512.mask.pmaxu.b.512"]
9932 fn vpmaxub(a: u8x64, b: u8x64) -> u8x64;
9933 #[link_name = "llvm.x86.avx512.mask.pmaxs.w.512"]
9934 fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32;
9935 #[link_name = "llvm.x86.avx512.mask.pmaxs.b.512"]
9936 fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64;
9937
9938 #[link_name = "llvm.x86.avx512.mask.pminu.w.512"]
9939 fn vpminuw(a: u16x32, b: u16x32) -> u16x32;
9940 #[link_name = "llvm.x86.avx512.mask.pminu.b.512"]
9941 fn vpminub(a: u8x64, b: u8x64) -> u8x64;
9942 #[link_name = "llvm.x86.avx512.mask.pmins.w.512"]
9943 fn vpminsw(a: i16x32, b: i16x32) -> i16x32;
9944 #[link_name = "llvm.x86.avx512.mask.pmins.b.512"]
9945 fn vpminsb(a: i8x64, b: i8x64) -> i8x64;
9946
9947 #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
9948 fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
9949 #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
9950 fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
9951
9952 #[link_name = "llvm.x86.avx512.packssdw.512"]
9953 fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
9954 #[link_name = "llvm.x86.avx512.packsswb.512"]
9955 fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
9956 #[link_name = "llvm.x86.avx512.packusdw.512"]
9957 fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
9958 #[link_name = "llvm.x86.avx512.packuswb.512"]
9959 fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;
9960
9961 #[link_name = "llvm.x86.avx512.pavg.w.512"]
9962 fn vpavgw(a: u16x32, b: u16x32) -> u16x32;
9963 #[link_name = "llvm.x86.avx512.pavg.b.512"]
9964 fn vpavgb(a: u8x64, b: u8x64) -> u8x64;
9965
9966 #[link_name = "llvm.x86.avx512.psll.w.512"]
9967 fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
9968 #[link_name = "llvm.x86.avx512.pslli.w.512"]
9969 fn vpslliw(a: i16x32, imm8: u32) -> i16x32;
9970
9971 #[link_name = "llvm.x86.avx2.pslli.w"]
9972 fn pslliw256(a: i16x16, imm8: i32) -> i16x16;
9973 #[link_name = "llvm.x86.sse2.pslli.w"]
9974 fn pslliw128(a: i16x8, imm8: i32) -> i16x8;
9975
9976 #[link_name = "llvm.x86.avx512.psllv.w.512"]
9977 fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
9978 #[link_name = "llvm.x86.avx512.psllv.w.256"]
9979 fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
9980 #[link_name = "llvm.x86.avx512.psllv.w.128"]
9981 fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
9982
9983 #[link_name = "llvm.x86.avx512.psrl.w.512"]
9984 fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
9985 #[link_name = "llvm.x86.avx512.psrli.w.512"]
9986 fn vpsrliw(a: i16x32, imm8: u32) -> i16x32;
9987
9988 #[link_name = "llvm.x86.avx512.psrlv.w.512"]
9989 fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
9990 #[link_name = "llvm.x86.avx512.psrlv.w.256"]
9991 fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
9992 #[link_name = "llvm.x86.avx512.psrlv.w.128"]
9993 fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
9994
9995 #[link_name = "llvm.x86.avx512.psra.w.512"]
9996 fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
9997 #[link_name = "llvm.x86.avx512.psrai.w.512"]
9998 fn vpsraiw(a: i16x32, imm8: u32) -> i16x32;
9999
10000 #[link_name = "llvm.x86.avx2.psrai.w"]
10001 fn psraiw256(a: i16x16, imm8: i32) -> i16x16;
10002 #[link_name = "llvm.x86.sse2.psrai.w"]
10003 fn psraiw128(a: i16x8, imm8: i32) -> i16x8;
10004
10005 #[link_name = "llvm.x86.avx512.psrav.w.512"]
10006 fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
10007 #[link_name = "llvm.x86.avx512.psrav.w.256"]
10008 fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
10009 #[link_name = "llvm.x86.avx512.psrav.w.128"]
10010 fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
10011
10012 #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
10013 fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
10014 #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
10015 fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
10016 #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
10017 fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;
10018
10019 #[link_name = "llvm.x86.avx512.permvar.hi.512"]
10020 fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
10021 #[link_name = "llvm.x86.avx512.permvar.hi.256"]
10022 fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
10023 #[link_name = "llvm.x86.avx512.permvar.hi.128"]
10024 fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;
10025
10026 #[link_name = "llvm.x86.avx512.pshuf.b.512"]
10027 fn vpshufb(a: i8x64, b: i8x64) -> i8x64;
10028
10029 #[link_name = "llvm.x86.avx512.psad.bw.512"]
10030 fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;
10031
10032 #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
10033 fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
10034 #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
10035 fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
10036 #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
10037 fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;
10038
10039 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
10040 fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
10041 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
10042 fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
10043 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
10044 fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;
10045
10046 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
10047 fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
10048 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
10049 fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
10050 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
10051 fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
10052
10053 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
10054 fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
10055 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
10056 fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
10057 #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
10058 fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
10059
10060 #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
10061 fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
10062 #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
10063 fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
10064 #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
10065 fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
10066
10067 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
10068 fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
10069 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
10070 fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
10071 #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
10072 fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
10073}
10074
10075#[cfg(test)]
10076mod tests {
10077
10078 use stdarch_test::simd_test;
10079
10080 use crate::core_arch::x86::*;
10081 use crate::hint::black_box;
10082 use crate::mem::{self};
10083
10084 #[simd_test(enable = "avx512bw")]
10085 unsafe fn test_mm512_abs_epi16() {
10086 let a = _mm512_set1_epi16(-1);
10087 let r = _mm512_abs_epi16(a);
10088 let e = _mm512_set1_epi16(1);
10089 assert_eq_m512i(r, e);
10090 }
10091
10092 #[simd_test(enable = "avx512bw")]
10093 unsafe fn test_mm512_mask_abs_epi16() {
10094 let a = _mm512_set1_epi16(-1);
10095 let r = _mm512_mask_abs_epi16(a, 0, a);
10096 assert_eq_m512i(r, a);
10097 let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
10098 #[rustfmt::skip]
10099 let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
10100 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
10101 assert_eq_m512i(r, e);
10102 }
10103
10104 #[simd_test(enable = "avx512bw")]
10105 unsafe fn test_mm512_maskz_abs_epi16() {
10106 let a = _mm512_set1_epi16(-1);
10107 let r = _mm512_maskz_abs_epi16(0, a);
10108 assert_eq_m512i(r, _mm512_setzero_si512());
10109 let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
10110 #[rustfmt::skip]
10111 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
10112 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
10113 assert_eq_m512i(r, e);
10114 }
10115
10116 #[simd_test(enable = "avx512bw,avx512vl")]
10117 unsafe fn test_mm256_mask_abs_epi16() {
10118 let a = _mm256_set1_epi16(-1);
10119 let r = _mm256_mask_abs_epi16(a, 0, a);
10120 assert_eq_m256i(r, a);
10121 let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
10122 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
10123 assert_eq_m256i(r, e);
10124 }
10125
10126 #[simd_test(enable = "avx512bw,avx512vl")]
10127 unsafe fn test_mm256_maskz_abs_epi16() {
10128 let a = _mm256_set1_epi16(-1);
10129 let r = _mm256_maskz_abs_epi16(0, a);
10130 assert_eq_m256i(r, _mm256_setzero_si256());
10131 let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
10132 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
10133 assert_eq_m256i(r, e);
10134 }
10135
10136 #[simd_test(enable = "avx512bw,avx512vl")]
10137 unsafe fn test_mm_mask_abs_epi16() {
10138 let a = _mm_set1_epi16(-1);
10139 let r = _mm_mask_abs_epi16(a, 0, a);
10140 assert_eq_m128i(r, a);
10141 let r = _mm_mask_abs_epi16(a, 0b00001111, a);
10142 let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
10143 assert_eq_m128i(r, e);
10144 }
10145
10146 #[simd_test(enable = "avx512bw,avx512vl")]
10147 unsafe fn test_mm_maskz_abs_epi16() {
10148 let a = _mm_set1_epi16(-1);
10149 let r = _mm_maskz_abs_epi16(0, a);
10150 assert_eq_m128i(r, _mm_setzero_si128());
10151 let r = _mm_maskz_abs_epi16(0b00001111, a);
10152 let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
10153 assert_eq_m128i(r, e);
10154 }
10155
10156 #[simd_test(enable = "avx512bw")]
10157 unsafe fn test_mm512_abs_epi8() {
10158 let a = _mm512_set1_epi8(-1);
10159 let r = _mm512_abs_epi8(a);
10160 let e = _mm512_set1_epi8(1);
10161 assert_eq_m512i(r, e);
10162 }
10163
10164 #[simd_test(enable = "avx512bw")]
10165 unsafe fn test_mm512_mask_abs_epi8() {
10166 let a = _mm512_set1_epi8(-1);
10167 let r = _mm512_mask_abs_epi8(a, 0, a);
10168 assert_eq_m512i(r, a);
10169 let r = _mm512_mask_abs_epi8(
10170 a,
10171 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
10172 a,
10173 );
10174 #[rustfmt::skip]
10175 let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
10176 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
10177 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
10178 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
10179 assert_eq_m512i(r, e);
10180 }
10181
10182 #[simd_test(enable = "avx512bw")]
10183 unsafe fn test_mm512_maskz_abs_epi8() {
10184 let a = _mm512_set1_epi8(-1);
10185 let r = _mm512_maskz_abs_epi8(0, a);
10186 assert_eq_m512i(r, _mm512_setzero_si512());
10187 let r = _mm512_maskz_abs_epi8(
10188 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
10189 a,
10190 );
10191 #[rustfmt::skip]
10192 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
10193 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
10194 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
10195 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
10196 assert_eq_m512i(r, e);
10197 }
10198
10199 #[simd_test(enable = "avx512bw,avx512vl")]
10200 unsafe fn test_mm256_mask_abs_epi8() {
10201 let a = _mm256_set1_epi8(-1);
10202 let r = _mm256_mask_abs_epi8(a, 0, a);
10203 assert_eq_m256i(r, a);
10204 let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
10205 #[rustfmt::skip]
10206 let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
10207 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
10208 assert_eq_m256i(r, e);
10209 }
10210
10211 #[simd_test(enable = "avx512bw,avx512vl")]
10212 unsafe fn test_mm256_maskz_abs_epi8() {
10213 let a = _mm256_set1_epi8(-1);
10214 let r = _mm256_maskz_abs_epi8(0, a);
10215 assert_eq_m256i(r, _mm256_setzero_si256());
10216 let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
10217 #[rustfmt::skip]
10218 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
10219 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
10220 assert_eq_m256i(r, e);
10221 }
10222
10223 #[simd_test(enable = "avx512bw,avx512vl")]
10224 unsafe fn test_mm_mask_abs_epi8() {
10225 let a = _mm_set1_epi8(-1);
10226 let r = _mm_mask_abs_epi8(a, 0, a);
10227 assert_eq_m128i(r, a);
10228 let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
10229 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
10230 assert_eq_m128i(r, e);
10231 }
10232
10233 #[simd_test(enable = "avx512bw,avx512vl")]
10234 unsafe fn test_mm_maskz_abs_epi8() {
10235 let a = _mm_set1_epi8(-1);
10236 let r = _mm_maskz_abs_epi8(0, a);
10237 assert_eq_m128i(r, _mm_setzero_si128());
10238 let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
10239 #[rustfmt::skip]
10240 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
10241 assert_eq_m128i(r, e);
10242 }
10243
10244 #[simd_test(enable = "avx512bw")]
10245 unsafe fn test_mm512_add_epi16() {
10246 let a = _mm512_set1_epi16(1);
10247 let b = _mm512_set1_epi16(2);
10248 let r = _mm512_add_epi16(a, b);
10249 let e = _mm512_set1_epi16(3);
10250 assert_eq_m512i(r, e);
10251 }
10252
10253 #[simd_test(enable = "avx512bw")]
10254 unsafe fn test_mm512_mask_add_epi16() {
10255 let a = _mm512_set1_epi16(1);
10256 let b = _mm512_set1_epi16(2);
10257 let r = _mm512_mask_add_epi16(a, 0, a, b);
10258 assert_eq_m512i(r, a);
10259 let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
10260 #[rustfmt::skip]
10261 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
10262 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
10263 assert_eq_m512i(r, e);
10264 }
10265
10266 #[simd_test(enable = "avx512bw")]
10267 unsafe fn test_mm512_maskz_add_epi16() {
10268 let a = _mm512_set1_epi16(1);
10269 let b = _mm512_set1_epi16(2);
10270 let r = _mm512_maskz_add_epi16(0, a, b);
10271 assert_eq_m512i(r, _mm512_setzero_si512());
10272 let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
10273 #[rustfmt::skip]
10274 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
10275 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
10276 assert_eq_m512i(r, e);
10277 }
10278
10279 #[simd_test(enable = "avx512bw,avx512vl")]
10280 unsafe fn test_mm256_mask_add_epi16() {
10281 let a = _mm256_set1_epi16(1);
10282 let b = _mm256_set1_epi16(2);
10283 let r = _mm256_mask_add_epi16(a, 0, a, b);
10284 assert_eq_m256i(r, a);
10285 let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
10286 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
10287 assert_eq_m256i(r, e);
10288 }
10289
10290 #[simd_test(enable = "avx512bw,avx512vl")]
10291 unsafe fn test_mm256_maskz_add_epi16() {
10292 let a = _mm256_set1_epi16(1);
10293 let b = _mm256_set1_epi16(2);
10294 let r = _mm256_maskz_add_epi16(0, a, b);
10295 assert_eq_m256i(r, _mm256_setzero_si256());
10296 let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
10297 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
10298 assert_eq_m256i(r, e);
10299 }
10300
10301 #[simd_test(enable = "avx512bw,avx512vl")]
10302 unsafe fn test_mm_mask_add_epi16() {
10303 let a = _mm_set1_epi16(1);
10304 let b = _mm_set1_epi16(2);
10305 let r = _mm_mask_add_epi16(a, 0, a, b);
10306 assert_eq_m128i(r, a);
10307 let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
10308 let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
10309 assert_eq_m128i(r, e);
10310 }
10311
10312 #[simd_test(enable = "avx512bw,avx512vl")]
10313 unsafe fn test_mm_maskz_add_epi16() {
10314 let a = _mm_set1_epi16(1);
10315 let b = _mm_set1_epi16(2);
10316 let r = _mm_maskz_add_epi16(0, a, b);
10317 assert_eq_m128i(r, _mm_setzero_si128());
10318 let r = _mm_maskz_add_epi16(0b00001111, a, b);
10319 let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
10320 assert_eq_m128i(r, e);
10321 }
10322
10323 #[simd_test(enable = "avx512bw")]
10324 unsafe fn test_mm512_add_epi8() {
10325 let a = _mm512_set1_epi8(1);
10326 let b = _mm512_set1_epi8(2);
10327 let r = _mm512_add_epi8(a, b);
10328 let e = _mm512_set1_epi8(3);
10329 assert_eq_m512i(r, e);
10330 }
10331
10332 #[simd_test(enable = "avx512bw")]
10333 unsafe fn test_mm512_mask_add_epi8() {
10334 let a = _mm512_set1_epi8(1);
10335 let b = _mm512_set1_epi8(2);
10336 let r = _mm512_mask_add_epi8(a, 0, a, b);
10337 assert_eq_m512i(r, a);
10338 let r = _mm512_mask_add_epi8(
10339 a,
10340 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
10341 a,
10342 b,
10343 );
10344 #[rustfmt::skip]
10345 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
10346 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
10347 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
10348 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
10349 assert_eq_m512i(r, e);
10350 }
10351
10352 #[simd_test(enable = "avx512bw")]
10353 unsafe fn test_mm512_maskz_add_epi8() {
10354 let a = _mm512_set1_epi8(1);
10355 let b = _mm512_set1_epi8(2);
10356 let r = _mm512_maskz_add_epi8(0, a, b);
10357 assert_eq_m512i(r, _mm512_setzero_si512());
10358 let r = _mm512_maskz_add_epi8(
10359 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
10360 a,
10361 b,
10362 );
10363 #[rustfmt::skip]
10364 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
10365 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
10366 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
10367 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
10368 assert_eq_m512i(r, e);
10369 }
10370
10371 #[simd_test(enable = "avx512bw,avx512vl")]
10372 unsafe fn test_mm256_mask_add_epi8() {
10373 let a = _mm256_set1_epi8(1);
10374 let b = _mm256_set1_epi8(2);
10375 let r = _mm256_mask_add_epi8(a, 0, a, b);
10376 assert_eq_m256i(r, a);
10377 let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
10378 #[rustfmt::skip]
10379 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
10380 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
10381 assert_eq_m256i(r, e);
10382 }
10383
10384 #[simd_test(enable = "avx512bw,avx512vl")]
10385 unsafe fn test_mm256_maskz_add_epi8() {
10386 let a = _mm256_set1_epi8(1);
10387 let b = _mm256_set1_epi8(2);
10388 let r = _mm256_maskz_add_epi8(0, a, b);
10389 assert_eq_m256i(r, _mm256_setzero_si256());
10390 let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
10391 #[rustfmt::skip]
10392 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
10393 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
10394 assert_eq_m256i(r, e);
10395 }
10396
10397 #[simd_test(enable = "avx512bw,avx512vl")]
10398 unsafe fn test_mm_mask_add_epi8() {
10399 let a = _mm_set1_epi8(1);
10400 let b = _mm_set1_epi8(2);
10401 let r = _mm_mask_add_epi8(a, 0, a, b);
10402 assert_eq_m128i(r, a);
10403 let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b);
10404 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
10405 assert_eq_m128i(r, e);
10406 }
10407
10408 #[simd_test(enable = "avx512bw,avx512vl")]
10409 unsafe fn test_mm_maskz_add_epi8() {
10410 let a = _mm_set1_epi8(1);
10411 let b = _mm_set1_epi8(2);
10412 let r = _mm_maskz_add_epi8(0, a, b);
10413 assert_eq_m128i(r, _mm_setzero_si128());
10414 let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b);
10415 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
10416 assert_eq_m128i(r, e);
10417 }
10418
10419 #[simd_test(enable = "avx512bw")]
10420 unsafe fn test_mm512_adds_epu16() {
10421 let a = _mm512_set1_epi16(1);
10422 let b = _mm512_set1_epi16(u16::MAX as i16);
10423 let r = _mm512_adds_epu16(a, b);
10424 let e = _mm512_set1_epi16(u16::MAX as i16);
10425 assert_eq_m512i(r, e);
10426 }
10427
10428 #[simd_test(enable = "avx512bw")]
10429 unsafe fn test_mm512_mask_adds_epu16() {
10430 let a = _mm512_set1_epi16(1);
10431 let b = _mm512_set1_epi16(u16::MAX as i16);
10432 let r = _mm512_mask_adds_epu16(a, 0, a, b);
10433 assert_eq_m512i(r, a);
10434 let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
10435 #[rustfmt::skip]
10436 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10437 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
10438 assert_eq_m512i(r, e);
10439 }
10440
10441 #[simd_test(enable = "avx512bw")]
10442 unsafe fn test_mm512_maskz_adds_epu16() {
10443 let a = _mm512_set1_epi16(1);
10444 let b = _mm512_set1_epi16(u16::MAX as i16);
10445 let r = _mm512_maskz_adds_epu16(0, a, b);
10446 assert_eq_m512i(r, _mm512_setzero_si512());
10447 let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b);
10448 #[rustfmt::skip]
10449 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10450 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
10451 assert_eq_m512i(r, e);
10452 }
10453
10454 #[simd_test(enable = "avx512bw,avx512vl")]
10455 unsafe fn test_mm256_mask_adds_epu16() {
10456 let a = _mm256_set1_epi16(1);
10457 let b = _mm256_set1_epi16(u16::MAX as i16);
10458 let r = _mm256_mask_adds_epu16(a, 0, a, b);
10459 assert_eq_m256i(r, a);
10460 let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b);
10461 #[rustfmt::skip]
10462 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
10463 assert_eq_m256i(r, e);
10464 }
10465
10466 #[simd_test(enable = "avx512bw,avx512vl")]
10467 unsafe fn test_mm256_maskz_adds_epu16() {
10468 let a = _mm256_set1_epi16(1);
10469 let b = _mm256_set1_epi16(u16::MAX as i16);
10470 let r = _mm256_maskz_adds_epu16(0, a, b);
10471 assert_eq_m256i(r, _mm256_setzero_si256());
10472 let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b);
10473 #[rustfmt::skip]
10474 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
10475 assert_eq_m256i(r, e);
10476 }
10477
10478 #[simd_test(enable = "avx512bw,avx512vl")]
10479 unsafe fn test_mm_mask_adds_epu16() {
10480 let a = _mm_set1_epi16(1);
10481 let b = _mm_set1_epi16(u16::MAX as i16);
10482 let r = _mm_mask_adds_epu16(a, 0, a, b);
10483 assert_eq_m128i(r, a);
10484 let r = _mm_mask_adds_epu16(a, 0b00001111, a, b);
10485 #[rustfmt::skip]
10486 let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
10487 assert_eq_m128i(r, e);
10488 }
10489
10490 #[simd_test(enable = "avx512bw,avx512vl")]
10491 unsafe fn test_mm_maskz_adds_epu16() {
10492 let a = _mm_set1_epi16(1);
10493 let b = _mm_set1_epi16(u16::MAX as i16);
10494 let r = _mm_maskz_adds_epu16(0, a, b);
10495 assert_eq_m128i(r, _mm_setzero_si128());
10496 let r = _mm_maskz_adds_epu16(0b00001111, a, b);
10497 #[rustfmt::skip]
10498 let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16);
10499 assert_eq_m128i(r, e);
10500 }
10501
10502 #[simd_test(enable = "avx512bw")]
10503 unsafe fn test_mm512_adds_epu8() {
10504 let a = _mm512_set1_epi8(1);
10505 let b = _mm512_set1_epi8(u8::MAX as i8);
10506 let r = _mm512_adds_epu8(a, b);
10507 let e = _mm512_set1_epi8(u8::MAX as i8);
10508 assert_eq_m512i(r, e);
10509 }
10510
10511 #[simd_test(enable = "avx512bw")]
10512 unsafe fn test_mm512_mask_adds_epu8() {
10513 let a = _mm512_set1_epi8(1);
10514 let b = _mm512_set1_epi8(u8::MAX as i8);
10515 let r = _mm512_mask_adds_epu8(a, 0, a, b);
10516 assert_eq_m512i(r, a);
10517 let r = _mm512_mask_adds_epu8(
10518 a,
10519 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
10520 a,
10521 b,
10522 );
10523 #[rustfmt::skip]
10524 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10526 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10527 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
10528 assert_eq_m512i(r, e);
10529 }
10530
10531 #[simd_test(enable = "avx512bw")]
10532 unsafe fn test_mm512_maskz_adds_epu8() {
10533 let a = _mm512_set1_epi8(1);
10534 let b = _mm512_set1_epi8(u8::MAX as i8);
10535 let r = _mm512_maskz_adds_epu8(0, a, b);
10536 assert_eq_m512i(r, _mm512_setzero_si512());
10537 let r = _mm512_maskz_adds_epu8(
10538 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
10539 a,
10540 b,
10541 );
10542 #[rustfmt::skip]
10543 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10544 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10545 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10546 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
10547 assert_eq_m512i(r, e);
10548 }
10549
10550 #[simd_test(enable = "avx512bw,avx512vl")]
10551 unsafe fn test_mm256_mask_adds_epu8() {
10552 let a = _mm256_set1_epi8(1);
10553 let b = _mm256_set1_epi8(u8::MAX as i8);
10554 let r = _mm256_mask_adds_epu8(a, 0, a, b);
10555 assert_eq_m256i(r, a);
10556 let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
10557 #[rustfmt::skip]
10558 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10559 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
10560 assert_eq_m256i(r, e);
10561 }
10562
10563 #[simd_test(enable = "avx512bw,avx512vl")]
10564 unsafe fn test_mm256_maskz_adds_epu8() {
10565 let a = _mm256_set1_epi8(1);
10566 let b = _mm256_set1_epi8(u8::MAX as i8);
10567 let r = _mm256_maskz_adds_epu8(0, a, b);
10568 assert_eq_m256i(r, _mm256_setzero_si256());
10569 let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b);
10570 #[rustfmt::skip]
10571 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10572 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
10573 assert_eq_m256i(r, e);
10574 }
10575
10576 #[simd_test(enable = "avx512bw,avx512vl")]
10577 unsafe fn test_mm_mask_adds_epu8() {
10578 let a = _mm_set1_epi8(1);
10579 let b = _mm_set1_epi8(u8::MAX as i8);
10580 let r = _mm_mask_adds_epu8(a, 0, a, b);
10581 assert_eq_m128i(r, a);
10582 let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b);
10583 #[rustfmt::skip]
10584 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
10585 assert_eq_m128i(r, e);
10586 }
10587
10588 #[simd_test(enable = "avx512bw,avx512vl")]
10589 unsafe fn test_mm_maskz_adds_epu8() {
10590 let a = _mm_set1_epi8(1);
10591 let b = _mm_set1_epi8(u8::MAX as i8);
10592 let r = _mm_maskz_adds_epu8(0, a, b);
10593 assert_eq_m128i(r, _mm_setzero_si128());
10594 let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b);
10595 #[rustfmt::skip]
10596 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8);
10597 assert_eq_m128i(r, e);
10598 }
10599
10600 #[simd_test(enable = "avx512bw")]
10601 unsafe fn test_mm512_adds_epi16() {
10602 let a = _mm512_set1_epi16(1);
10603 let b = _mm512_set1_epi16(i16::MAX);
10604 let r = _mm512_adds_epi16(a, b);
10605 let e = _mm512_set1_epi16(i16::MAX);
10606 assert_eq_m512i(r, e);
10607 }
10608
10609 #[simd_test(enable = "avx512bw")]
10610 unsafe fn test_mm512_mask_adds_epi16() {
10611 let a = _mm512_set1_epi16(1);
10612 let b = _mm512_set1_epi16(i16::MAX);
10613 let r = _mm512_mask_adds_epi16(a, 0, a, b);
10614 assert_eq_m512i(r, a);
10615 let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
10616 #[rustfmt::skip]
10617 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10618 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
10619 assert_eq_m512i(r, e);
10620 }
10621
10622 #[simd_test(enable = "avx512bw")]
10623 unsafe fn test_mm512_maskz_adds_epi16() {
10624 let a = _mm512_set1_epi16(1);
10625 let b = _mm512_set1_epi16(i16::MAX);
10626 let r = _mm512_maskz_adds_epi16(0, a, b);
10627 assert_eq_m512i(r, _mm512_setzero_si512());
10628 let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b);
10629 #[rustfmt::skip]
10630 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10631 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
10632 assert_eq_m512i(r, e);
10633 }
10634
10635 #[simd_test(enable = "avx512bw,avx512vl")]
10636 unsafe fn test_mm256_mask_adds_epi16() {
10637 let a = _mm256_set1_epi16(1);
10638 let b = _mm256_set1_epi16(i16::MAX);
10639 let r = _mm256_mask_adds_epi16(a, 0, a, b);
10640 assert_eq_m256i(r, a);
10641 let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b);
10642 #[rustfmt::skip]
10643 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
10644 assert_eq_m256i(r, e);
10645 }
10646
10647 #[simd_test(enable = "avx512bw,avx512vl")]
10648 unsafe fn test_mm256_maskz_adds_epi16() {
10649 let a = _mm256_set1_epi16(1);
10650 let b = _mm256_set1_epi16(i16::MAX);
10651 let r = _mm256_maskz_adds_epi16(0, a, b);
10652 assert_eq_m256i(r, _mm256_setzero_si256());
10653 let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b);
10654 #[rustfmt::skip]
10655 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
10656 assert_eq_m256i(r, e);
10657 }
10658
10659 #[simd_test(enable = "avx512bw,avx512vl")]
10660 unsafe fn test_mm_mask_adds_epi16() {
10661 let a = _mm_set1_epi16(1);
10662 let b = _mm_set1_epi16(i16::MAX);
10663 let r = _mm_mask_adds_epi16(a, 0, a, b);
10664 assert_eq_m128i(r, a);
10665 let r = _mm_mask_adds_epi16(a, 0b00001111, a, b);
10666 let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
10667 assert_eq_m128i(r, e);
10668 }
10669
10670 #[simd_test(enable = "avx512bw,avx512vl")]
10671 unsafe fn test_mm_maskz_adds_epi16() {
10672 let a = _mm_set1_epi16(1);
10673 let b = _mm_set1_epi16(i16::MAX);
10674 let r = _mm_maskz_adds_epi16(0, a, b);
10675 assert_eq_m128i(r, _mm_setzero_si128());
10676 let r = _mm_maskz_adds_epi16(0b00001111, a, b);
10677 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
10678 assert_eq_m128i(r, e);
10679 }
10680
10681 #[simd_test(enable = "avx512bw")]
10682 unsafe fn test_mm512_adds_epi8() {
10683 let a = _mm512_set1_epi8(1);
10684 let b = _mm512_set1_epi8(i8::MAX);
10685 let r = _mm512_adds_epi8(a, b);
10686 let e = _mm512_set1_epi8(i8::MAX);
10687 assert_eq_m512i(r, e);
10688 }
10689
10690 #[simd_test(enable = "avx512bw")]
10691 unsafe fn test_mm512_mask_adds_epi8() {
10692 let a = _mm512_set1_epi8(1);
10693 let b = _mm512_set1_epi8(i8::MAX);
10694 let r = _mm512_mask_adds_epi8(a, 0, a, b);
10695 assert_eq_m512i(r, a);
10696 let r = _mm512_mask_adds_epi8(
10697 a,
10698 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
10699 a,
10700 b,
10701 );
10702 #[rustfmt::skip]
10703 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10704 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10705 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10706 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
10707 assert_eq_m512i(r, e);
10708 }
10709
10710 #[simd_test(enable = "avx512bw")]
10711 unsafe fn test_mm512_maskz_adds_epi8() {
10712 let a = _mm512_set1_epi8(1);
10713 let b = _mm512_set1_epi8(i8::MAX);
10714 let r = _mm512_maskz_adds_epi8(0, a, b);
10715 assert_eq_m512i(r, _mm512_setzero_si512());
10716 let r = _mm512_maskz_adds_epi8(
10717 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
10718 a,
10719 b,
10720 );
10721 #[rustfmt::skip]
10722 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10725 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
10726 assert_eq_m512i(r, e);
10727 }
10728
10729 #[simd_test(enable = "avx512bw,avx512vl")]
10730 unsafe fn test_mm256_mask_adds_epi8() {
10731 let a = _mm256_set1_epi8(1);
10732 let b = _mm256_set1_epi8(i8::MAX);
10733 let r = _mm256_mask_adds_epi8(a, 0, a, b);
10734 assert_eq_m256i(r, a);
10735 let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
10736 #[rustfmt::skip]
10737 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10738 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
10739 assert_eq_m256i(r, e);
10740 }
10741
10742 #[simd_test(enable = "avx512bw,avx512vl")]
10743 unsafe fn test_mm256_maskz_adds_epi8() {
10744 let a = _mm256_set1_epi8(1);
10745 let b = _mm256_set1_epi8(i8::MAX);
10746 let r = _mm256_maskz_adds_epi8(0, a, b);
10747 assert_eq_m256i(r, _mm256_setzero_si256());
10748 let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b);
10749 #[rustfmt::skip]
10750 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10751 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
10752 assert_eq_m256i(r, e);
10753 }
10754
10755 #[simd_test(enable = "avx512bw,avx512vl")]
10756 unsafe fn test_mm_mask_adds_epi8() {
10757 let a = _mm_set1_epi8(1);
10758 let b = _mm_set1_epi8(i8::MAX);
10759 let r = _mm_mask_adds_epi8(a, 0, a, b);
10760 assert_eq_m128i(r, a);
10761 let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b);
10762 #[rustfmt::skip]
10763 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
10764 assert_eq_m128i(r, e);
10765 }
10766
10767 #[simd_test(enable = "avx512bw,avx512vl")]
10768 unsafe fn test_mm_maskz_adds_epi8() {
10769 let a = _mm_set1_epi8(1);
10770 let b = _mm_set1_epi8(i8::MAX);
10771 let r = _mm_maskz_adds_epi8(0, a, b);
10772 assert_eq_m128i(r, _mm_setzero_si128());
10773 let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b);
10774 #[rustfmt::skip]
10775 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
10776 assert_eq_m128i(r, e);
10777 }
10778
10779 #[simd_test(enable = "avx512bw")]
10780 unsafe fn test_mm512_sub_epi16() {
10781 let a = _mm512_set1_epi16(1);
10782 let b = _mm512_set1_epi16(2);
10783 let r = _mm512_sub_epi16(a, b);
10784 let e = _mm512_set1_epi16(-1);
10785 assert_eq_m512i(r, e);
10786 }
10787
10788 #[simd_test(enable = "avx512bw")]
10789 unsafe fn test_mm512_mask_sub_epi16() {
10790 let a = _mm512_set1_epi16(1);
10791 let b = _mm512_set1_epi16(2);
10792 let r = _mm512_mask_sub_epi16(a, 0, a, b);
10793 assert_eq_m512i(r, a);
10794 let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
10795 #[rustfmt::skip]
10796 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
10797 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
10798 assert_eq_m512i(r, e);
10799 }
10800
10801 #[simd_test(enable = "avx512bw")]
10802 unsafe fn test_mm512_maskz_sub_epi16() {
10803 let a = _mm512_set1_epi16(1);
10804 let b = _mm512_set1_epi16(2);
10805 let r = _mm512_maskz_sub_epi16(0, a, b);
10806 assert_eq_m512i(r, _mm512_setzero_si512());
10807 let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b);
10808 #[rustfmt::skip]
10809 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
10810 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
10811 assert_eq_m512i(r, e);
10812 }
10813
10814 #[simd_test(enable = "avx512bw,avx512vl")]
10815 unsafe fn test_mm256_mask_sub_epi16() {
10816 let a = _mm256_set1_epi16(1);
10817 let b = _mm256_set1_epi16(2);
10818 let r = _mm256_mask_sub_epi16(a, 0, a, b);
10819 assert_eq_m256i(r, a);
10820 let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b);
10821 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
10822 assert_eq_m256i(r, e);
10823 }
10824
10825 #[simd_test(enable = "avx512bw,avx512vl")]
10826 unsafe fn test_mm256_maskz_sub_epi16() {
10827 let a = _mm256_set1_epi16(1);
10828 let b = _mm256_set1_epi16(2);
10829 let r = _mm256_maskz_sub_epi16(0, a, b);
10830 assert_eq_m256i(r, _mm256_setzero_si256());
10831 let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b);
10832 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
10833 assert_eq_m256i(r, e);
10834 }
10835
10836 #[simd_test(enable = "avx512bw,avx512vl")]
10837 unsafe fn test_mm_mask_sub_epi16() {
10838 let a = _mm_set1_epi16(1);
10839 let b = _mm_set1_epi16(2);
10840 let r = _mm_mask_sub_epi16(a, 0, a, b);
10841 assert_eq_m128i(r, a);
10842 let r = _mm_mask_sub_epi16(a, 0b00001111, a, b);
10843 let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1);
10844 assert_eq_m128i(r, e);
10845 }
10846
10847 #[simd_test(enable = "avx512bw,avx512vl")]
10848 unsafe fn test_mm_maskz_sub_epi16() {
10849 let a = _mm_set1_epi16(1);
10850 let b = _mm_set1_epi16(2);
10851 let r = _mm_maskz_sub_epi16(0, a, b);
10852 assert_eq_m128i(r, _mm_setzero_si128());
10853 let r = _mm_maskz_sub_epi16(0b00001111, a, b);
10854 let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1);
10855 assert_eq_m128i(r, e);
10856 }
10857
10858 #[simd_test(enable = "avx512bw")]
10859 unsafe fn test_mm512_sub_epi8() {
10860 let a = _mm512_set1_epi8(1);
10861 let b = _mm512_set1_epi8(2);
10862 let r = _mm512_sub_epi8(a, b);
10863 let e = _mm512_set1_epi8(-1);
10864 assert_eq_m512i(r, e);
10865 }
10866
10867 #[simd_test(enable = "avx512bw")]
10868 unsafe fn test_mm512_mask_sub_epi8() {
10869 let a = _mm512_set1_epi8(1);
10870 let b = _mm512_set1_epi8(2);
10871 let r = _mm512_mask_sub_epi8(a, 0, a, b);
10872 assert_eq_m512i(r, a);
10873 let r = _mm512_mask_sub_epi8(
10874 a,
10875 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
10876 a,
10877 b,
10878 );
10879 #[rustfmt::skip]
10880 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
10881 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
10882 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
10883 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
10884 assert_eq_m512i(r, e);
10885 }
10886
10887 #[simd_test(enable = "avx512bw")]
10888 unsafe fn test_mm512_maskz_sub_epi8() {
10889 let a = _mm512_set1_epi8(1);
10890 let b = _mm512_set1_epi8(2);
10891 let r = _mm512_maskz_sub_epi8(0, a, b);
10892 assert_eq_m512i(r, _mm512_setzero_si512());
10893 let r = _mm512_maskz_sub_epi8(
10894 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
10895 a,
10896 b,
10897 );
10898 #[rustfmt::skip]
10899 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
10900 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
10901 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
10902 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
10903 assert_eq_m512i(r, e);
10904 }
10905
10906 #[simd_test(enable = "avx512bw,avx512vl")]
10907 unsafe fn test_mm256_mask_sub_epi8() {
10908 let a = _mm256_set1_epi8(1);
10909 let b = _mm256_set1_epi8(2);
10910 let r = _mm256_mask_sub_epi8(a, 0, a, b);
10911 assert_eq_m256i(r, a);
10912 let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
10913 #[rustfmt::skip]
10914 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1,
10915 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
10916 assert_eq_m256i(r, e);
10917 }
10918
10919 #[simd_test(enable = "avx512bw,avx512vl")]
10920 unsafe fn test_mm256_maskz_sub_epi8() {
10921 let a = _mm256_set1_epi8(1);
10922 let b = _mm256_set1_epi8(2);
10923 let r = _mm256_maskz_sub_epi8(0, a, b);
10924 assert_eq_m256i(r, _mm256_setzero_si256());
10925 let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b);
10926 #[rustfmt::skip]
10927 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
10928 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
10929 assert_eq_m256i(r, e);
10930 }
10931
10932 #[simd_test(enable = "avx512bw,avx512vl")]
10933 unsafe fn test_mm_mask_sub_epi8() {
10934 let a = _mm_set1_epi8(1);
10935 let b = _mm_set1_epi8(2);
10936 let r = _mm_mask_sub_epi8(a, 0, a, b);
10937 assert_eq_m128i(r, a);
10938 let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b);
10939 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1);
10940 assert_eq_m128i(r, e);
10941 }
10942
10943 #[simd_test(enable = "avx512bw,avx512vl")]
10944 unsafe fn test_mm_maskz_sub_epi8() {
10945 let a = _mm_set1_epi8(1);
10946 let b = _mm_set1_epi8(2);
10947 let r = _mm_maskz_sub_epi8(0, a, b);
10948 assert_eq_m128i(r, _mm_setzero_si128());
10949 let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b);
10950 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
10951 assert_eq_m128i(r, e);
10952 }
10953
10954 #[simd_test(enable = "avx512bw")]
10955 unsafe fn test_mm512_subs_epu16() {
10956 let a = _mm512_set1_epi16(1);
10957 let b = _mm512_set1_epi16(u16::MAX as i16);
10958 let r = _mm512_subs_epu16(a, b);
10959 let e = _mm512_set1_epi16(0);
10960 assert_eq_m512i(r, e);
10961 }
10962
10963 #[simd_test(enable = "avx512bw")]
10964 unsafe fn test_mm512_mask_subs_epu16() {
10965 let a = _mm512_set1_epi16(1);
10966 let b = _mm512_set1_epi16(u16::MAX as i16);
10967 let r = _mm512_mask_subs_epu16(a, 0, a, b);
10968 assert_eq_m512i(r, a);
10969 let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
10970 #[rustfmt::skip]
10971 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10972 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
10973 assert_eq_m512i(r, e);
10974 }
10975
10976 #[simd_test(enable = "avx512bw")]
10977 unsafe fn test_mm512_maskz_subs_epu16() {
10978 let a = _mm512_set1_epi16(1);
10979 let b = _mm512_set1_epi16(u16::MAX as i16);
10980 let r = _mm512_maskz_subs_epu16(0, a, b);
10981 assert_eq_m512i(r, _mm512_setzero_si512());
10982 let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b);
10983 #[rustfmt::skip]
10984 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10985 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
10986 assert_eq_m512i(r, e);
10987 }
10988
10989 #[simd_test(enable = "avx512bw,avx512vl")]
10990 unsafe fn test_mm256_mask_subs_epu16() {
10991 let a = _mm256_set1_epi16(1);
10992 let b = _mm256_set1_epi16(u16::MAX as i16);
10993 let r = _mm256_mask_subs_epu16(a, 0, a, b);
10994 assert_eq_m256i(r, a);
10995 let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b);
10996 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
10997 assert_eq_m256i(r, e);
10998 }
10999
11000 #[simd_test(enable = "avx512bw,avx512vl")]
11001 unsafe fn test_mm256_maskz_subs_epu16() {
11002 let a = _mm256_set1_epi16(1);
11003 let b = _mm256_set1_epi16(u16::MAX as i16);
11004 let r = _mm256_maskz_subs_epu16(0, a, b);
11005 assert_eq_m256i(r, _mm256_setzero_si256());
11006 let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b);
11007 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11008 assert_eq_m256i(r, e);
11009 }
11010
11011 #[simd_test(enable = "avx512bw,avx512vl")]
11012 unsafe fn test_mm_mask_subs_epu16() {
11013 let a = _mm_set1_epi16(1);
11014 let b = _mm_set1_epi16(u16::MAX as i16);
11015 let r = _mm_mask_subs_epu16(a, 0, a, b);
11016 assert_eq_m128i(r, a);
11017 let r = _mm_mask_subs_epu16(a, 0b00001111, a, b);
11018 let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
11019 assert_eq_m128i(r, e);
11020 }
11021
11022 #[simd_test(enable = "avx512bw,avx512vl")]
11023 unsafe fn test_mm_maskz_subs_epu16() {
11024 let a = _mm_set1_epi16(1);
11025 let b = _mm_set1_epi16(u16::MAX as i16);
11026 let r = _mm_maskz_subs_epu16(0, a, b);
11027 assert_eq_m128i(r, _mm_setzero_si128());
11028 let r = _mm_maskz_subs_epu16(0b00001111, a, b);
11029 let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
11030 assert_eq_m128i(r, e);
11031 }
11032
11033 #[simd_test(enable = "avx512bw")]
11034 unsafe fn test_mm512_subs_epu8() {
11035 let a = _mm512_set1_epi8(1);
11036 let b = _mm512_set1_epi8(u8::MAX as i8);
11037 let r = _mm512_subs_epu8(a, b);
11038 let e = _mm512_set1_epi8(0);
11039 assert_eq_m512i(r, e);
11040 }
11041
11042 #[simd_test(enable = "avx512bw")]
11043 unsafe fn test_mm512_mask_subs_epu8() {
11044 let a = _mm512_set1_epi8(1);
11045 let b = _mm512_set1_epi8(u8::MAX as i8);
11046 let r = _mm512_mask_subs_epu8(a, 0, a, b);
11047 assert_eq_m512i(r, a);
11048 let r = _mm512_mask_subs_epu8(
11049 a,
11050 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
11051 a,
11052 b,
11053 );
11054 #[rustfmt::skip]
11055 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11056 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11057 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11058 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11059 assert_eq_m512i(r, e);
11060 }
11061
11062 #[simd_test(enable = "avx512bw")]
11063 unsafe fn test_mm512_maskz_subs_epu8() {
11064 let a = _mm512_set1_epi8(1);
11065 let b = _mm512_set1_epi8(u8::MAX as i8);
11066 let r = _mm512_maskz_subs_epu8(0, a, b);
11067 assert_eq_m512i(r, _mm512_setzero_si512());
11068 let r = _mm512_maskz_subs_epu8(
11069 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
11070 a,
11071 b,
11072 );
11073 #[rustfmt::skip]
11074 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11075 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11076 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11077 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11078 assert_eq_m512i(r, e);
11079 }
11080
11081 #[simd_test(enable = "avx512bw,avx512vl")]
11082 unsafe fn test_mm256_mask_subs_epu8() {
11083 let a = _mm256_set1_epi8(1);
11084 let b = _mm256_set1_epi8(u8::MAX as i8);
11085 let r = _mm256_mask_subs_epu8(a, 0, a, b);
11086 assert_eq_m256i(r, a);
11087 let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
11088 #[rustfmt::skip]
11089 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11090 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11091 assert_eq_m256i(r, e);
11092 }
11093
11094 #[simd_test(enable = "avx512bw,avx512vl")]
11095 unsafe fn test_mm256_maskz_subs_epu8() {
11096 let a = _mm256_set1_epi8(1);
11097 let b = _mm256_set1_epi8(u8::MAX as i8);
11098 let r = _mm256_maskz_subs_epu8(0, a, b);
11099 assert_eq_m256i(r, _mm256_setzero_si256());
11100 let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
11101 #[rustfmt::skip]
11102 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11103 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11104 assert_eq_m256i(r, e);
11105 }
11106
11107 #[simd_test(enable = "avx512bw,avx512vl")]
11108 unsafe fn test_mm_mask_subs_epu8() {
11109 let a = _mm_set1_epi8(1);
11110 let b = _mm_set1_epi8(u8::MAX as i8);
11111 let r = _mm_mask_subs_epu8(a, 0, a, b);
11112 assert_eq_m128i(r, a);
11113 let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
11114 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11115 assert_eq_m128i(r, e);
11116 }
11117
11118 #[simd_test(enable = "avx512bw,avx512vl")]
11119 unsafe fn test_mm_maskz_subs_epu8() {
11120 let a = _mm_set1_epi8(1);
11121 let b = _mm_set1_epi8(u8::MAX as i8);
11122 let r = _mm_maskz_subs_epu8(0, a, b);
11123 assert_eq_m128i(r, _mm_setzero_si128());
11124 let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
11125 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11126 assert_eq_m128i(r, e);
11127 }
11128
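    // Signed saturating subtraction: -1 - i16::MAX overflows and saturates to
    // i16::MIN rather than wrapping.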
11129 #[simd_test(enable = "avx512bw")]
11130 unsafe fn test_mm512_subs_epi16() {
11131 let a = _mm512_set1_epi16(-1);
11132 let b = _mm512_set1_epi16(i16::MAX);
11133 let r = _mm512_subs_epi16(a, b);
11134 let e = _mm512_set1_epi16(i16::MIN);
11135 assert_eq_m512i(r, e);
11136 }
11137
11138 #[simd_test(enable = "avx512bw")]
11139 unsafe fn test_mm512_mask_subs_epi16() {
11140 let a = _mm512_set1_epi16(-1);
11141 let b = _mm512_set1_epi16(i16::MAX);
11142 let r = _mm512_mask_subs_epi16(a, 0, a, b);
11143 assert_eq_m512i(r, a);
11144 let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
11145 #[rustfmt::skip]
11146 let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11147 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
11148 assert_eq_m512i(r, e);
11149 }
11150
11151 #[simd_test(enable = "avx512bw")]
11152 unsafe fn test_mm512_maskz_subs_epi16() {
11153 let a = _mm512_set1_epi16(-1);
11154 let b = _mm512_set1_epi16(i16::MAX);
11155 let r = _mm512_maskz_subs_epi16(0, a, b);
11156 assert_eq_m512i(r, _mm512_setzero_si512());
11157 let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
11158 #[rustfmt::skip]
11159 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11160 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
11161 assert_eq_m512i(r, e);
11162 }
11163
11164 #[simd_test(enable = "avx512bw,avx512vl")]
11165 unsafe fn test_mm256_mask_subs_epi16() {
11166 let a = _mm256_set1_epi16(-1);
11167 let b = _mm256_set1_epi16(i16::MAX);
11168 let r = _mm256_mask_subs_epi16(a, 0, a, b);
11169 assert_eq_m256i(r, a);
11170 let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
11171 #[rustfmt::skip]
11172 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
11173 assert_eq_m256i(r, e);
11174 }
11175
11176 #[simd_test(enable = "avx512bw,avx512vl")]
11177 unsafe fn test_mm256_maskz_subs_epi16() {
11178 let a = _mm256_set1_epi16(-1);
11179 let b = _mm256_set1_epi16(i16::MAX);
11180 let r = _mm256_maskz_subs_epi16(0, a, b);
11181 assert_eq_m256i(r, _mm256_setzero_si256());
11182 let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
11183 #[rustfmt::skip]
11184 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
11185 assert_eq_m256i(r, e);
11186 }
11187
11188 #[simd_test(enable = "avx512bw,avx512vl")]
11189 unsafe fn test_mm_mask_subs_epi16() {
11190 let a = _mm_set1_epi16(-1);
11191 let b = _mm_set1_epi16(i16::MAX);
11192 let r = _mm_mask_subs_epi16(a, 0, a, b);
11193 assert_eq_m128i(r, a);
11194 let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
11195 let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
11196 assert_eq_m128i(r, e);
11197 }
11198
11199 #[simd_test(enable = "avx512bw,avx512vl")]
11200 unsafe fn test_mm_maskz_subs_epi16() {
11201 let a = _mm_set1_epi16(-1);
11202 let b = _mm_set1_epi16(i16::MAX);
11203 let r = _mm_maskz_subs_epi16(0, a, b);
11204 assert_eq_m128i(r, _mm_setzero_si128());
11205 let r = _mm_maskz_subs_epi16(0b00001111, a, b);
11206 let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
11207 assert_eq_m128i(r, e);
11208 }
11209
11210 #[simd_test(enable = "avx512bw")]
11211 unsafe fn test_mm512_subs_epi8() {
11212 let a = _mm512_set1_epi8(-1);
11213 let b = _mm512_set1_epi8(i8::MAX);
11214 let r = _mm512_subs_epi8(a, b);
11215 let e = _mm512_set1_epi8(i8::MIN);
11216 assert_eq_m512i(r, e);
11217 }
11218
11219 #[simd_test(enable = "avx512bw")]
11220 unsafe fn test_mm512_mask_subs_epi8() {
11221 let a = _mm512_set1_epi8(-1);
11222 let b = _mm512_set1_epi8(i8::MAX);
11223 let r = _mm512_mask_subs_epi8(a, 0, a, b);
11224 assert_eq_m512i(r, a);
11225 let r = _mm512_mask_subs_epi8(
11226 a,
11227 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
11228 a,
11229 b,
11230 );
11231 #[rustfmt::skip]
11232 let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11233 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11234 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11235 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
11236 assert_eq_m512i(r, e);
11237 }
11238
11239 #[simd_test(enable = "avx512bw")]
11240 unsafe fn test_mm512_maskz_subs_epi8() {
11241 let a = _mm512_set1_epi8(-1);
11242 let b = _mm512_set1_epi8(i8::MAX);
11243 let r = _mm512_maskz_subs_epi8(0, a, b);
11244 assert_eq_m512i(r, _mm512_setzero_si512());
11245 let r = _mm512_maskz_subs_epi8(
11246 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
11247 a,
11248 b,
11249 );
11250 #[rustfmt::skip]
11251 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11252 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11253 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11254 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
11255 assert_eq_m512i(r, e);
11256 }
11257
11258 #[simd_test(enable = "avx512bw,avx512vl")]
11259 unsafe fn test_mm256_mask_subs_epi8() {
11260 let a = _mm256_set1_epi8(-1);
11261 let b = _mm256_set1_epi8(i8::MAX);
11262 let r = _mm256_mask_subs_epi8(a, 0, a, b);
11263 assert_eq_m256i(r, a);
11264 let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
11265 #[rustfmt::skip]
11266 let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
11267 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
11268 assert_eq_m256i(r, e);
11269 }
11270
11271 #[simd_test(enable = "avx512bw,avx512vl")]
11272 unsafe fn test_mm256_maskz_subs_epi8() {
11273 let a = _mm256_set1_epi8(-1);
11274 let b = _mm256_set1_epi8(i8::MAX);
11275 let r = _mm256_maskz_subs_epi8(0, a, b);
11276 assert_eq_m256i(r, _mm256_setzero_si256());
11277 let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
11278 #[rustfmt::skip]
11279 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11280 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
11281 assert_eq_m256i(r, e);
11282 }
11283
11284 #[simd_test(enable = "avx512bw,avx512vl")]
11285 unsafe fn test_mm_mask_subs_epi8() {
11286 let a = _mm_set1_epi8(-1);
11287 let b = _mm_set1_epi8(i8::MAX);
11288 let r = _mm_mask_subs_epi8(a, 0, a, b);
11289 assert_eq_m128i(r, a);
11290 let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
11291 #[rustfmt::skip]
11292 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
11293 assert_eq_m128i(r, e);
11294 }
11295
11296 #[simd_test(enable = "avx512bw,avx512vl")]
11297 unsafe fn test_mm_maskz_subs_epi8() {
11298 let a = _mm_set1_epi8(-1);
11299 let b = _mm_set1_epi8(i8::MAX);
11300 let r = _mm_maskz_subs_epi8(0, a, b);
11301 assert_eq_m128i(r, _mm_setzero_si128());
11302 let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
11303 #[rustfmt::skip]
11304 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
11305 assert_eq_m128i(r, e);
11306 }
11307
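    // mulhi keeps the high 16 bits of the 32-bit product, so 1 * 1 produces 0 in
    // every lane; only the masked-off lanes differ between the variants.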
11308 #[simd_test(enable = "avx512bw")]
11309 unsafe fn test_mm512_mulhi_epu16() {
11310 let a = _mm512_set1_epi16(1);
11311 let b = _mm512_set1_epi16(1);
11312 let r = _mm512_mulhi_epu16(a, b);
11313 let e = _mm512_set1_epi16(0);
11314 assert_eq_m512i(r, e);
11315 }
11316
11317 #[simd_test(enable = "avx512bw")]
11318 unsafe fn test_mm512_mask_mulhi_epu16() {
11319 let a = _mm512_set1_epi16(1);
11320 let b = _mm512_set1_epi16(1);
11321 let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
11322 assert_eq_m512i(r, a);
11323 let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
11324 #[rustfmt::skip]
11325 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11326 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11327 assert_eq_m512i(r, e);
11328 }
11329
11330 #[simd_test(enable = "avx512bw")]
11331 unsafe fn test_mm512_maskz_mulhi_epu16() {
11332 let a = _mm512_set1_epi16(1);
11333 let b = _mm512_set1_epi16(1);
11334 let r = _mm512_maskz_mulhi_epu16(0, a, b);
11335 assert_eq_m512i(r, _mm512_setzero_si512());
11336 let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
11337 #[rustfmt::skip]
11338 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11339 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11340 assert_eq_m512i(r, e);
11341 }
11342
11343 #[simd_test(enable = "avx512bw,avx512vl")]
11344 unsafe fn test_mm256_mask_mulhi_epu16() {
11345 let a = _mm256_set1_epi16(1);
11346 let b = _mm256_set1_epi16(1);
11347 let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
11348 assert_eq_m256i(r, a);
11349 let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
11350 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11351 assert_eq_m256i(r, e);
11352 }
11353
11354 #[simd_test(enable = "avx512bw,avx512vl")]
11355 unsafe fn test_mm256_maskz_mulhi_epu16() {
11356 let a = _mm256_set1_epi16(1);
11357 let b = _mm256_set1_epi16(1);
11358 let r = _mm256_maskz_mulhi_epu16(0, a, b);
11359 assert_eq_m256i(r, _mm256_setzero_si256());
11360 let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
11361 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11362 assert_eq_m256i(r, e);
11363 }
11364
11365 #[simd_test(enable = "avx512bw,avx512vl")]
11366 unsafe fn test_mm_mask_mulhi_epu16() {
11367 let a = _mm_set1_epi16(1);
11368 let b = _mm_set1_epi16(1);
11369 let r = _mm_mask_mulhi_epu16(a, 0, a, b);
11370 assert_eq_m128i(r, a);
11371 let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
11372 let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
11373 assert_eq_m128i(r, e);
11374 }
11375
11376 #[simd_test(enable = "avx512bw,avx512vl")]
11377 unsafe fn test_mm_maskz_mulhi_epu16() {
11378 let a = _mm_set1_epi16(1);
11379 let b = _mm_set1_epi16(1);
11380 let r = _mm_maskz_mulhi_epu16(0, a, b);
11381 assert_eq_m128i(r, _mm_setzero_si128());
11382 let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
11383 let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
11384 assert_eq_m128i(r, e);
11385 }
11386
11387 #[simd_test(enable = "avx512bw")]
11388 unsafe fn test_mm512_mulhi_epi16() {
11389 let a = _mm512_set1_epi16(1);
11390 let b = _mm512_set1_epi16(1);
11391 let r = _mm512_mulhi_epi16(a, b);
11392 let e = _mm512_set1_epi16(0);
11393 assert_eq_m512i(r, e);
11394 }
11395
11396 #[simd_test(enable = "avx512bw")]
11397 unsafe fn test_mm512_mask_mulhi_epi16() {
11398 let a = _mm512_set1_epi16(1);
11399 let b = _mm512_set1_epi16(1);
11400 let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
11401 assert_eq_m512i(r, a);
11402 let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
11403 #[rustfmt::skip]
11404 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11405 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11406 assert_eq_m512i(r, e);
11407 }
11408
11409 #[simd_test(enable = "avx512bw")]
11410 unsafe fn test_mm512_maskz_mulhi_epi16() {
11411 let a = _mm512_set1_epi16(1);
11412 let b = _mm512_set1_epi16(1);
11413 let r = _mm512_maskz_mulhi_epi16(0, a, b);
11414 assert_eq_m512i(r, _mm512_setzero_si512());
11415 let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
11416 #[rustfmt::skip]
11417 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11418 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11419 assert_eq_m512i(r, e);
11420 }
11421
11422 #[simd_test(enable = "avx512bw,avx512vl")]
11423 unsafe fn test_mm256_mask_mulhi_epi16() {
11424 let a = _mm256_set1_epi16(1);
11425 let b = _mm256_set1_epi16(1);
11426 let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
11427 assert_eq_m256i(r, a);
11428 let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
11429 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11430 assert_eq_m256i(r, e);
11431 }
11432
11433 #[simd_test(enable = "avx512bw,avx512vl")]
11434 unsafe fn test_mm256_maskz_mulhi_epi16() {
11435 let a = _mm256_set1_epi16(1);
11436 let b = _mm256_set1_epi16(1);
11437 let r = _mm256_maskz_mulhi_epi16(0, a, b);
11438 assert_eq_m256i(r, _mm256_setzero_si256());
11439 let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
11440 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11441 assert_eq_m256i(r, e);
11442 }
11443
11444 #[simd_test(enable = "avx512bw,avx512vl")]
11445 unsafe fn test_mm_mask_mulhi_epi16() {
11446 let a = _mm_set1_epi16(1);
11447 let b = _mm_set1_epi16(1);
11448 let r = _mm_mask_mulhi_epi16(a, 0, a, b);
11449 assert_eq_m128i(r, a);
11450 let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
11451 let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
11452 assert_eq_m128i(r, e);
11453 }
11454
11455 #[simd_test(enable = "avx512bw,avx512vl")]
11456 unsafe fn test_mm_maskz_mulhi_epi16() {
11457 let a = _mm_set1_epi16(1);
11458 let b = _mm_set1_epi16(1);
11459 let r = _mm_maskz_mulhi_epi16(0, a, b);
11460 assert_eq_m128i(r, _mm_setzero_si128());
11461 let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
11462 let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
11463 assert_eq_m128i(r, e);
11464 }
11465
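    // mulhrs: the 32-bit product is shifted right by 14, incremented, shifted right
    // by 1, and truncated to 16 bits; for 1 * 1 this rounds down to 0.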
11466 #[simd_test(enable = "avx512bw")]
11467 unsafe fn test_mm512_mulhrs_epi16() {
11468 let a = _mm512_set1_epi16(1);
11469 let b = _mm512_set1_epi16(1);
11470 let r = _mm512_mulhrs_epi16(a, b);
11471 let e = _mm512_set1_epi16(0);
11472 assert_eq_m512i(r, e);
11473 }
11474
11475 #[simd_test(enable = "avx512bw")]
11476 unsafe fn test_mm512_mask_mulhrs_epi16() {
11477 let a = _mm512_set1_epi16(1);
11478 let b = _mm512_set1_epi16(1);
11479 let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
11480 assert_eq_m512i(r, a);
11481 let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
11482 #[rustfmt::skip]
11483 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11484 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11485 assert_eq_m512i(r, e);
11486 }
11487
11488 #[simd_test(enable = "avx512bw")]
11489 unsafe fn test_mm512_maskz_mulhrs_epi16() {
11490 let a = _mm512_set1_epi16(1);
11491 let b = _mm512_set1_epi16(1);
11492 let r = _mm512_maskz_mulhrs_epi16(0, a, b);
11493 assert_eq_m512i(r, _mm512_setzero_si512());
11494 let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
11495 #[rustfmt::skip]
11496 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11497 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11498 assert_eq_m512i(r, e);
11499 }
11500
11501 #[simd_test(enable = "avx512bw,avx512vl")]
11502 unsafe fn test_mm256_mask_mulhrs_epi16() {
11503 let a = _mm256_set1_epi16(1);
11504 let b = _mm256_set1_epi16(1);
11505 let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
11506 assert_eq_m256i(r, a);
11507 let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
11508 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
11509 assert_eq_m256i(r, e);
11510 }
11511
11512 #[simd_test(enable = "avx512bw,avx512vl")]
11513 unsafe fn test_mm256_maskz_mulhrs_epi16() {
11514 let a = _mm256_set1_epi16(1);
11515 let b = _mm256_set1_epi16(1);
11516 let r = _mm256_maskz_mulhrs_epi16(0, a, b);
11517 assert_eq_m256i(r, _mm256_setzero_si256());
11518 let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
11519 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
11520 assert_eq_m256i(r, e);
11521 }
11522
11523 #[simd_test(enable = "avx512bw,avx512vl")]
11524 unsafe fn test_mm_mask_mulhrs_epi16() {
11525 let a = _mm_set1_epi16(1);
11526 let b = _mm_set1_epi16(1);
11527 let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
11528 assert_eq_m128i(r, a);
11529 let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
11530 let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
11531 assert_eq_m128i(r, e);
11532 }
11533
11534 #[simd_test(enable = "avx512bw,avx512vl")]
11535 unsafe fn test_mm_maskz_mulhrs_epi16() {
11536 let a = _mm_set1_epi16(1);
11537 let b = _mm_set1_epi16(1);
11538 let r = _mm_maskz_mulhrs_epi16(0, a, b);
11539 assert_eq_m128i(r, _mm_setzero_si128());
11540 let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
11541 let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
11542 assert_eq_m128i(r, e);
11543 }
11544
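    // mullo keeps the low 16 bits of the product, so 1 * 1 stays 1.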
11545 #[simd_test(enable = "avx512bw")]
11546 unsafe fn test_mm512_mullo_epi16() {
11547 let a = _mm512_set1_epi16(1);
11548 let b = _mm512_set1_epi16(1);
11549 let r = _mm512_mullo_epi16(a, b);
11550 let e = _mm512_set1_epi16(1);
11551 assert_eq_m512i(r, e);
11552 }
11553
11554 #[simd_test(enable = "avx512bw")]
11555 unsafe fn test_mm512_mask_mullo_epi16() {
11556 let a = _mm512_set1_epi16(1);
11557 let b = _mm512_set1_epi16(1);
11558 let r = _mm512_mask_mullo_epi16(a, 0, a, b);
11559 assert_eq_m512i(r, a);
11560 let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
11561 #[rustfmt::skip]
11562 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11563 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
11564 assert_eq_m512i(r, e);
11565 }
11566
11567 #[simd_test(enable = "avx512bw")]
11568 unsafe fn test_mm512_maskz_mullo_epi16() {
11569 let a = _mm512_set1_epi16(1);
11570 let b = _mm512_set1_epi16(1);
11571 let r = _mm512_maskz_mullo_epi16(0, a, b);
11572 assert_eq_m512i(r, _mm512_setzero_si512());
11573 let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
11574 #[rustfmt::skip]
11575 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
11576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
11577 assert_eq_m512i(r, e);
11578 }
11579
11580 #[simd_test(enable = "avx512bw,avx512vl")]
11581 unsafe fn test_mm256_mask_mullo_epi16() {
11582 let a = _mm256_set1_epi16(1);
11583 let b = _mm256_set1_epi16(1);
11584 let r = _mm256_mask_mullo_epi16(a, 0, a, b);
11585 assert_eq_m256i(r, a);
11586 let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
11587 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
11588 assert_eq_m256i(r, e);
11589 }
11590
11591 #[simd_test(enable = "avx512bw,avx512vl")]
11592 unsafe fn test_mm256_maskz_mullo_epi16() {
11593 let a = _mm256_set1_epi16(1);
11594 let b = _mm256_set1_epi16(1);
11595 let r = _mm256_maskz_mullo_epi16(0, a, b);
11596 assert_eq_m256i(r, _mm256_setzero_si256());
11597 let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
11598 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
11599 assert_eq_m256i(r, e);
11600 }
11601
11602 #[simd_test(enable = "avx512bw,avx512vl")]
11603 unsafe fn test_mm_mask_mullo_epi16() {
11604 let a = _mm_set1_epi16(1);
11605 let b = _mm_set1_epi16(1);
11606 let r = _mm_mask_mullo_epi16(a, 0, a, b);
11607 assert_eq_m128i(r, a);
11608 let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
11609 let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
11610 assert_eq_m128i(r, e);
11611 }
11612
11613 #[simd_test(enable = "avx512bw,avx512vl")]
11614 unsafe fn test_mm_maskz_mullo_epi16() {
11615 let a = _mm_set1_epi16(1);
11616 let b = _mm_set1_epi16(1);
11617 let r = _mm_maskz_mullo_epi16(0, a, b);
11618 assert_eq_m128i(r, _mm_setzero_si128());
11619 let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
11620 let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
11621 assert_eq_m128i(r, e);
11622 }
11623
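    // Element-wise unsigned maximum. Mask bit i selects element i counted from the
    // least-significant lane, i.e. the rightmost argument of `_mm512_set_epi16`.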
11624 #[simd_test(enable = "avx512bw")]
11625 unsafe fn test_mm512_max_epu16() {
11626 #[rustfmt::skip]
11627 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11628 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11629 #[rustfmt::skip]
11630 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11631 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11632 let r = _mm512_max_epu16(a, b);
11633 #[rustfmt::skip]
11634 let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11635 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
11636 assert_eq_m512i(r, e);
11637 }
11638
11639    #[simd_test(enable = "avx512bw")]
11640 unsafe fn test_mm512_mask_max_epu16() {
11641 #[rustfmt::skip]
11642 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11643 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11644 #[rustfmt::skip]
11645 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11646 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11647 let r = _mm512_mask_max_epu16(a, 0, a, b);
11648 assert_eq_m512i(r, a);
11649 let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
11650 #[rustfmt::skip]
11651 let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11652 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11653 assert_eq_m512i(r, e);
11654 }
11655
11656    #[simd_test(enable = "avx512bw")]
11657 unsafe fn test_mm512_maskz_max_epu16() {
11658 #[rustfmt::skip]
11659 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11660 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11661 #[rustfmt::skip]
11662 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11663 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11664 let r = _mm512_maskz_max_epu16(0, a, b);
11665 assert_eq_m512i(r, _mm512_setzero_si512());
11666 let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b);
11667 #[rustfmt::skip]
11668 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11669 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11670 assert_eq_m512i(r, e);
11671 }
11672
11673    #[simd_test(enable = "avx512bw,avx512vl")]
11674 unsafe fn test_mm256_mask_max_epu16() {
11675 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11676 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11677 let r = _mm256_mask_max_epu16(a, 0, a, b);
11678 assert_eq_m256i(r, a);
11679 let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b);
11680 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11681 assert_eq_m256i(r, e);
11682 }
11683
11684    #[simd_test(enable = "avx512bw,avx512vl")]
11685 unsafe fn test_mm256_maskz_max_epu16() {
11686 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11687 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11688 let r = _mm256_maskz_max_epu16(0, a, b);
11689 assert_eq_m256i(r, _mm256_setzero_si256());
11690 let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b);
11691 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11692 assert_eq_m256i(r, e);
11693 }
11694
11695    #[simd_test(enable = "avx512bw,avx512vl")]
11696 unsafe fn test_mm_mask_max_epu16() {
11697 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
11698 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
11699 let r = _mm_mask_max_epu16(a, 0, a, b);
11700 assert_eq_m128i(r, a);
11701 let r = _mm_mask_max_epu16(a, 0b00001111, a, b);
11702 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
11703 assert_eq_m128i(r, e);
11704 }
11705
11706    #[simd_test(enable = "avx512bw,avx512vl")]
11707 unsafe fn test_mm_maskz_max_epu16() {
11708 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
11709 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
11710 let r = _mm_maskz_max_epu16(0, a, b);
11711 assert_eq_m128i(r, _mm_setzero_si128());
11712 let r = _mm_maskz_max_epu16(0b00001111, a, b);
11713 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
11714 assert_eq_m128i(r, e);
11715 }
11716
11717 #[simd_test(enable = "avx512bw")]
11718 unsafe fn test_mm512_max_epu8() {
11719 #[rustfmt::skip]
11720 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11721 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11722 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11723 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11724 #[rustfmt::skip]
11725 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11726 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11727 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11728 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11729 let r = _mm512_max_epu8(a, b);
11730 #[rustfmt::skip]
11731 let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11732 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11733 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11734 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
11735 assert_eq_m512i(r, e);
11736 }
11737
11738    #[simd_test(enable = "avx512bw")]
11739 unsafe fn test_mm512_mask_max_epu8() {
11740 #[rustfmt::skip]
11741 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11742 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11743 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11744 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11745 #[rustfmt::skip]
11746 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11747 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11748 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11749 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11750 let r = _mm512_mask_max_epu8(a, 0, a, b);
11751 assert_eq_m512i(r, a);
11752 let r = _mm512_mask_max_epu8(
11753 a,
11754 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
11755 a,
11756 b,
11757 );
11758 #[rustfmt::skip]
11759 let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11760 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11761 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11762 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11763 assert_eq_m512i(r, e);
11764 }
11765
11766    #[simd_test(enable = "avx512bw")]
11767 unsafe fn test_mm512_maskz_max_epu8() {
11768 #[rustfmt::skip]
11769 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11770 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11771 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11772 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11773 #[rustfmt::skip]
11774 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11775 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11776 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11777 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11778 let r = _mm512_maskz_max_epu8(0, a, b);
11779 assert_eq_m512i(r, _mm512_setzero_si512());
11780 let r = _mm512_maskz_max_epu8(
11781 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
11782 a,
11783 b,
11784 );
11785 #[rustfmt::skip]
11786 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11787 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11788 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11789 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11790 assert_eq_m512i(r, e);
11791 }
11792
11793    #[simd_test(enable = "avx512bw,avx512vl")]
11794 unsafe fn test_mm256_mask_max_epu8() {
11795 #[rustfmt::skip]
11796 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11797 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11798 #[rustfmt::skip]
11799 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11800 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11801 let r = _mm256_mask_max_epu8(a, 0, a, b);
11802 assert_eq_m256i(r, a);
11803 let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
11804 #[rustfmt::skip]
11805 let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11806 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11807 assert_eq_m256i(r, e);
11808 }
11809
11810    #[simd_test(enable = "avx512bw,avx512vl")]
11811 unsafe fn test_mm256_maskz_max_epu8() {
11812 #[rustfmt::skip]
11813 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11814 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11815 #[rustfmt::skip]
11816 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11817 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11818 let r = _mm256_maskz_max_epu8(0, a, b);
11819 assert_eq_m256i(r, _mm256_setzero_si256());
11820 let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b);
11821 #[rustfmt::skip]
11822 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11823 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11824 assert_eq_m256i(r, e);
11825 }
11826
11827    #[simd_test(enable = "avx512bw,avx512vl")]
11828 unsafe fn test_mm_mask_max_epu8() {
11829 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11830 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11831 let r = _mm_mask_max_epu8(a, 0, a, b);
11832 assert_eq_m128i(r, a);
11833 let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b);
11834 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11835 assert_eq_m128i(r, e);
11836 }
11837
11838    #[simd_test(enable = "avx512bw,avx512vl")]
11839 unsafe fn test_mm_maskz_max_epu8() {
11840 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11841 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11842 let r = _mm_maskz_max_epu8(0, a, b);
11843 assert_eq_m128i(r, _mm_setzero_si128());
11844 let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b);
11845 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11846 assert_eq_m128i(r, e);
11847 }
11848
11849 #[simd_test(enable = "avx512bw")]
11850 unsafe fn test_mm512_max_epi16() {
11851 #[rustfmt::skip]
11852 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11853 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11854 #[rustfmt::skip]
11855 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11856 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11857 let r = _mm512_max_epi16(a, b);
11858 #[rustfmt::skip]
11859 let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11860 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
11861 assert_eq_m512i(r, e);
11862 }
11863
11864    #[simd_test(enable = "avx512bw")]
11865 unsafe fn test_mm512_mask_max_epi16() {
11866 #[rustfmt::skip]
11867 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11868 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11869 #[rustfmt::skip]
11870 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11871 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11872 let r = _mm512_mask_max_epi16(a, 0, a, b);
11873 assert_eq_m512i(r, a);
11874 let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
11875 #[rustfmt::skip]
11876 let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11877 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11878 assert_eq_m512i(r, e);
11879 }
11880
11881    #[simd_test(enable = "avx512bw")]
11882 unsafe fn test_mm512_maskz_max_epi16() {
11883 #[rustfmt::skip]
11884 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11885 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11886 #[rustfmt::skip]
11887 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11888 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11889 let r = _mm512_maskz_max_epi16(0, a, b);
11890 assert_eq_m512i(r, _mm512_setzero_si512());
11891 let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b);
11892 #[rustfmt::skip]
11893 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
11894 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11895 assert_eq_m512i(r, e);
11896 }
11897
11898    #[simd_test(enable = "avx512bw,avx512vl")]
11899 unsafe fn test_mm256_mask_max_epi16() {
11900 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11901 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11902 let r = _mm256_mask_max_epi16(a, 0, a, b);
11903 assert_eq_m256i(r, a);
11904 let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b);
11905 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11906 assert_eq_m256i(r, e);
11907 }
11908
11909    #[simd_test(enable = "avx512bw,avx512vl")]
11910 unsafe fn test_mm256_maskz_max_epi16() {
11911 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11912 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11913 let r = _mm256_maskz_max_epi16(0, a, b);
11914 assert_eq_m256i(r, _mm256_setzero_si256());
11915 let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b);
11916 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
11917 assert_eq_m256i(r, e);
11918 }
11919
11920    #[simd_test(enable = "avx512bw,avx512vl")]
11921 unsafe fn test_mm_mask_max_epi16() {
11922 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
11923 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
11924 let r = _mm_mask_max_epi16(a, 0, a, b);
11925 assert_eq_m128i(r, a);
11926 let r = _mm_mask_max_epi16(a, 0b00001111, a, b);
11927 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
11928 assert_eq_m128i(r, e);
11929 }
11930
11931    #[simd_test(enable = "avx512bw,avx512vl")]
11932 unsafe fn test_mm_maskz_max_epi16() {
11933 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
11934 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
11935 let r = _mm_maskz_max_epi16(0, a, b);
11936 assert_eq_m128i(r, _mm_setzero_si128());
11937 let r = _mm_maskz_max_epi16(0b00001111, a, b);
11938 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
11939 assert_eq_m128i(r, e);
11940 }
11941
11942 #[simd_test(enable = "avx512bw")]
11943 unsafe fn test_mm512_max_epi8() {
11944 #[rustfmt::skip]
11945 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11946 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11947 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11948 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11949 #[rustfmt::skip]
11950 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11951 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11952 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11953 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11954 let r = _mm512_max_epi8(a, b);
11955 #[rustfmt::skip]
11956 let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11957 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11958 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
11959 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
11960 assert_eq_m512i(r, e);
11961 }
11962
11963    #[simd_test(enable = "avx512bw")]
11964 unsafe fn test_mm512_mask_max_epi8() {
11965 #[rustfmt::skip]
11966 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11967 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11968 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11969 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11970 #[rustfmt::skip]
11971 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11972 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11973 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
11974 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
11975 let r = _mm512_mask_max_epi8(a, 0, a, b);
11976 assert_eq_m512i(r, a);
11977 let r = _mm512_mask_max_epi8(
11978 a,
11979 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
11980 a,
11981 b,
11982 );
11983 #[rustfmt::skip]
11984 let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11985 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11986 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11987 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11988 assert_eq_m512i(r, e);
11989 }
11990
11991    #[simd_test(enable = "avx512bw")]
11992 unsafe fn test_mm512_maskz_max_epi8() {
11993 #[rustfmt::skip]
11994 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11995 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
11997 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
11998 #[rustfmt::skip]
11999 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12000 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12001 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12002 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12003 let r = _mm512_maskz_max_epi8(0, a, b);
12004 assert_eq_m512i(r, _mm512_setzero_si512());
12005 let r = _mm512_maskz_max_epi8(
12006 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12007 a,
12008 b,
12009 );
12010 #[rustfmt::skip]
12011 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
12012 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
12013 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
12014 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
12015 assert_eq_m512i(r, e);
12016 }
12017
12018    #[simd_test(enable = "avx512bw,avx512vl")]
12019 unsafe fn test_mm256_mask_max_epi8() {
12020 #[rustfmt::skip]
12021 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12022 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12023 #[rustfmt::skip]
12024 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12025 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12026 let r = _mm256_mask_max_epi8(a, 0, a, b);
12027 assert_eq_m256i(r, a);
12028 let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
12029 #[rustfmt::skip]
12030 let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12031 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12032 assert_eq_m256i(r, e);
12033 }
12034
12035    #[simd_test(enable = "avx512bw,avx512vl")]
12036 unsafe fn test_mm256_maskz_max_epi8() {
12037 #[rustfmt::skip]
12038 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12039 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12040 #[rustfmt::skip]
12041 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12042 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12043 let r = _mm256_maskz_max_epi8(0, a, b);
12044 assert_eq_m256i(r, _mm256_setzero_si256());
12045 let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b);
12046 #[rustfmt::skip]
12047 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15,
12048 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
12049 assert_eq_m256i(r, e);
12050 }
12051
12052    #[simd_test(enable = "avx512bw,avx512vl")]
12053 unsafe fn test_mm_mask_max_epi8() {
12054 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12055 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12056 let r = _mm_mask_max_epi8(a, 0, a, b);
12057 assert_eq_m128i(r, a);
12058 let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b);
12059 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12060 assert_eq_m128i(r, e);
12061 }
12062
12063    #[simd_test(enable = "avx512bw,avx512vl")]
12064 unsafe fn test_mm_maskz_max_epi8() {
12065 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12066 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12067 let r = _mm_maskz_max_epi8(0, a, b);
12068 assert_eq_m128i(r, _mm_setzero_si128());
12069 let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b);
12070 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
12071 assert_eq_m128i(r, e);
12072 }
12073
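    // The element-wise minimum tests mirror the maximum tests above, with the
    // selected lanes taking min(a, b) instead.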
12074 #[simd_test(enable = "avx512bw")]
12075 unsafe fn test_mm512_min_epu16() {
12076 #[rustfmt::skip]
12077 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12078 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12079 #[rustfmt::skip]
12080 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12081 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12082 let r = _mm512_min_epu16(a, b);
12083 #[rustfmt::skip]
12084 let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12085 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12086 assert_eq_m512i(r, e);
12087 }
12088
12089    #[simd_test(enable = "avx512bw")]
12090 unsafe fn test_mm512_mask_min_epu16() {
12091 #[rustfmt::skip]
12092 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12093 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12094 #[rustfmt::skip]
12095 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12096 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12097 let r = _mm512_mask_min_epu16(a, 0, a, b);
12098 assert_eq_m512i(r, a);
12099 let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b);
12100 #[rustfmt::skip]
12101 let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12102 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12103 assert_eq_m512i(r, e);
12104 }
12105
12106    #[simd_test(enable = "avx512bw")]
12107 unsafe fn test_mm512_maskz_min_epu16() {
12108 #[rustfmt::skip]
12109 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12110 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12111 #[rustfmt::skip]
12112 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12113 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12114 let r = _mm512_maskz_min_epu16(0, a, b);
12115 assert_eq_m512i(r, _mm512_setzero_si512());
12116 let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b);
12117 #[rustfmt::skip]
12118 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12119 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12120 assert_eq_m512i(r, e);
12121 }
12122
12123    #[simd_test(enable = "avx512bw,avx512vl")]
12124 unsafe fn test_mm256_mask_min_epu16() {
12125 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12126 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12127 let r = _mm256_mask_min_epu16(a, 0, a, b);
12128 assert_eq_m256i(r, a);
12129 let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b);
12130 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12131 assert_eq_m256i(r, e);
12132 }
12133
12134    #[simd_test(enable = "avx512bw,avx512vl")]
12135 unsafe fn test_mm256_maskz_min_epu16() {
12136 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12137 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12138 let r = _mm256_maskz_min_epu16(0, a, b);
12139 assert_eq_m256i(r, _mm256_setzero_si256());
12140 let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b);
12141 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12142 assert_eq_m256i(r, e);
12143 }
12144
12145    #[simd_test(enable = "avx512bw,avx512vl")]
12146 unsafe fn test_mm_mask_min_epu16() {
12147 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
12148 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
12149 let r = _mm_mask_min_epu16(a, 0, a, b);
12150 assert_eq_m128i(r, a);
12151 let r = _mm_mask_min_epu16(a, 0b00001111, a, b);
12152 let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
12153 assert_eq_m128i(r, e);
12154 }
12155
12156    #[simd_test(enable = "avx512bw,avx512vl")]
12157 unsafe fn test_mm_maskz_min_epu16() {
12158 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
12159 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
12160 let r = _mm_maskz_min_epu16(0, a, b);
12161 assert_eq_m128i(r, _mm_setzero_si128());
12162 let r = _mm_maskz_min_epu16(0b00001111, a, b);
12163 let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
12164 assert_eq_m128i(r, e);
12165 }
12166
12167 #[simd_test(enable = "avx512bw")]
12168 unsafe fn test_mm512_min_epu8() {
12169 #[rustfmt::skip]
12170 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12171 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12172 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12173 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12174 #[rustfmt::skip]
12175 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12176 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12177 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12178 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12179 let r = _mm512_min_epu8(a, b);
12180 #[rustfmt::skip]
12181 let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12182 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12183 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12184 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12185 assert_eq_m512i(r, e);
12186 }
12187
12188    #[simd_test(enable = "avx512bw")]
12189 unsafe fn test_mm512_mask_min_epu8() {
12190 #[rustfmt::skip]
12191 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12192 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12193 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12194 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12195 #[rustfmt::skip]
12196 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12197 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12198 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12199 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12200 let r = _mm512_mask_min_epu8(a, 0, a, b);
12201 assert_eq_m512i(r, a);
12202 let r = _mm512_mask_min_epu8(
12203 a,
12204 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12205 a,
12206 b,
12207 );
12208 #[rustfmt::skip]
12209 let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12210 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12211 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12212 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12213 assert_eq_m512i(r, e);
12214 }
12215
12216    #[simd_test(enable = "avx512bw")]
12217 unsafe fn test_mm512_maskz_min_epu8() {
12218 #[rustfmt::skip]
12219 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12220 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12221 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12222 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12223 #[rustfmt::skip]
12224 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12225 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12226 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12227 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12228 let r = _mm512_maskz_min_epu8(0, a, b);
12229 assert_eq_m512i(r, _mm512_setzero_si512());
12230 let r = _mm512_maskz_min_epu8(
12231 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12232 a,
12233 b,
12234 );
12235 #[rustfmt::skip]
12236 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12237 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12238 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12239 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12240 assert_eq_m512i(r, e);
12241 }
12242
12243    #[simd_test(enable = "avx512bw,avx512vl")]
12244 unsafe fn test_mm256_mask_min_epu8() {
12245 #[rustfmt::skip]
12246 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12247 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12248 #[rustfmt::skip]
12249 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12250 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12251 let r = _mm256_mask_min_epu8(a, 0, a, b);
12252 assert_eq_m256i(r, a);
12253 let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b);
12254 #[rustfmt::skip]
12255 let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12256 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12257 assert_eq_m256i(r, e);
12258 }
12259
12260    #[simd_test(enable = "avx512bw,avx512vl")]
12261 unsafe fn test_mm256_maskz_min_epu8() {
12262 #[rustfmt::skip]
12263 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12264 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12265 #[rustfmt::skip]
12266 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12267 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12268 let r = _mm256_maskz_min_epu8(0, a, b);
12269 assert_eq_m256i(r, _mm256_setzero_si256());
12270 let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b);
12271 #[rustfmt::skip]
12272 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12273 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12274 assert_eq_m256i(r, e);
12275 }
12276
12277    #[simd_test(enable = "avx512bw,avx512vl")]
12278 unsafe fn test_mm_mask_min_epu8() {
12279 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12280 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12281 let r = _mm_mask_min_epu8(a, 0, a, b);
12282 assert_eq_m128i(r, a);
12283 let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b);
12284 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12285 assert_eq_m128i(r, e);
12286 }
12287
12288    #[simd_test(enable = "avx512bw,avx512vl")]
12289 unsafe fn test_mm_maskz_min_epu8() {
12290 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12291 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12292 let r = _mm_maskz_min_epu8(0, a, b);
12293 assert_eq_m128i(r, _mm_setzero_si128());
12294 let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b);
12295 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12296 assert_eq_m128i(r, e);
12297 }
12298
12299 #[simd_test(enable = "avx512bw")]
12300 unsafe fn test_mm512_min_epi16() {
12301 #[rustfmt::skip]
12302 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12303 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12304 #[rustfmt::skip]
12305 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12306 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12307 let r = _mm512_min_epi16(a, b);
12308 #[rustfmt::skip]
12309 let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12310 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12311 assert_eq_m512i(r, e);
12312 }
12313
12314    #[simd_test(enable = "avx512bw")]
12315 unsafe fn test_mm512_mask_min_epi16() {
12316 #[rustfmt::skip]
12317 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12318 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12319 #[rustfmt::skip]
12320 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12321 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12322 let r = _mm512_mask_min_epi16(a, 0, a, b);
12323 assert_eq_m512i(r, a);
12324 let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
12325 #[rustfmt::skip]
12326 let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12327 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12328 assert_eq_m512i(r, e);
12329 }
12330
12331    #[simd_test(enable = "avx512bw")]
12332 unsafe fn test_mm512_maskz_min_epi16() {
12333 #[rustfmt::skip]
12334 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12335 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12336 #[rustfmt::skip]
12337 let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12338 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12339 let r = _mm512_maskz_min_epi16(0, a, b);
12340 assert_eq_m512i(r, _mm512_setzero_si512());
12341 let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b);
12342 #[rustfmt::skip]
12343 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12344 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12345 assert_eq_m512i(r, e);
12346 }
12347
12348    #[simd_test(enable = "avx512bw,avx512vl")]
12349 unsafe fn test_mm256_mask_min_epi16() {
12350 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12351 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12352 let r = _mm256_mask_min_epi16(a, 0, a, b);
12353 assert_eq_m256i(r, a);
12354 let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b);
12355 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12356 assert_eq_m256i(r, e);
12357 }
12358
12359    #[simd_test(enable = "avx512bw,avx512vl")]
12360 unsafe fn test_mm256_maskz_min_epi16() {
12361 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12362 let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12363 let r = _mm256_maskz_min_epi16(0, a, b);
12364 assert_eq_m256i(r, _mm256_setzero_si256());
12365 let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b);
12366 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12367 assert_eq_m256i(r, e);
12368 }
12369
12370    #[simd_test(enable = "avx512bw,avx512vl")]
12371 unsafe fn test_mm_mask_min_epi16() {
12372 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
12373 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
12374 let r = _mm_mask_min_epi16(a, 0, a, b);
12375 assert_eq_m128i(r, a);
12376 let r = _mm_mask_min_epi16(a, 0b00001111, a, b);
12377 let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0);
12378 assert_eq_m128i(r, e);
12379 }
12380
12381    #[simd_test(enable = "avx512bw,avx512vl")]
12382 unsafe fn test_mm_maskz_min_epi16() {
12383 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
12384 let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
12385 let r = _mm_maskz_min_epi16(0, a, b);
12386 assert_eq_m128i(r, _mm_setzero_si128());
12387 let r = _mm_maskz_min_epi16(0b00001111, a, b);
12388 let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
12389 assert_eq_m128i(r, e);
12390 }
12391
12392 #[simd_test(enable = "avx512bw")]
12393 unsafe fn test_mm512_min_epi8() {
12394 #[rustfmt::skip]
12395 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12396 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12397 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12398 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12399 #[rustfmt::skip]
12400 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12401 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12402 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12403 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12404 let r = _mm512_min_epi8(a, b);
12405 #[rustfmt::skip]
12406 let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12407 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12408 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12409 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12410 assert_eq_m512i(r, e);
12411 }
12412
12413 #[simd_test(enable = "avx512bw")]
12414 unsafe fn test_mm512_mask_min_epi8() {
12415 #[rustfmt::skip]
12416 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12417 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12418 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12419 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12420 #[rustfmt::skip]
12421 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12422 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12423 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12424 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12425 let r = _mm512_mask_min_epi8(a, 0, a, b);
12426 assert_eq_m512i(r, a);
12427 let r = _mm512_mask_min_epi8(
12428 a,
12429 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12430 a,
12431 b,
12432 );
12433 #[rustfmt::skip]
12434 let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12435 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12436 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12437 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12438 assert_eq_m512i(r, e);
12439 }
12440
12441 #[simd_test(enable = "avx512bw")]
12442 unsafe fn test_mm512_maskz_min_epi8() {
12443 #[rustfmt::skip]
12444 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12445 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12446 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12447 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12448 #[rustfmt::skip]
12449 let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12450 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12451 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12452 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12453 let r = _mm512_maskz_min_epi8(0, a, b);
12454 assert_eq_m512i(r, _mm512_setzero_si512());
12455 let r = _mm512_maskz_min_epi8(
12456 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
12457 a,
12458 b,
12459 );
12460 #[rustfmt::skip]
12461 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12462 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12463 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12464 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12465 assert_eq_m512i(r, e);
12466 }
12467
12468 #[simd_test(enable = "avx512bw,avx512vl")]
12469 unsafe fn test_mm256_mask_min_epi8() {
12470 #[rustfmt::skip]
12471 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12472 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12473 #[rustfmt::skip]
12474 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12475 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12476 let r = _mm256_mask_min_epi8(a, 0, a, b);
12477 assert_eq_m256i(r, a);
12478 let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
12479 #[rustfmt::skip]
12480 let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0,
12481 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12482 assert_eq_m256i(r, e);
12483 }
12484
12485 #[simd_test(enable = "avx512bw,avx512vl")]
12486 unsafe fn test_mm256_maskz_min_epi8() {
12487 #[rustfmt::skip]
12488 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
12489 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12490 #[rustfmt::skip]
12491 let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
12492 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12493 let r = _mm256_maskz_min_epi8(0, a, b);
12494 assert_eq_m256i(r, _mm256_setzero_si256());
12495 let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b);
12496 #[rustfmt::skip]
12497 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0,
12498 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12499 assert_eq_m256i(r, e);
12500 }
12501
12502 #[simd_test(enable = "avx512bw,avx512vl")]
12503 unsafe fn test_mm_mask_min_epi8() {
12504 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12505 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12506 let r = _mm_mask_min_epi8(a, 0, a, b);
12507 assert_eq_m128i(r, a);
12508 let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b);
12509 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
12510 assert_eq_m128i(r, e);
12511 }
12512
12513 #[simd_test(enable = "avx512bw,avx512vl")]
12514 unsafe fn test_mm_maskz_min_epi8() {
12515 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
12516 let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
12517 let r = _mm_maskz_min_epi8(0, a, b);
12518 assert_eq_m128i(r, _mm_setzero_si128());
12519 let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b);
12520 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0);
12521 assert_eq_m128i(r, e);
12522 }
12523
12524 #[simd_test(enable = "avx512bw")]
12525 unsafe fn test_mm512_cmplt_epu16_mask() {
12526 let a = _mm512_set1_epi16(-2);
12527 let b = _mm512_set1_epi16(-1);
12528 let m = _mm512_cmplt_epu16_mask(a, b);
12529 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12530 }
12531
12532 #[simd_test(enable = "avx512bw")]
12533 unsafe fn test_mm512_mask_cmplt_epu16_mask() {
12534 let a = _mm512_set1_epi16(-2);
12535 let b = _mm512_set1_epi16(-1);
12536 let mask = 0b01010101_01010101_01010101_01010101;
12537 let r = _mm512_mask_cmplt_epu16_mask(mask, a, b);
12538 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12539 }
12540
cdc7bbd5
XL
12541 #[simd_test(enable = "avx512bw,avx512vl")]
12542 unsafe fn test_mm256_cmplt_epu16_mask() {
12543 let a = _mm256_set1_epi16(-2);
12544 let b = _mm256_set1_epi16(-1);
12545 let m = _mm256_cmplt_epu16_mask(a, b);
12546 assert_eq!(m, 0b11111111_11111111);
12547 }
12548
12549 #[simd_test(enable = "avx512bw,avx512vl")]
12550 unsafe fn test_mm256_mask_cmplt_epu16_mask() {
12551 let a = _mm256_set1_epi16(-2);
12552 let b = _mm256_set1_epi16(-1);
12553 let mask = 0b01010101_01010101;
12554 let r = _mm256_mask_cmplt_epu16_mask(mask, a, b);
12555 assert_eq!(r, 0b01010101_01010101);
12556 }
12557
12558 #[simd_test(enable = "avx512bw,avx512vl")]
12559 unsafe fn test_mm_cmplt_epu16_mask() {
12560 let a = _mm_set1_epi16(-2);
12561 let b = _mm_set1_epi16(-1);
12562 let m = _mm_cmplt_epu16_mask(a, b);
12563 assert_eq!(m, 0b11111111);
12564 }
12565
12566 #[simd_test(enable = "avx512bw,avx512vl")]
12567 unsafe fn test_mm_mask_cmplt_epu16_mask() {
12568 let a = _mm_set1_epi16(-2);
12569 let b = _mm_set1_epi16(-1);
12570 let mask = 0b01010101;
12571 let r = _mm_mask_cmplt_epu16_mask(mask, a, b);
12572 assert_eq!(r, 0b01010101);
12573 }
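// In the unsigned cmplt tests, the splatted -2 and -1 are reinterpreted as the
// unsigned values 0xFFFE and 0xFFFF, so a < b holds in every lane and the full
// (or masked-in) bits are set.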
12574
fc512014
XL
12575 #[simd_test(enable = "avx512bw")]
12576 unsafe fn test_mm512_cmplt_epu8_mask() {
12577 let a = _mm512_set1_epi8(-2);
12578 let b = _mm512_set1_epi8(-1);
12579 let m = _mm512_cmplt_epu8_mask(a, b);
12580 assert_eq!(
12581 m,
12582 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
12583 );
12584 }
12585
12586 #[simd_test(enable = "avx512bw")]
12587 unsafe fn test_mm512_mask_cmplt_epu8_mask() {
12588 let a = _mm512_set1_epi8(-2);
12589 let b = _mm512_set1_epi8(-1);
12590 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
12591 let r = _mm512_mask_cmplt_epu8_mask(mask, a, b);
12592 assert_eq!(
12593 r,
12594 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
12595 );
12596 }
12597
cdc7bbd5
XL
12598 #[simd_test(enable = "avx512bw,avx512vl")]
12599 unsafe fn test_mm256_cmplt_epu8_mask() {
12600 let a = _mm256_set1_epi8(-2);
12601 let b = _mm256_set1_epi8(-1);
12602 let m = _mm256_cmplt_epu8_mask(a, b);
12603 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12604 }
12605
12606 #[simd_test(enable = "avx512bw,avx512vl")]
12607 unsafe fn test_mm256_mask_cmplt_epu8_mask() {
12608 let a = _mm256_set1_epi8(-2);
12609 let b = _mm256_set1_epi8(-1);
12610 let mask = 0b01010101_01010101_01010101_01010101;
12611 let r = _mm256_mask_cmplt_epu8_mask(mask, a, b);
12612 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12613 }
12614
12615 #[simd_test(enable = "avx512bw,avx512vl")]
12616 unsafe fn test_mm_cmplt_epu8_mask() {
12617 let a = _mm_set1_epi8(-2);
12618 let b = _mm_set1_epi8(-1);
12619 let m = _mm_cmplt_epu8_mask(a, b);
12620 assert_eq!(m, 0b11111111_11111111);
12621 }
12622
12623 #[simd_test(enable = "avx512bw,avx512vl")]
12624 unsafe fn test_mm_mask_cmplt_epu8_mask() {
12625 let a = _mm_set1_epi8(-2);
12626 let b = _mm_set1_epi8(-1);
12627 let mask = 0b01010101_01010101;
12628 let r = _mm_mask_cmplt_epu8_mask(mask, a, b);
12629 assert_eq!(r, 0b01010101_01010101);
12630 }
12631
fc512014
XL
12632 #[simd_test(enable = "avx512bw")]
12633 unsafe fn test_mm512_cmplt_epi16_mask() {
12634 let a = _mm512_set1_epi16(-2);
12635 let b = _mm512_set1_epi16(-1);
12636 let m = _mm512_cmplt_epi16_mask(a, b);
12637 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12638 }
12639
12640 #[simd_test(enable = "avx512bw")]
12641 unsafe fn test_mm512_mask_cmplt_epi16_mask() {
12642 let a = _mm512_set1_epi16(-2);
12643 let b = _mm512_set1_epi16(-1);
12644 let mask = 0b01010101_01010101_01010101_01010101;
12645 let r = _mm512_mask_cmplt_epi16_mask(mask, a, b);
12646 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12647 }
12648
cdc7bbd5
XL
12649 #[simd_test(enable = "avx512bw,avx512vl")]
12650 unsafe fn test_mm256_cmplt_epi16_mask() {
12651 let a = _mm256_set1_epi16(-2);
12652 let b = _mm256_set1_epi16(-1);
12653 let m = _mm256_cmplt_epi16_mask(a, b);
12654 assert_eq!(m, 0b11111111_11111111);
12655 }
12656
12657 #[simd_test(enable = "avx512bw,avx512vl")]
12658 unsafe fn test_mm256_mask_cmplt_epi16_mask() {
12659 let a = _mm256_set1_epi16(-2);
12660 let b = _mm256_set1_epi16(-1);
12661 let mask = 0b01010101_01010101;
12662 let r = _mm256_mask_cmplt_epi16_mask(mask, a, b);
12663 assert_eq!(r, 0b01010101_01010101);
12664 }
12665
12666 #[simd_test(enable = "avx512bw,avx512vl")]
12667 unsafe fn test_mm_cmplt_epi16_mask() {
12668 let a = _mm_set1_epi16(-2);
12669 let b = _mm_set1_epi16(-1);
12670 let m = _mm_cmplt_epi16_mask(a, b);
12671 assert_eq!(m, 0b11111111);
12672 }
12673
12674 #[simd_test(enable = "avx512bw,avx512vl")]
12675 unsafe fn test_mm_mask_cmplt_epi16_mask() {
12676 let a = _mm_set1_epi16(-2);
12677 let b = _mm_set1_epi16(-1);
12678 let mask = 0b01010101;
12679 let r = _mm_mask_cmplt_epi16_mask(mask, a, b);
12680 assert_eq!(r, 0b01010101);
12681 }
12682
fc512014
XL
12683 #[simd_test(enable = "avx512bw")]
12684 unsafe fn test_mm512_cmplt_epi8_mask() {
12685 let a = _mm512_set1_epi8(-2);
12686 let b = _mm512_set1_epi8(-1);
12687 let m = _mm512_cmplt_epi8_mask(a, b);
12688 assert_eq!(
12689 m,
12690 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
12691 );
12692 }
12693
12694 #[simd_test(enable = "avx512bw")]
12695 unsafe fn test_mm512_mask_cmplt_epi8_mask() {
12696 let a = _mm512_set1_epi8(-2);
12697 let b = _mm512_set1_epi8(-1);
12698 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
12699 let r = _mm512_mask_cmplt_epi8_mask(mask, a, b);
12700 assert_eq!(
12701 r,
12702 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
12703 );
12704 }
12705
cdc7bbd5
XL
12706 #[simd_test(enable = "avx512bw,avx512vl")]
12707 unsafe fn test_mm256_cmplt_epi8_mask() {
12708 let a = _mm256_set1_epi8(-2);
12709 let b = _mm256_set1_epi8(-1);
12710 let m = _mm256_cmplt_epi8_mask(a, b);
12711 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12712 }
12713
12714 #[simd_test(enable = "avx512bw,avx512vl")]
12715 unsafe fn test_mm256_mask_cmplt_epi8_mask() {
12716 let a = _mm256_set1_epi8(-2);
12717 let b = _mm256_set1_epi8(-1);
12718 let mask = 0b01010101_01010101_01010101_01010101;
12719 let r = _mm256_mask_cmplt_epi8_mask(mask, a, b);
12720 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12721 }
12722
12723 #[simd_test(enable = "avx512bw,avx512vl")]
12724 unsafe fn test_mm_cmplt_epi8_mask() {
12725 let a = _mm_set1_epi8(-2);
12726 let b = _mm_set1_epi8(-1);
12727 let m = _mm_cmplt_epi8_mask(a, b);
12728 assert_eq!(m, 0b11111111_11111111);
12729 }
12730
12731 #[simd_test(enable = "avx512bw,avx512vl")]
12732 unsafe fn test_mm_mask_cmplt_epi8_mask() {
12733 let a = _mm_set1_epi8(-2);
12734 let b = _mm_set1_epi8(-1);
12735 let mask = 0b01010101_01010101;
12736 let r = _mm_mask_cmplt_epi8_mask(mask, a, b);
12737 assert_eq!(r, 0b01010101_01010101);
12738 }
12739
fc512014
XL
12740 #[simd_test(enable = "avx512bw")]
12741 unsafe fn test_mm512_cmpgt_epu16_mask() {
12742 let a = _mm512_set1_epi16(2);
12743 let b = _mm512_set1_epi16(1);
12744 let m = _mm512_cmpgt_epu16_mask(a, b);
12745 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12746 }
12747
12748 #[simd_test(enable = "avx512bw")]
12749 unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
12750 let a = _mm512_set1_epi16(2);
12751 let b = _mm512_set1_epi16(1);
12752 let mask = 0b01010101_01010101_01010101_01010101;
12753 let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b);
12754 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12755 }
12756
cdc7bbd5
XL
12757 #[simd_test(enable = "avx512bw,avx512vl")]
12758 unsafe fn test_mm256_cmpgt_epu16_mask() {
12759 let a = _mm256_set1_epi16(2);
12760 let b = _mm256_set1_epi16(1);
12761 let m = _mm256_cmpgt_epu16_mask(a, b);
12762 assert_eq!(m, 0b11111111_11111111);
12763 }
12764
12765 #[simd_test(enable = "avx512bw,avx512vl")]
12766 unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
12767 let a = _mm256_set1_epi16(2);
12768 let b = _mm256_set1_epi16(1);
12769 let mask = 0b01010101_01010101;
12770 let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b);
12771 assert_eq!(r, 0b01010101_01010101);
12772 }
12773
12774 #[simd_test(enable = "avx512bw,avx512vl")]
12775 unsafe fn test_mm_cmpgt_epu16_mask() {
12776 let a = _mm_set1_epi16(2);
12777 let b = _mm_set1_epi16(1);
12778 let m = _mm_cmpgt_epu16_mask(a, b);
12779 assert_eq!(m, 0b11111111);
12780 }
12781
12782 #[simd_test(enable = "avx512bw,avx512vl")]
12783 unsafe fn test_mm_mask_cmpgt_epu16_mask() {
12784 let a = _mm_set1_epi16(2);
12785 let b = _mm_set1_epi16(1);
12786 let mask = 0b01010101;
12787 let r = _mm_mask_cmpgt_epu16_mask(mask, a, b);
12788 assert_eq!(r, 0b01010101);
12789 }
12790
fc512014
XL
12791 #[simd_test(enable = "avx512bw")]
12792 unsafe fn test_mm512_cmpgt_epu8_mask() {
12793 let a = _mm512_set1_epi8(2);
12794 let b = _mm512_set1_epi8(1);
12795 let m = _mm512_cmpgt_epu8_mask(a, b);
12796 assert_eq!(
12797 m,
12798 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
12799 );
12800 }
12801
12802 #[simd_test(enable = "avx512bw")]
12803 unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
12804 let a = _mm512_set1_epi8(2);
12805 let b = _mm512_set1_epi8(1);
12806 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
12807 let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b);
12808 assert_eq!(
12809 r,
12810 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
12811 );
12812 }
12813
cdc7bbd5
XL
12814 #[simd_test(enable = "avx512bw,avx512vl")]
12815 unsafe fn test_mm256_cmpgt_epu8_mask() {
12816 let a = _mm256_set1_epi8(2);
12817 let b = _mm256_set1_epi8(1);
12818 let m = _mm256_cmpgt_epu8_mask(a, b);
12819 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12820 }
12821
12822 #[simd_test(enable = "avx512bw,avx512vl")]
12823 unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
12824 let a = _mm256_set1_epi8(2);
12825 let b = _mm256_set1_epi8(1);
12826 let mask = 0b01010101_01010101_01010101_01010101;
12827 let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b);
12828 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12829 }
12830
12831 #[simd_test(enable = "avx512bw,avx512vl")]
12832 unsafe fn test_mm_cmpgt_epu8_mask() {
12833 let a = _mm_set1_epi8(2);
12834 let b = _mm_set1_epi8(1);
12835 let m = _mm_cmpgt_epu8_mask(a, b);
12836 assert_eq!(m, 0b11111111_11111111);
12837 }
12838
12839 #[simd_test(enable = "avx512bw,avx512vl")]
12840 unsafe fn test_mm_mask_cmpgt_epu8_mask() {
12841 let a = _mm_set1_epi8(2);
12842 let b = _mm_set1_epi8(1);
12843 let mask = 0b01010101_01010101;
12844 let r = _mm_mask_cmpgt_epu8_mask(mask, a, b);
12845 assert_eq!(r, 0b01010101_01010101);
12846 }
12847
fc512014
XL
12848 #[simd_test(enable = "avx512bw")]
12849 unsafe fn test_mm512_cmpgt_epi16_mask() {
12850 let a = _mm512_set1_epi16(2);
12851 let b = _mm512_set1_epi16(-1);
12852 let m = _mm512_cmpgt_epi16_mask(a, b);
12853 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12854 }
12855
12856 #[simd_test(enable = "avx512bw")]
12857 unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
12858 let a = _mm512_set1_epi16(2);
12859 let b = _mm512_set1_epi16(-1);
12860 let mask = 0b01010101_01010101_01010101_01010101;
12861 let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b);
12862 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12863 }
12864
cdc7bbd5
XL
12865 #[simd_test(enable = "avx512bw,avx512vl")]
12866 unsafe fn test_mm256_cmpgt_epi16_mask() {
12867 let a = _mm256_set1_epi16(2);
12868 let b = _mm256_set1_epi16(-1);
12869 let m = _mm256_cmpgt_epi16_mask(a, b);
12870 assert_eq!(m, 0b11111111_11111111);
12871 }
12872
12873 #[simd_test(enable = "avx512bw,avx512vl")]
12874 unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
12875 let a = _mm256_set1_epi16(2);
12876 let b = _mm256_set1_epi16(-1);
12877 let mask = 0b01010101_01010101;
12878 let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b);
12879 assert_eq!(r, 0b01010101_01010101);
12880 }
12881
12882 #[simd_test(enable = "avx512bw,avx512vl")]
12883 unsafe fn test_mm_cmpgt_epi16_mask() {
12884 let a = _mm_set1_epi16(2);
12885 let b = _mm_set1_epi16(-1);
12886 let m = _mm_cmpgt_epi16_mask(a, b);
12887 assert_eq!(m, 0b11111111);
12888 }
12889
12890 #[simd_test(enable = "avx512bw,avx512vl")]
12891 unsafe fn test_mm_mask_cmpgt_epi16_mask() {
12892 let a = _mm_set1_epi16(2);
12893 let b = _mm_set1_epi16(-1);
12894 let mask = 0b01010101;
12895 let r = _mm_mask_cmpgt_epi16_mask(mask, a, b);
12896 assert_eq!(r, 0b01010101);
12897 }
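// The signed cmpgt tests compare 2 against -1, which is only "greater" under a
// signed interpretation; the unsigned cmpgt tests above use 2 and 1 instead,
// since the -1 bit pattern would read as the unsigned maximum.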
12898
fc512014
XL
12899 #[simd_test(enable = "avx512bw")]
12900 unsafe fn test_mm512_cmpgt_epi8_mask() {
12901 let a = _mm512_set1_epi8(2);
12902 let b = _mm512_set1_epi8(-1);
12903 let m = _mm512_cmpgt_epi8_mask(a, b);
12904 assert_eq!(
12905 m,
12906 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
12907 );
12908 }
12909
12910 #[simd_test(enable = "avx512bw")]
12911 unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
12912 let a = _mm512_set1_epi8(2);
12913 let b = _mm512_set1_epi8(-1);
12914 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
12915 let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b);
12916 assert_eq!(
12917 r,
12918 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
12919 );
12920 }
12921
cdc7bbd5
XL
12922 #[simd_test(enable = "avx512bw,avx512vl")]
12923 unsafe fn test_mm256_cmpgt_epi8_mask() {
12924 let a = _mm256_set1_epi8(2);
12925 let b = _mm256_set1_epi8(-1);
12926 let m = _mm256_cmpgt_epi8_mask(a, b);
12927 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12928 }
12929
12930 #[simd_test(enable = "avx512bw,avx512vl")]
12931 unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
12932 let a = _mm256_set1_epi8(2);
12933 let b = _mm256_set1_epi8(-1);
12934 let mask = 0b01010101_01010101_01010101_01010101;
12935 let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b);
12936 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12937 }
12938
12939 #[simd_test(enable = "avx512bw,avx512vl")]
12940 unsafe fn test_mm_cmpgt_epi8_mask() {
12941 let a = _mm_set1_epi8(2);
12942 let b = _mm_set1_epi8(-1);
12943 let m = _mm_cmpgt_epi8_mask(a, b);
12944 assert_eq!(m, 0b11111111_11111111);
12945 }
12946
12947 #[simd_test(enable = "avx512bw,avx512vl")]
12948 unsafe fn test_mm_mask_cmpgt_epi8_mask() {
12949 let a = _mm_set1_epi8(2);
12950 let b = _mm_set1_epi8(-1);
12951 let mask = 0b01010101_01010101;
12952 let r = _mm_mask_cmpgt_epi8_mask(mask, a, b);
12953 assert_eq!(r, 0b01010101_01010101);
12954 }
12955
fc512014
XL
12956 #[simd_test(enable = "avx512bw")]
12957 unsafe fn test_mm512_cmple_epu16_mask() {
12958 let a = _mm512_set1_epi16(-1);
12959 let b = _mm512_set1_epi16(-1);
12960 let m = _mm512_cmple_epu16_mask(a, b);
12961 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
12962 }
12963
12964 #[simd_test(enable = "avx512bw")]
12965 unsafe fn test_mm512_mask_cmple_epu16_mask() {
12966 let a = _mm512_set1_epi16(-1);
12967 let b = _mm512_set1_epi16(-1);
12968 let mask = 0b01010101_01010101_01010101_01010101;
12969 let r = _mm512_mask_cmple_epu16_mask(mask, a, b);
12970 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
12971 }
12972
cdc7bbd5
XL
12973 #[simd_test(enable = "avx512bw,avx512vl")]
12974 unsafe fn test_mm256_cmple_epu16_mask() {
12975 let a = _mm256_set1_epi16(-1);
12976 let b = _mm256_set1_epi16(-1);
12977 let m = _mm256_cmple_epu16_mask(a, b);
12978 assert_eq!(m, 0b11111111_11111111);
12979 }
12980
12981 #[simd_test(enable = "avx512bw,avx512vl")]
12982 unsafe fn test_mm256_mask_cmple_epu16_mask() {
12983 let a = _mm256_set1_epi16(-1);
12984 let b = _mm256_set1_epi16(-1);
12985 let mask = 0b01010101_01010101;
12986 let r = _mm256_mask_cmple_epu16_mask(mask, a, b);
12987 assert_eq!(r, 0b01010101_01010101);
12988 }
12989
12990 #[simd_test(enable = "avx512bw,avx512vl")]
12991 unsafe fn test_mm_cmple_epu16_mask() {
12992 let a = _mm_set1_epi16(-1);
12993 let b = _mm_set1_epi16(-1);
12994 let m = _mm_cmple_epu16_mask(a, b);
12995 assert_eq!(m, 0b11111111);
12996 }
12997
12998 #[simd_test(enable = "avx512bw,avx512vl")]
12999 unsafe fn test_mm_mask_cmple_epu16_mask() {
13000 let a = _mm_set1_epi16(-1);
13001 let b = _mm_set1_epi16(-1);
13002 let mask = 0b01010101;
13003 let r = _mm_mask_cmple_epu16_mask(mask, a, b);
13004 assert_eq!(r, 0b01010101);
13005 }
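// The cmple tests (and the cmpge tests further down) splat the same value into
// both operands, so the "or equal" comparison holds in every lane and only the
// supplied mask decides which bits survive in the masked variants.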
13006
fc512014
XL
13007 #[simd_test(enable = "avx512bw")]
13008 unsafe fn test_mm512_cmple_epu8_mask() {
13009 let a = _mm512_set1_epi8(-1);
13010 let b = _mm512_set1_epi8(-1);
13011 let m = _mm512_cmple_epu8_mask(a, b);
13012 assert_eq!(
13013 m,
13014 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13015 );
13016 }
13017
cdc7bbd5
XL
13018 #[simd_test(enable = "avx512bw")]
13019 unsafe fn test_mm512_mask_cmple_epu8_mask() {
13020 let a = _mm512_set1_epi8(-1);
13021 let b = _mm512_set1_epi8(-1);
13022 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13023 let r = _mm512_mask_cmple_epu8_mask(mask, a, b);
13024 assert_eq!(
13025 r,
13026 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13027 );
13028 }
13029
13030 #[simd_test(enable = "avx512bw,avx512vl")]
13031 unsafe fn test_mm256_cmple_epu8_mask() {
13032 let a = _mm256_set1_epi8(-1);
13033 let b = _mm256_set1_epi8(-1);
13034 let m = _mm256_cmple_epu8_mask(a, b);
13035 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13036 }
13037
13038 #[simd_test(enable = "avx512bw,avx512vl")]
13039 unsafe fn test_mm256_mask_cmple_epu8_mask() {
13040 let a = _mm256_set1_epi8(-1);
13041 let b = _mm256_set1_epi8(-1);
13042 let mask = 0b01010101_01010101_01010101_01010101;
13043 let r = _mm256_mask_cmple_epu8_mask(mask, a, b);
13044 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13045 }
13046
13047 #[simd_test(enable = "avx512bw,avx512vl")]
13048 unsafe fn test_mm_cmple_epu8_mask() {
13049 let a = _mm_set1_epi8(-1);
13050 let b = _mm_set1_epi8(-1);
13051 let m = _mm_cmple_epu8_mask(a, b);
13052 assert_eq!(m, 0b11111111_11111111);
13053 }
13054
13055 #[simd_test(enable = "avx512bw,avx512vl")]
13056 unsafe fn test_mm_mask_cmple_epu8_mask() {
13057 let a = _mm_set1_epi8(-1);
13058 let b = _mm_set1_epi8(-1);
13059 let mask = 0b01010101_01010101;
13060 let r = _mm_mask_cmple_epu8_mask(mask, a, b);
13061 assert_eq!(r, 0b01010101_01010101);
13062 }
13063
13064 #[simd_test(enable = "avx512bw")]
13065 unsafe fn test_mm512_cmple_epi16_mask() {
13066 let a = _mm512_set1_epi16(-1);
13067 let b = _mm512_set1_epi16(-1);
13068 let m = _mm512_cmple_epi16_mask(a, b);
13069 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13070 }
13071
13072 #[simd_test(enable = "avx512bw")]
13073 unsafe fn test_mm512_mask_cmple_epi16_mask() {
13074 let a = _mm512_set1_epi16(-1);
13075 let b = _mm512_set1_epi16(-1);
13076 let mask = 0b01010101_01010101_01010101_01010101;
13077 let r = _mm512_mask_cmple_epi16_mask(mask, a, b);
13078 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13079 }
13080
cdc7bbd5
XL
13081 #[simd_test(enable = "avx512bw,avx512vl")]
13082 unsafe fn test_mm256_cmple_epi16_mask() {
13083 let a = _mm256_set1_epi16(-1);
13084 let b = _mm256_set1_epi16(-1);
13085 let m = _mm256_cmple_epi16_mask(a, b);
13086 assert_eq!(m, 0b11111111_11111111);
13087 }
13088
13089 #[simd_test(enable = "avx512bw,avx512vl")]
13090 unsafe fn test_mm256_mask_cmple_epi16_mask() {
13091 let a = _mm256_set1_epi16(-1);
13092 let b = _mm256_set1_epi16(-1);
13093 let mask = 0b01010101_01010101;
13094 let r = _mm256_mask_cmple_epi16_mask(mask, a, b);
13095 assert_eq!(r, 0b01010101_01010101);
13096 }
13097
13098 #[simd_test(enable = "avx512bw,avx512vl")]
13099 unsafe fn test_mm_cmple_epi16_mask() {
13100 let a = _mm_set1_epi16(-1);
13101 let b = _mm_set1_epi16(-1);
13102 let m = _mm_cmple_epi16_mask(a, b);
13103 assert_eq!(m, 0b11111111);
13104 }
13105
13106 #[simd_test(enable = "avx512bw,avx512vl")]
13107 unsafe fn test_mm_mask_cmple_epi16_mask() {
13108 let a = _mm_set1_epi16(-1);
13109 let b = _mm_set1_epi16(-1);
13110 let mask = 0b01010101;
13111 let r = _mm_mask_cmple_epi16_mask(mask, a, b);
13112 assert_eq!(r, 0b01010101);
13113 }
13114
fc512014
XL
13115 #[simd_test(enable = "avx512bw")]
13116 unsafe fn test_mm512_cmple_epi8_mask() {
13117 let a = _mm512_set1_epi8(-1);
13118 let b = _mm512_set1_epi8(-1);
13119 let m = _mm512_cmple_epi8_mask(a, b);
13120 assert_eq!(
13121 m,
13122 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13123 );
13124 }
13125
13126 #[simd_test(enable = "avx512bw")]
13127 unsafe fn test_mm512_mask_cmple_epi8_mask() {
13128 let a = _mm512_set1_epi8(-1);
13129 let b = _mm512_set1_epi8(-1);
13130 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13131 let r = _mm512_mask_cmple_epi8_mask(mask, a, b);
13132 assert_eq!(
13133 r,
13134 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13135 );
13136 }
13137
cdc7bbd5
XL
13138 #[simd_test(enable = "avx512bw,avx512vl")]
13139 unsafe fn test_mm256_cmple_epi8_mask() {
13140 let a = _mm256_set1_epi8(-1);
13141 let b = _mm256_set1_epi8(-1);
13142 let m = _mm256_cmple_epi8_mask(a, b);
13143 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13144 }
13145
13146 #[simd_test(enable = "avx512bw,avx512vl")]
13147 unsafe fn test_mm256_mask_cmple_epi8_mask() {
13148 let a = _mm256_set1_epi8(-1);
13149 let b = _mm256_set1_epi8(-1);
13150 let mask = 0b01010101_01010101_01010101_01010101;
13151 let r = _mm256_mask_cmple_epi8_mask(mask, a, b);
13152 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13153 }
13154
13155 #[simd_test(enable = "avx512bw,avx512vl")]
13156 unsafe fn test_mm_cmple_epi8_mask() {
13157 let a = _mm_set1_epi8(-1);
13158 let b = _mm_set1_epi8(-1);
13159 let m = _mm_cmple_epi8_mask(a, b);
13160 assert_eq!(m, 0b11111111_11111111);
13161 }
13162
13163 #[simd_test(enable = "avx512bw,avx512vl")]
13164 unsafe fn test_mm_mask_cmple_epi8_mask() {
13165 let a = _mm_set1_epi8(-1);
13166 let b = _mm_set1_epi8(-1);
13167 let mask = 0b01010101_01010101;
13168 let r = _mm_mask_cmple_epi8_mask(mask, a, b);
13169 assert_eq!(r, 0b01010101_01010101);
13170 }
13171
fc512014
XL
13172 #[simd_test(enable = "avx512bw")]
13173 unsafe fn test_mm512_cmpge_epu16_mask() {
13174 let a = _mm512_set1_epi16(1);
13175 let b = _mm512_set1_epi16(1);
13176 let m = _mm512_cmpge_epu16_mask(a, b);
13177 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13178 }
13179
13180 #[simd_test(enable = "avx512bw")]
13181 unsafe fn test_mm512_mask_cmpge_epu16_mask() {
13182 let a = _mm512_set1_epi16(1);
13183 let b = _mm512_set1_epi16(1);
13184 let mask = 0b01010101_01010101_01010101_01010101;
13185 let r = _mm512_mask_cmpge_epu16_mask(mask, a, b);
13186 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13187 }
13188
cdc7bbd5
XL
13189 #[simd_test(enable = "avx512bw,avx512vl")]
13190 unsafe fn test_mm256_cmpge_epu16_mask() {
13191 let a = _mm256_set1_epi16(1);
13192 let b = _mm256_set1_epi16(1);
13193 let m = _mm256_cmpge_epu16_mask(a, b);
13194 assert_eq!(m, 0b11111111_11111111);
13195 }
13196
13197 #[simd_test(enable = "avx512bw,avx512vl")]
13198 unsafe fn test_mm256_mask_cmpge_epu16_mask() {
13199 let a = _mm256_set1_epi16(1);
13200 let b = _mm256_set1_epi16(1);
13201 let mask = 0b01010101_01010101;
13202 let r = _mm256_mask_cmpge_epu16_mask(mask, a, b);
13203 assert_eq!(r, 0b01010101_01010101);
13204 }
13205
13206 #[simd_test(enable = "avx512bw,avx512vl")]
13207 unsafe fn test_mm_cmpge_epu16_mask() {
13208 let a = _mm_set1_epi16(1);
13209 let b = _mm_set1_epi16(1);
13210 let m = _mm_cmpge_epu16_mask(a, b);
13211 assert_eq!(m, 0b11111111);
13212 }
13213
13214 #[simd_test(enable = "avx512bw,avx512vl")]
13215 unsafe fn test_mm_mask_cmpge_epu16_mask() {
13216 let a = _mm_set1_epi16(1);
13217 let b = _mm_set1_epi16(1);
13218 let mask = 0b01010101;
13219 let r = _mm_mask_cmpge_epu16_mask(mask, a, b);
13220 assert_eq!(r, 0b01010101);
13221 }
13222
fc512014
XL
13223 #[simd_test(enable = "avx512bw")]
13224 unsafe fn test_mm512_cmpge_epu8_mask() {
13225 let a = _mm512_set1_epi8(1);
13226 let b = _mm512_set1_epi8(1);
13227 let m = _mm512_cmpge_epu8_mask(a, b);
13228 assert_eq!(
13229 m,
13230 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13231 );
13232 }
13233
13234 #[simd_test(enable = "avx512bw")]
13235 unsafe fn test_mm512_mask_cmpge_epu8_mask() {
13236 let a = _mm512_set1_epi8(1);
13237 let b = _mm512_set1_epi8(1);
13238 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13239 let r = _mm512_mask_cmpge_epu8_mask(mask, a, b);
13240 assert_eq!(
13241 r,
13242 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13243 );
13244 }
13245
cdc7bbd5
XL
13246 #[simd_test(enable = "avx512bw,avx512vl")]
13247 unsafe fn test_mm256_cmpge_epu8_mask() {
13248 let a = _mm256_set1_epi8(1);
13249 let b = _mm256_set1_epi8(1);
13250 let m = _mm256_cmpge_epu8_mask(a, b);
13251 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13252 }
13253
13254 #[simd_test(enable = "avx512bw,avx512vl")]
13255 unsafe fn test_mm256_mask_cmpge_epu8_mask() {
13256 let a = _mm256_set1_epi8(1);
13257 let b = _mm256_set1_epi8(1);
13258 let mask = 0b01010101_01010101_01010101_01010101;
13259 let r = _mm256_mask_cmpge_epu8_mask(mask, a, b);
13260 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13261 }
13262
13263 #[simd_test(enable = "avx512bw,avx512vl")]
13264 unsafe fn test_mm_cmpge_epu8_mask() {
13265 let a = _mm_set1_epi8(1);
13266 let b = _mm_set1_epi8(1);
13267 let m = _mm_cmpge_epu8_mask(a, b);
13268 assert_eq!(m, 0b11111111_11111111);
13269 }
13270
13271 #[simd_test(enable = "avx512bw,avx512vl")]
13272 unsafe fn test_mm_mask_cmpge_epu8_mask() {
13273 let a = _mm_set1_epi8(1);
13274 let b = _mm_set1_epi8(1);
13275 let mask = 0b01010101_01010101;
13276 let r = _mm_mask_cmpge_epu8_mask(mask, a, b);
13277 assert_eq!(r, 0b01010101_01010101);
13278 }
13279
fc512014
XL
13280 #[simd_test(enable = "avx512bw")]
13281 unsafe fn test_mm512_cmpge_epi16_mask() {
13282 let a = _mm512_set1_epi16(-1);
13283 let b = _mm512_set1_epi16(-1);
13284 let m = _mm512_cmpge_epi16_mask(a, b);
13285 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13286 }
13287
13288 #[simd_test(enable = "avx512bw")]
13289 unsafe fn test_mm512_mask_cmpge_epi16_mask() {
13290 let a = _mm512_set1_epi16(-1);
13291 let b = _mm512_set1_epi16(-1);
13292 let mask = 0b01010101_01010101_01010101_01010101;
13293 let r = _mm512_mask_cmpge_epi16_mask(mask, a, b);
13294 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13295 }
13296
cdc7bbd5
XL
13297 #[simd_test(enable = "avx512bw,avx512vl")]
13298 unsafe fn test_mm256_cmpge_epi16_mask() {
13299 let a = _mm256_set1_epi16(-1);
13300 let b = _mm256_set1_epi16(-1);
13301 let m = _mm256_cmpge_epi16_mask(a, b);
13302 assert_eq!(m, 0b11111111_11111111);
13303 }
13304
13305 #[simd_test(enable = "avx512bw,avx512vl")]
13306 unsafe fn test_mm256_mask_cmpge_epi16_mask() {
13307 let a = _mm256_set1_epi16(-1);
13308 let b = _mm256_set1_epi16(-1);
13309 let mask = 0b01010101_01010101;
13310 let r = _mm256_mask_cmpge_epi16_mask(mask, a, b);
13311 assert_eq!(r, 0b01010101_01010101);
13312 }
13313
13314 #[simd_test(enable = "avx512bw,avx512vl")]
13315 unsafe fn test_mm_cmpge_epi16_mask() {
13316 let a = _mm_set1_epi16(-1);
13317 let b = _mm_set1_epi16(-1);
13318 let m = _mm_cmpge_epi16_mask(a, b);
13319 assert_eq!(m, 0b11111111);
13320 }
13321
13322 #[simd_test(enable = "avx512bw,avx512vl")]
13323 unsafe fn test_mm_mask_cmpge_epi16_mask() {
13324 let a = _mm_set1_epi16(-1);
13325 let b = _mm_set1_epi16(-1);
13326 let mask = 0b01010101;
13327 let r = _mm_mask_cmpge_epi16_mask(mask, a, b);
13328 assert_eq!(r, 0b01010101);
13329 }
13330
fc512014
XL
13331 #[simd_test(enable = "avx512bw")]
13332 unsafe fn test_mm512_cmpge_epi8_mask() {
13333 let a = _mm512_set1_epi8(-1);
13334 let b = _mm512_set1_epi8(-1);
13335 let m = _mm512_cmpge_epi8_mask(a, b);
13336 assert_eq!(
13337 m,
13338 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13339 );
13340 }
13341
13342 #[simd_test(enable = "avx512bw")]
13343 unsafe fn test_mm512_mask_cmpge_epi8_mask() {
13344 let a = _mm512_set1_epi8(-1);
13345 let b = _mm512_set1_epi8(-1);
13346 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13347 let r = _mm512_mask_cmpge_epi8_mask(mask, a, b);
13348 assert_eq!(
13349 r,
13350 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13351 );
13352 }
13353
cdc7bbd5
XL
13354 #[simd_test(enable = "avx512bw,avx512vl")]
13355 unsafe fn test_mm256_cmpge_epi8_mask() {
13356 let a = _mm256_set1_epi8(-1);
13357 let b = _mm256_set1_epi8(-1);
13358 let m = _mm256_cmpge_epi8_mask(a, b);
13359 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13360 }
13361
13362 #[simd_test(enable = "avx512bw,avx512vl")]
13363 unsafe fn test_mm256_mask_cmpge_epi8_mask() {
13364 let a = _mm256_set1_epi8(-1);
13365 let b = _mm256_set1_epi8(-1);
13366 let mask = 0b01010101_01010101_01010101_01010101;
13367 let r = _mm256_mask_cmpge_epi8_mask(mask, a, b);
13368 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13369 }
13370
13371 #[simd_test(enable = "avx512bw,avx512vl")]
13372 unsafe fn test_mm_cmpge_epi8_mask() {
13373 let a = _mm_set1_epi8(-1);
13374 let b = _mm_set1_epi8(-1);
13375 let m = _mm_cmpge_epi8_mask(a, b);
13376 assert_eq!(m, 0b11111111_11111111);
13377 }
13378
13379 #[simd_test(enable = "avx512bw,avx512vl")]
13380 unsafe fn test_mm_mask_cmpge_epi8_mask() {
13381 let a = _mm_set1_epi8(-1);
13382 let b = _mm_set1_epi8(-1);
13383 let mask = 0b01010101_01010101;
13384 let r = _mm_mask_cmpge_epi8_mask(mask, a, b);
13385 assert_eq!(r, 0b01010101_01010101);
13386 }
13387
fc512014
XL
13388 #[simd_test(enable = "avx512bw")]
13389 unsafe fn test_mm512_cmpeq_epu16_mask() {
13390 let a = _mm512_set1_epi16(1);
13391 let b = _mm512_set1_epi16(1);
13392 let m = _mm512_cmpeq_epu16_mask(a, b);
13393 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13394 }
13395
13396 #[simd_test(enable = "avx512bw")]
13397 unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
13398 let a = _mm512_set1_epi16(1);
13399 let b = _mm512_set1_epi16(1);
13400 let mask = 0b01010101_01010101_01010101_01010101;
13401 let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b);
13402 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13403 }
13404
cdc7bbd5
XL
13405 #[simd_test(enable = "avx512bw,avx512vl")]
13406 unsafe fn test_mm256_cmpeq_epu16_mask() {
13407 let a = _mm256_set1_epi16(1);
13408 let b = _mm256_set1_epi16(1);
13409 let m = _mm256_cmpeq_epu16_mask(a, b);
13410 assert_eq!(m, 0b11111111_11111111);
13411 }
13412
13413 #[simd_test(enable = "avx512bw,avx512vl")]
13414 unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
13415 let a = _mm256_set1_epi16(1);
13416 let b = _mm256_set1_epi16(1);
13417 let mask = 0b01010101_01010101;
13418 let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b);
13419 assert_eq!(r, 0b01010101_01010101);
13420 }
13421
13422 #[simd_test(enable = "avx512bw,avx512vl")]
13423 unsafe fn test_mm_cmpeq_epu16_mask() {
13424 let a = _mm_set1_epi16(1);
13425 let b = _mm_set1_epi16(1);
13426 let m = _mm_cmpeq_epu16_mask(a, b);
13427 assert_eq!(m, 0b11111111);
13428 }
13429
13430 #[simd_test(enable = "avx512bw,avx512vl")]
13431 unsafe fn test_mm_mask_cmpeq_epu16_mask() {
13432 let a = _mm_set1_epi16(1);
13433 let b = _mm_set1_epi16(1);
13434 let mask = 0b01010101;
13435 let r = _mm_mask_cmpeq_epu16_mask(mask, a, b);
13436 assert_eq!(r, 0b01010101);
13437 }
13438
fc512014
XL
13439 #[simd_test(enable = "avx512bw")]
13440 unsafe fn test_mm512_cmpeq_epu8_mask() {
13441 let a = _mm512_set1_epi8(1);
13442 let b = _mm512_set1_epi8(1);
13443 let m = _mm512_cmpeq_epu8_mask(a, b);
13444 assert_eq!(
13445 m,
13446 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13447 );
13448 }
13449
13450 #[simd_test(enable = "avx512bw")]
13451 unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
13452 let a = _mm512_set1_epi8(1);
13453 let b = _mm512_set1_epi8(1);
13454 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13455 let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b);
13456 assert_eq!(
13457 r,
13458 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13459 );
13460 }
13461
cdc7bbd5
XL
13462 #[simd_test(enable = "avx512bw,avx512vl")]
13463 unsafe fn test_mm256_cmpeq_epu8_mask() {
13464 let a = _mm256_set1_epi8(1);
13465 let b = _mm256_set1_epi8(1);
13466 let m = _mm256_cmpeq_epu8_mask(a, b);
13467 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13468 }
13469
13470 #[simd_test(enable = "avx512bw,avx512vl")]
13471 unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
13472 let a = _mm256_set1_epi8(1);
13473 let b = _mm256_set1_epi8(1);
13474 let mask = 0b01010101_01010101_01010101_01010101;
13475 let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b);
13476 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13477 }
13478
13479 #[simd_test(enable = "avx512bw,avx512vl")]
13480 unsafe fn test_mm_cmpeq_epu8_mask() {
13481 let a = _mm_set1_epi8(1);
13482 let b = _mm_set1_epi8(1);
13483 let m = _mm_cmpeq_epu8_mask(a, b);
13484 assert_eq!(m, 0b11111111_11111111);
13485 }
13486
13487 #[simd_test(enable = "avx512bw,avx512vl")]
13488 unsafe fn test_mm_mask_cmpeq_epu8_mask() {
13489 let a = _mm_set1_epi8(1);
13490 let b = _mm_set1_epi8(1);
13491 let mask = 0b01010101_01010101;
13492 let r = _mm_mask_cmpeq_epu8_mask(mask, a, b);
13493 assert_eq!(r, 0b01010101_01010101);
13494 }
13495
fc512014
XL
13496 #[simd_test(enable = "avx512bw")]
13497 unsafe fn test_mm512_cmpeq_epi16_mask() {
13498 let a = _mm512_set1_epi16(-1);
13499 let b = _mm512_set1_epi16(-1);
13500 let m = _mm512_cmpeq_epi16_mask(a, b);
13501 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13502 }
13503
13504 #[simd_test(enable = "avx512bw")]
13505 unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
13506 let a = _mm512_set1_epi16(-1);
13507 let b = _mm512_set1_epi16(-1);
13508 let mask = 0b01010101_01010101_01010101_01010101;
13509 let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b);
13510 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13511 }
13512
cdc7bbd5
XL
13513 #[simd_test(enable = "avx512bw,avx512vl")]
13514 unsafe fn test_mm256_cmpeq_epi16_mask() {
13515 let a = _mm256_set1_epi16(-1);
13516 let b = _mm256_set1_epi16(-1);
13517 let m = _mm256_cmpeq_epi16_mask(a, b);
13518 assert_eq!(m, 0b11111111_11111111);
13519 }
13520
13521 #[simd_test(enable = "avx512bw,avx512vl")]
13522 unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
13523 let a = _mm256_set1_epi16(-1);
13524 let b = _mm256_set1_epi16(-1);
13525 let mask = 0b01010101_01010101;
13526 let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b);
13527 assert_eq!(r, 0b01010101_01010101);
13528 }
13529
13530 #[simd_test(enable = "avx512bw,avx512vl")]
13531 unsafe fn test_mm_cmpeq_epi16_mask() {
13532 let a = _mm_set1_epi16(-1);
13533 let b = _mm_set1_epi16(-1);
13534 let m = _mm_cmpeq_epi16_mask(a, b);
13535 assert_eq!(m, 0b11111111);
13536 }
13537
13538 #[simd_test(enable = "avx512bw,avx512vl")]
13539 unsafe fn test_mm_mask_cmpeq_epi16_mask() {
13540 let a = _mm_set1_epi16(-1);
13541 let b = _mm_set1_epi16(-1);
13542 let mask = 0b01010101;
13543 let r = _mm_mask_cmpeq_epi16_mask(mask, a, b);
13544 assert_eq!(r, 0b01010101);
13545 }
13546
fc512014
XL
13547 #[simd_test(enable = "avx512bw")]
13548 unsafe fn test_mm512_cmpeq_epi8_mask() {
13549 let a = _mm512_set1_epi8(-1);
13550 let b = _mm512_set1_epi8(-1);
13551 let m = _mm512_cmpeq_epi8_mask(a, b);
13552 assert_eq!(
13553 m,
13554 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13555 );
13556 }
13557
13558 #[simd_test(enable = "avx512bw")]
13559 unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
13560 let a = _mm512_set1_epi8(-1);
13561 let b = _mm512_set1_epi8(-1);
13562 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13563 let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b);
13564 assert_eq!(
13565 r,
13566 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13567 );
13568 }
13569
cdc7bbd5
XL
13570 #[simd_test(enable = "avx512bw,avx512vl")]
13571 unsafe fn test_mm256_cmpeq_epi8_mask() {
13572 let a = _mm256_set1_epi8(-1);
13573 let b = _mm256_set1_epi8(-1);
13574 let m = _mm256_cmpeq_epi8_mask(a, b);
13575 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13576 }
13577
13578 #[simd_test(enable = "avx512bw,avx512vl")]
13579 unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
13580 let a = _mm256_set1_epi8(-1);
13581 let b = _mm256_set1_epi8(-1);
13582 let mask = 0b01010101_01010101_01010101_01010101;
13583 let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b);
13584 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13585 }
13586
13587 #[simd_test(enable = "avx512bw,avx512vl")]
13588 unsafe fn test_mm_cmpeq_epi8_mask() {
13589 let a = _mm_set1_epi8(-1);
13590 let b = _mm_set1_epi8(-1);
13591 let m = _mm_cmpeq_epi8_mask(a, b);
13592 assert_eq!(m, 0b11111111_11111111);
13593 }
13594
13595 #[simd_test(enable = "avx512bw,avx512vl")]
13596 unsafe fn test_mm_mask_cmpeq_epi8_mask() {
13597 let a = _mm_set1_epi8(-1);
13598 let b = _mm_set1_epi8(-1);
13599 let mask = 0b01010101_01010101;
13600 let r = _mm_mask_cmpeq_epi8_mask(mask, a, b);
13601 assert_eq!(r, 0b01010101_01010101);
13602 }
13603
fc512014
XL
13604 #[simd_test(enable = "avx512bw")]
13605 unsafe fn test_mm512_cmpneq_epu16_mask() {
13606 let a = _mm512_set1_epi16(2);
13607 let b = _mm512_set1_epi16(1);
13608 let m = _mm512_cmpneq_epu16_mask(a, b);
13609 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13610 }
13611
13612 #[simd_test(enable = "avx512bw")]
13613 unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
13614 let a = _mm512_set1_epi16(2);
13615 let b = _mm512_set1_epi16(1);
13616 let mask = 0b01010101_01010101_01010101_01010101;
13617 let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b);
13618 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13619 }
13620
cdc7bbd5
XL
13621 #[simd_test(enable = "avx512bw,avx512vl")]
13622 unsafe fn test_mm256_cmpneq_epu16_mask() {
13623 let a = _mm256_set1_epi16(2);
13624 let b = _mm256_set1_epi16(1);
13625 let m = _mm256_cmpneq_epu16_mask(a, b);
13626 assert_eq!(m, 0b11111111_11111111);
13627 }
13628
13629 #[simd_test(enable = "avx512bw,avx512vl")]
13630 unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
13631 let a = _mm256_set1_epi16(2);
13632 let b = _mm256_set1_epi16(1);
13633 let mask = 0b01010101_01010101;
13634 let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b);
13635 assert_eq!(r, 0b01010101_01010101);
13636 }
13637
13638 #[simd_test(enable = "avx512bw,avx512vl")]
13639 unsafe fn test_mm_cmpneq_epu16_mask() {
13640 let a = _mm_set1_epi16(2);
13641 let b = _mm_set1_epi16(1);
13642 let m = _mm_cmpneq_epu16_mask(a, b);
13643 assert_eq!(m, 0b11111111);
13644 }
13645
13646 #[simd_test(enable = "avx512bw,avx512vl")]
13647 unsafe fn test_mm_mask_cmpneq_epu16_mask() {
13648 let a = _mm_set1_epi16(2);
13649 let b = _mm_set1_epi16(1);
13650 let mask = 0b01010101;
13651 let r = _mm_mask_cmpneq_epu16_mask(mask, a, b);
13652 assert_eq!(r, 0b01010101);
13653 }
13654
fc512014
XL
13655 #[simd_test(enable = "avx512bw")]
13656 unsafe fn test_mm512_cmpneq_epu8_mask() {
13657 let a = _mm512_set1_epi8(2);
13658 let b = _mm512_set1_epi8(1);
13659 let m = _mm512_cmpneq_epu8_mask(a, b);
13660 assert_eq!(
13661 m,
13662 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13663 );
13664 }
13665
cdc7bbd5
XL
13666 #[simd_test(enable = "avx512bw")]
13667 unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
13668 let a = _mm512_set1_epi8(2);
13669 let b = _mm512_set1_epi8(1);
13670 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13671 let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b);
13672 assert_eq!(
13673 r,
13674 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13675 );
13676 }
13677
13678 #[simd_test(enable = "avx512bw,avx512vl")]
13679 unsafe fn test_mm256_cmpneq_epu8_mask() {
13680 let a = _mm256_set1_epi8(2);
13681 let b = _mm256_set1_epi8(1);
13682 let m = _mm256_cmpneq_epu8_mask(a, b);
13683 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13684 }
13685
13686 #[simd_test(enable = "avx512bw,avx512vl")]
13687 unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
13688 let a = _mm256_set1_epi8(2);
13689 let b = _mm256_set1_epi8(1);
13690 let mask = 0b01010101_01010101_01010101_01010101;
13691 let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b);
13692 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13693 }
13694
13695 #[simd_test(enable = "avx512bw,avx512vl")]
13696 unsafe fn test_mm_cmpneq_epu8_mask() {
13697 let a = _mm_set1_epi8(2);
13698 let b = _mm_set1_epi8(1);
13699 let m = _mm_cmpneq_epu8_mask(a, b);
13700 assert_eq!(m, 0b11111111_11111111);
13701 }
13702
13703 #[simd_test(enable = "avx512bw,avx512vl")]
13704 unsafe fn test_mm_mask_cmpneq_epu8_mask() {
13705 let a = _mm_set1_epi8(2);
13706 let b = _mm_set1_epi8(1);
13707 let mask = 0b01010101_01010101;
13708 let r = _mm_mask_cmpneq_epu8_mask(mask, a, b);
13709 assert_eq!(r, 0b01010101_01010101);
13710 }
13711
13712 #[simd_test(enable = "avx512bw")]
13713 unsafe fn test_mm512_cmpneq_epi16_mask() {
13714 let a = _mm512_set1_epi16(1);
13715 let b = _mm512_set1_epi16(-1);
13716 let m = _mm512_cmpneq_epi16_mask(a, b);
13717 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13718 }
13719
13720 #[simd_test(enable = "avx512bw")]
13721 unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
13722 let a = _mm512_set1_epi16(1);
13723 let b = _mm512_set1_epi16(-1);
13724 let mask = 0b01010101_01010101_01010101_01010101;
13725 let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b);
13726 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13727 }
13728
cdc7bbd5
XL
13729 #[simd_test(enable = "avx512bw,avx512vl")]
13730 unsafe fn test_mm256_cmpneq_epi16_mask() {
13731 let a = _mm256_set1_epi16(1);
13732 let b = _mm256_set1_epi16(-1);
13733 let m = _mm256_cmpneq_epi16_mask(a, b);
13734 assert_eq!(m, 0b11111111_11111111);
13735 }
13736
13737 #[simd_test(enable = "avx512bw,avx512vl")]
13738 unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
13739 let a = _mm256_set1_epi16(1);
13740 let b = _mm256_set1_epi16(-1);
13741 let mask = 0b01010101_01010101;
13742 let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b);
13743 assert_eq!(r, 0b01010101_01010101);
13744 }
13745
13746 #[simd_test(enable = "avx512bw,avx512vl")]
13747 unsafe fn test_mm_cmpneq_epi16_mask() {
13748 let a = _mm_set1_epi16(1);
13749 let b = _mm_set1_epi16(-1);
13750 let m = _mm_cmpneq_epi16_mask(a, b);
13751 assert_eq!(m, 0b11111111);
13752 }
13753
13754 #[simd_test(enable = "avx512bw,avx512vl")]
13755 unsafe fn test_mm_mask_cmpneq_epi16_mask() {
13756 let a = _mm_set1_epi16(1);
13757 let b = _mm_set1_epi16(-1);
13758 let mask = 0b01010101;
13759 let r = _mm_mask_cmpneq_epi16_mask(mask, a, b);
13760 assert_eq!(r, 0b01010101);
13761 }
13762
fc512014
XL
13763 #[simd_test(enable = "avx512bw")]
13764 unsafe fn test_mm512_cmpneq_epi8_mask() {
13765 let a = _mm512_set1_epi8(1);
13766 let b = _mm512_set1_epi8(-1);
13767 let m = _mm512_cmpneq_epi8_mask(a, b);
13768 assert_eq!(
13769 m,
13770 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13771 );
13772 }
13773
13774 #[simd_test(enable = "avx512bw")]
13775 unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
13776 let a = _mm512_set1_epi8(1);
13777 let b = _mm512_set1_epi8(-1);
13778 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13779 let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b);
13780 assert_eq!(
13781 r,
13782 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13783 );
13784 }
13785
cdc7bbd5
XL
13786 #[simd_test(enable = "avx512bw,avx512vl")]
13787 unsafe fn test_mm256_cmpneq_epi8_mask() {
13788 let a = _mm256_set1_epi8(1);
13789 let b = _mm256_set1_epi8(-1);
13790 let m = _mm256_cmpneq_epi8_mask(a, b);
13791 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13792 }
13793
13794 #[simd_test(enable = "avx512bw,avx512vl")]
13795 unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
13796 let a = _mm256_set1_epi8(1);
13797 let b = _mm256_set1_epi8(-1);
13798 let mask = 0b01010101_01010101_01010101_01010101;
13799 let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b);
13800 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13801 }
13802
13803 #[simd_test(enable = "avx512bw,avx512vl")]
13804 unsafe fn test_mm_cmpneq_epi8_mask() {
13805 let a = _mm_set1_epi8(1);
13806 let b = _mm_set1_epi8(-1);
13807 let m = _mm_cmpneq_epi8_mask(a, b);
13808 assert_eq!(m, 0b11111111_11111111);
13809 }
13810
13811 #[simd_test(enable = "avx512bw,avx512vl")]
13812 unsafe fn test_mm_mask_cmpneq_epi8_mask() {
13813 let a = _mm_set1_epi8(1);
13814 let b = _mm_set1_epi8(-1);
13815 let mask = 0b01010101_01010101;
13816 let r = _mm_mask_cmpneq_epi8_mask(mask, a, b);
13817 assert_eq!(r, 0b01010101_01010101);
13818 }
13819
fc512014
XL
13820 #[simd_test(enable = "avx512bw")]
13821 unsafe fn test_mm512_cmp_epu16_mask() {
13822 let a = _mm512_set1_epi16(0);
13823 let b = _mm512_set1_epi16(1);
13824 let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
13825 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13826 }
13827
13828 #[simd_test(enable = "avx512bw")]
13829 unsafe fn test_mm512_mask_cmp_epu16_mask() {
13830 let a = _mm512_set1_epi16(0);
13831 let b = _mm512_set1_epi16(1);
13832 let mask = 0b01010101_01010101_01010101_01010101;
13833 let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
13834 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13835 }
13836
cdc7bbd5
XL
13837 #[simd_test(enable = "avx512bw,avx512vl")]
13838 unsafe fn test_mm256_cmp_epu16_mask() {
13839 let a = _mm256_set1_epi16(0);
13840 let b = _mm256_set1_epi16(1);
13841 let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
13842 assert_eq!(m, 0b11111111_11111111);
13843 }
13844
13845 #[simd_test(enable = "avx512bw,avx512vl")]
13846 unsafe fn test_mm256_mask_cmp_epu16_mask() {
13847 let a = _mm256_set1_epi16(0);
13848 let b = _mm256_set1_epi16(1);
13849 let mask = 0b01010101_01010101;
13850 let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
13851 assert_eq!(r, 0b01010101_01010101);
13852 }
13853
13854 #[simd_test(enable = "avx512bw,avx512vl")]
13855 unsafe fn test_mm_cmp_epu16_mask() {
13856 let a = _mm_set1_epi16(0);
13857 let b = _mm_set1_epi16(1);
13858 let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
13859 assert_eq!(m, 0b11111111);
13860 }
13861
13862 #[simd_test(enable = "avx512bw,avx512vl")]
13863 unsafe fn test_mm_mask_cmp_epu16_mask() {
13864 let a = _mm_set1_epi16(0);
13865 let b = _mm_set1_epi16(1);
13866 let mask = 0b01010101;
13867 let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b);
13868 assert_eq!(r, 0b01010101);
13869 }
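// The general cmp intrinsics take the comparison predicate as a const generic;
// _MM_CMPINT_LT selects "less than". Other predicates from the same constant
// family (for example _MM_CMPINT_EQ or _MM_CMPINT_NE) are passed the same way,
// e.g. _mm_cmp_epu16_mask::<_MM_CMPINT_EQ>(a, b), though only LT is exercised
// in these tests.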
13870
fc512014
XL
13871 #[simd_test(enable = "avx512bw")]
13872 unsafe fn test_mm512_cmp_epu8_mask() {
13873 let a = _mm512_set1_epi8(0);
13874 let b = _mm512_set1_epi8(1);
13875 let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
13876 assert_eq!(
13877 m,
13878 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13879 );
13880 }
13881
13882 #[simd_test(enable = "avx512bw")]
13883 unsafe fn test_mm512_mask_cmp_epu8_mask() {
13884 let a = _mm512_set1_epi8(0);
13885 let b = _mm512_set1_epi8(1);
13886 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13887 let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
13888 assert_eq!(
13889 r,
13890 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13891 );
13892 }
13893
cdc7bbd5
XL
13894 #[simd_test(enable = "avx512bw,avx512vl")]
13895 unsafe fn test_mm256_cmp_epu8_mask() {
13896 let a = _mm256_set1_epi8(0);
13897 let b = _mm256_set1_epi8(1);
13898 let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
13899 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13900 }
13901
13902 #[simd_test(enable = "avx512bw,avx512vl")]
13903 unsafe fn test_mm256_mask_cmp_epu8_mask() {
13904 let a = _mm256_set1_epi8(0);
13905 let b = _mm256_set1_epi8(1);
13906 let mask = 0b01010101_01010101_01010101_01010101;
13907 let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
13908 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13909 }
13910
13911 #[simd_test(enable = "avx512bw,avx512vl")]
13912 unsafe fn test_mm_cmp_epu8_mask() {
13913 let a = _mm_set1_epi8(0);
13914 let b = _mm_set1_epi8(1);
13915 let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
13916 assert_eq!(m, 0b11111111_11111111);
13917 }
13918
13919 #[simd_test(enable = "avx512bw,avx512vl")]
13920 unsafe fn test_mm_mask_cmp_epu8_mask() {
13921 let a = _mm_set1_epi8(0);
13922 let b = _mm_set1_epi8(1);
13923 let mask = 0b01010101_01010101;
13924 let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b);
13925 assert_eq!(r, 0b01010101_01010101);
13926 }
13927
fc512014
XL
13928 #[simd_test(enable = "avx512bw")]
13929 unsafe fn test_mm512_cmp_epi16_mask() {
13930 let a = _mm512_set1_epi16(0);
13931 let b = _mm512_set1_epi16(1);
13932 let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
13933 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
13934 }
13935
13936 #[simd_test(enable = "avx512bw")]
13937 unsafe fn test_mm512_mask_cmp_epi16_mask() {
13938 let a = _mm512_set1_epi16(0);
13939 let b = _mm512_set1_epi16(1);
13940 let mask = 0b01010101_01010101_01010101_01010101;
13941 let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
13942 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
13943 }
13944
cdc7bbd5
XL
13945 #[simd_test(enable = "avx512bw,avx512vl")]
13946 unsafe fn test_mm256_cmp_epi16_mask() {
13947 let a = _mm256_set1_epi16(0);
13948 let b = _mm256_set1_epi16(1);
13949 let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
13950 assert_eq!(m, 0b11111111_11111111);
13951 }
13952
13953 #[simd_test(enable = "avx512bw,avx512vl")]
13954 unsafe fn test_mm256_mask_cmp_epi16_mask() {
13955 let a = _mm256_set1_epi16(0);
13956 let b = _mm256_set1_epi16(1);
13957 let mask = 0b01010101_01010101;
13958 let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
13959 assert_eq!(r, 0b01010101_01010101);
13960 }
13961
13962 #[simd_test(enable = "avx512bw,avx512vl")]
13963 unsafe fn test_mm_cmp_epi16_mask() {
13964 let a = _mm_set1_epi16(0);
13965 let b = _mm_set1_epi16(1);
13966 let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
13967 assert_eq!(m, 0b11111111);
13968 }
13969
13970 #[simd_test(enable = "avx512bw,avx512vl")]
13971 unsafe fn test_mm_mask_cmp_epi16_mask() {
13972 let a = _mm_set1_epi16(0);
13973 let b = _mm_set1_epi16(1);
13974 let mask = 0b01010101;
13975 let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b);
13976 assert_eq!(r, 0b01010101);
13977 }
13978
13979 #[simd_test(enable = "avx512bw")]
13980 unsafe fn test_mm512_cmp_epi8_mask() {
13981 let a = _mm512_set1_epi8(0);
13982 let b = _mm512_set1_epi8(1);
13983 let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
13984 assert_eq!(
13985 m,
13986 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111
13987 );
13988 }
13989
13990 #[simd_test(enable = "avx512bw")]
13991 unsafe fn test_mm512_mask_cmp_epi8_mask() {
13992 let a = _mm512_set1_epi8(0);
13993 let b = _mm512_set1_epi8(1);
13994 let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
13995 let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
13996 assert_eq!(
13997 r,
13998 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101
13999 );
14000 }
14001
14002 #[simd_test(enable = "avx512bw,avx512vl")]
14003 unsafe fn test_mm256_cmp_epi8_mask() {
14004 let a = _mm256_set1_epi8(0);
14005 let b = _mm256_set1_epi8(1);
14006 let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
14007 assert_eq!(m, 0b11111111_11111111_11111111_11111111);
14008 }
14009
14010 #[simd_test(enable = "avx512bw,avx512vl")]
14011 unsafe fn test_mm256_mask_cmp_epi8_mask() {
14012 let a = _mm256_set1_epi8(0);
14013 let b = _mm256_set1_epi8(1);
14014 let mask = 0b01010101_01010101_01010101_01010101;
14015 let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
14016 assert_eq!(r, 0b01010101_01010101_01010101_01010101);
14017 }
14018
14019 #[simd_test(enable = "avx512bw,avx512vl")]
14020 unsafe fn test_mm_cmp_epi8_mask() {
14021 let a = _mm_set1_epi8(0);
14022 let b = _mm_set1_epi8(1);
14023 let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
14024 assert_eq!(m, 0b11111111_11111111);
14025 }
14026
14027 #[simd_test(enable = "avx512bw,avx512vl")]
14028 unsafe fn test_mm_mask_cmp_epi8_mask() {
14029 let a = _mm_set1_epi8(0);
14030 let b = _mm_set1_epi8(1);
14031 let mask = 0b01010101_01010101;
14032 let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b);
14033 assert_eq!(r, 0b01010101_01010101);
14034 }
14035
14036 #[simd_test(enable = "avx512bw")]
14037 unsafe fn test_mm512_loadu_epi16() {
14038 #[rustfmt::skip]
14039 let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
14040 let r = _mm512_loadu_epi16(&a[0]);
14041 #[rustfmt::skip]
14042 let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
14043 assert_eq_m512i(r, e);
14044 }
14045
14046 #[simd_test(enable = "avx512bw,avx512vl")]
14047 unsafe fn test_mm256_loadu_epi16() {
14048 let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14049 let r = _mm256_loadu_epi16(&a[0]);
14050 let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
14051 assert_eq_m256i(r, e);
14052 }
14053
14054 #[simd_test(enable = "avx512bw,avx512vl")]
14055 unsafe fn test_mm_loadu_epi16() {
14056 let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
14057 let r = _mm_loadu_epi16(&a[0]);
14058 let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
14059 assert_eq_m128i(r, e);
14060 }
14061
14062 #[simd_test(enable = "avx512bw")]
14063 unsafe fn test_mm512_loadu_epi8() {
14064 #[rustfmt::skip]
14065 let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
14066 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
14067 let r = _mm512_loadu_epi8(&a[0]);
14068 #[rustfmt::skip]
14069 let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
14070 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
14071 assert_eq_m512i(r, e);
14072 }
14073
14074 #[simd_test(enable = "avx512bw,avx512vl")]
14075 unsafe fn test_mm256_loadu_epi8() {
14076 #[rustfmt::skip]
14077 let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
14078 let r = _mm256_loadu_epi8(&a[0]);
14079 #[rustfmt::skip]
14080 let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
14081 assert_eq_m256i(r, e);
14082 }
14083
14084 #[simd_test(enable = "avx512bw,avx512vl")]
14085 unsafe fn test_mm_loadu_epi8() {
14086 let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14087 let r = _mm_loadu_epi8(&a[0]);
14088 let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
14089 assert_eq_m128i(r, e);
14090 }
14091
14092 #[simd_test(enable = "avx512bw")]
14093 unsafe fn test_mm512_storeu_epi16() {
14094 let a = _mm512_set1_epi16(9);
14095 let mut r = _mm512_undefined_epi32();
14096 _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
14097 assert_eq_m512i(r, a);
14098 }
14099
14100 #[simd_test(enable = "avx512bw,avx512vl")]
14101 unsafe fn test_mm256_storeu_epi16() {
14102 let a = _mm256_set1_epi16(9);
14103 let mut r = _mm256_set1_epi32(0);
14104 _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
14105 assert_eq_m256i(r, a);
14106 }
14107
14108 #[simd_test(enable = "avx512bw,avx512vl")]
14109 unsafe fn test_mm_storeu_epi16() {
14110 let a = _mm_set1_epi16(9);
14111 let mut r = _mm_set1_epi32(0);
14112 _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
14113 assert_eq_m128i(r, a);
14114 }
14115
14116 #[simd_test(enable = "avx512bw")]
14117 unsafe fn test_mm512_storeu_epi8() {
14118 let a = _mm512_set1_epi8(9);
14119 let mut r = _mm512_undefined_epi32();
14120 _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
14121 assert_eq_m512i(r, a);
14122 }
14123
14124 #[simd_test(enable = "avx512bw,avx512vl")]
14125 unsafe fn test_mm256_storeu_epi8() {
14126 let a = _mm256_set1_epi8(9);
14127 let mut r = _mm256_set1_epi32(0);
14128 _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
14129 assert_eq_m256i(r, a);
14130 }
14131
14132 #[simd_test(enable = "avx512bw,avx512vl")]
14133 unsafe fn test_mm_storeu_epi8() {
14134 let a = _mm_set1_epi8(9);
14135 let mut r = _mm_set1_epi32(0);
14136 _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
14137 assert_eq_m128i(r, a);
14138 }
14139
14140 #[simd_test(enable = "avx512f,avx512bw")]
14141 unsafe fn test_mm512_mask_loadu_epi16() {
14142 let src = _mm512_set1_epi16(42);
14143 let a = &[
14144 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14145 24, 25, 26, 27, 28, 29, 30, 31, 32,
14146 ];
14147 let p = a.as_ptr();
14148 let m = 0b10101010_11001100_11101000_11001010;
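        // mask bits are consumed LSB-first: bit i gates element i, so a 0 bit keeps the corresponding src lane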
14149 let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
14150 let e = &[
14151 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
14152 23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
14153 ];
14154 let e = _mm512_loadu_epi16(e.as_ptr());
14155 assert_eq_m512i(r, e);
14156 }
14157
14158 #[simd_test(enable = "avx512f,avx512bw")]
14159 unsafe fn test_mm512_maskz_loadu_epi16() {
14160 let a = &[
14161 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14162 24, 25, 26, 27, 28, 29, 30, 31, 32,
14163 ];
14164 let p = a.as_ptr();
14165 let m = 0b10101010_11001100_11101000_11001010;
14166 let r = _mm512_maskz_loadu_epi16(m, black_box(p));
14167 let e = &[
14168 0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
14169 26, 0, 28, 0, 30, 0, 32,
14170 ];
14171 let e = _mm512_loadu_epi16(e.as_ptr());
14172 assert_eq_m512i(r, e);
14173 }
14174
14175 #[simd_test(enable = "avx512f,avx512bw")]
14176 unsafe fn test_mm512_mask_storeu_epi16() {
14177 let mut r = [42_i16; 32];
14178 let a = &[
14179 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14180 24, 25, 26, 27, 28, 29, 30, 31, 32,
14181 ];
14182 let a = _mm512_loadu_epi16(a.as_ptr());
14183 let m = 0b10101010_11001100_11101000_11001010;
14184 _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
14185 let e = &[
14186 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
14187 23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
14188 ];
14189 let e = _mm512_loadu_epi16(e.as_ptr());
14190 assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
14191 }
14192
14193 #[simd_test(enable = "avx512f,avx512bw")]
14194 unsafe fn test_mm512_mask_loadu_epi8() {
14195 let src = _mm512_set1_epi8(42);
14196 let a = &[
14197 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14198 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
14199 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
14200 ];
14201 let p = a.as_ptr();
14202 let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
14203 let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
14204 let e = &[
14205 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
14206 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
14207 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
14208 ];
14209 let e = _mm512_loadu_epi8(e.as_ptr());
14210 assert_eq_m512i(r, e);
14211 }
14212
14213 #[simd_test(enable = "avx512f,avx512bw")]
14214 unsafe fn test_mm512_maskz_loadu_epi8() {
14215 let a = &[
14216 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14217 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
14218 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
14219 ];
14220 let p = a.as_ptr();
14221 let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
14222 let r = _mm512_maskz_loadu_epi8(m, black_box(p));
14223 let e = &[
14224 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
14225 26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
14226 50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
14227 ];
14228 let e = _mm512_loadu_epi8(e.as_ptr());
14229 assert_eq_m512i(r, e);
14230 }
14231
14232 #[simd_test(enable = "avx512f,avx512bw")]
14233 unsafe fn test_mm512_mask_storeu_epi8() {
14234 let mut r = [42_i8; 64];
14235 let a = &[
14236 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14237 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
14238 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
14239 ];
14240 let a = _mm512_loadu_epi8(a.as_ptr());
14241 let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
14242 _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
14243 let e = &[
14244 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
14245 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
14246 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
14247 ];
14248 let e = _mm512_loadu_epi8(e.as_ptr());
14249 assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
14250 }
14251
14252 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14253 unsafe fn test_mm256_mask_loadu_epi16() {
14254 let src = _mm256_set1_epi16(42);
14255 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14256 let p = a.as_ptr();
14257 let m = 0b11101000_11001010;
14258 let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
14259 let e = &[
14260 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
14261 ];
14262 let e = _mm256_loadu_epi16(e.as_ptr());
14263 assert_eq_m256i(r, e);
14264 }
14265
14266 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14267 unsafe fn test_mm256_maskz_loadu_epi16() {
14268 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14269 let p = a.as_ptr();
14270 let m = 0b11101000_11001010;
14271 let r = _mm256_maskz_loadu_epi16(m, black_box(p));
14272 let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
14273 let e = _mm256_loadu_epi16(e.as_ptr());
14274 assert_eq_m256i(r, e);
14275 }
14276
14277 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14278 unsafe fn test_mm256_mask_storeu_epi16() {
14279 let mut r = [42_i16; 16];
14280 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14281 let a = _mm256_loadu_epi16(a.as_ptr());
14282 let m = 0b11101000_11001010;
14283 _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
14284 let e = &[
14285 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
14286 ];
14287 let e = _mm256_loadu_epi16(e.as_ptr());
14288 assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
14289 }
14290
14291 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14292 unsafe fn test_mm256_mask_loadu_epi8() {
14293 let src = _mm256_set1_epi8(42);
14294 let a = &[
14295 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14296 24, 25, 26, 27, 28, 29, 30, 31, 32,
14297 ];
14298 let p = a.as_ptr();
14299 let m = 0b10101010_11001100_11101000_11001010;
14300 let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
14301 let e = &[
14302 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
14303 23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
14304 ];
14305 let e = _mm256_loadu_epi8(e.as_ptr());
14306 assert_eq_m256i(r, e);
14307 }
14308
14309 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14310 unsafe fn test_mm256_maskz_loadu_epi8() {
14311 let a = &[
14312 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14313 24, 25, 26, 27, 28, 29, 30, 31, 32,
14314 ];
14315 let p = a.as_ptr();
14316 let m = 0b10101010_11001100_11101000_11001010;
14317 let r = _mm256_maskz_loadu_epi8(m, black_box(p));
14318 let e = &[
14319 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
14320 26, 0, 28, 0, 30, 0, 32,
14321 ];
14322 let e = _mm256_loadu_epi8(e.as_ptr());
14323 assert_eq_m256i(r, e);
14324 }
14325
14326 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14327 unsafe fn test_mm256_mask_storeu_epi8() {
14328 let mut r = [42_i8; 32];
14329 let a = &[
14330 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
14331 24, 25, 26, 27, 28, 29, 30, 31, 32,
14332 ];
14333 let a = _mm256_loadu_epi8(a.as_ptr());
14334 let m = 0b10101010_11001100_11101000_11001010;
14335 _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
14336 let e = &[
14337 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
14338 23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
14339 ];
14340 let e = _mm256_loadu_epi8(e.as_ptr());
14341 assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
14342 }
14343
14344 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14345 unsafe fn test_mm_mask_loadu_epi16() {
14346 let src = _mm_set1_epi16(42);
14347 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
14348 let p = a.as_ptr();
14349 let m = 0b11001010;
14350 let r = _mm_mask_loadu_epi16(src, m, black_box(p));
14351 let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
14352 let e = _mm_loadu_epi16(e.as_ptr());
14353 assert_eq_m128i(r, e);
14354 }
14355
14356 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14357 unsafe fn test_mm_maskz_loadu_epi16() {
14358 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
14359 let p = a.as_ptr();
14360 let m = 0b11001010;
14361 let r = _mm_maskz_loadu_epi16(m, black_box(p));
14362 let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
14363 let e = _mm_loadu_epi16(e.as_ptr());
14364 assert_eq_m128i(r, e);
14365 }
14366
14367 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14368 unsafe fn test_mm_mask_storeu_epi16() {
14369 let mut r = [42_i16; 8];
14370 let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
14371 let a = _mm_loadu_epi16(a.as_ptr());
14372 let m = 0b11001010;
14373 _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
14374 let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
14375 let e = _mm_loadu_epi16(e.as_ptr());
14376 assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
14377 }
14378
14379 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14380 unsafe fn test_mm_mask_loadu_epi8() {
14381 let src = _mm_set1_epi8(42);
14382 let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14383 let p = a.as_ptr();
14384 let m = 0b11101000_11001010;
14385 let r = _mm_mask_loadu_epi8(src, m, black_box(p));
14386 let e = &[
14387 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
14388 ];
14389 let e = _mm_loadu_epi8(e.as_ptr());
14390 assert_eq_m128i(r, e);
14391 }
14392
14393 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14394 unsafe fn test_mm_maskz_loadu_epi8() {
14395 let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14396 let p = a.as_ptr();
14397 let m = 0b11101000_11001010;
14398 let r = _mm_maskz_loadu_epi8(m, black_box(p));
14399 let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
14400 let e = _mm_loadu_epi8(e.as_ptr());
14401 assert_eq_m128i(r, e);
14402 }
14403
14404 #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
14405 unsafe fn test_mm_mask_storeu_epi8() {
14406 let mut r = [42_i8; 16];
14407 let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
14408 let a = _mm_loadu_epi8(a.as_ptr());
14409 let m = 0b11101000_11001010;
14410 _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
14411 let e = &[
14412 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
14413 ];
14414 let e = _mm_loadu_epi8(e.as_ptr());
14415 assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
14416 }
14417
14418 #[simd_test(enable = "avx512bw")]
14419 unsafe fn test_mm512_madd_epi16() {
14420 let a = _mm512_set1_epi16(1);
14421 let b = _mm512_set1_epi16(1);
14422 let r = _mm512_madd_epi16(a, b);
14423 let e = _mm512_set1_epi32(2);
14424 assert_eq_m512i(r, e);
14425 }
14426
14427 #[simd_test(enable = "avx512bw")]
14428 unsafe fn test_mm512_mask_madd_epi16() {
14429 let a = _mm512_set1_epi16(1);
14430 let b = _mm512_set1_epi16(1);
14431 let r = _mm512_mask_madd_epi16(a, 0, a, b);
14432 assert_eq_m512i(r, a);
14433 let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b);
14434 let e = _mm512_set_epi32(
14435 1 << 16 | 1,
14436 1 << 16 | 1,
14437 1 << 16 | 1,
14438 1 << 16 | 1,
14439 1 << 16 | 1,
14440 1 << 16 | 1,
14441 1 << 16 | 1,
14442 1 << 16 | 1,
14443 1 << 16 | 1,
14444 1 << 16 | 1,
14445 1 << 16 | 1,
14446 1 << 16 | 1,
14447 2,
14448 2,
14449 2,
14450 2,
14451 );
14452 assert_eq_m512i(r, e);
14453 }
14454
14455 #[simd_test(enable = "avx512bw")]
14456 unsafe fn test_mm512_maskz_madd_epi16() {
14457 let a = _mm512_set1_epi16(1);
14458 let b = _mm512_set1_epi16(1);
14459 let r = _mm512_maskz_madd_epi16(0, a, b);
14460 assert_eq_m512i(r, _mm512_setzero_si512());
14461 let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b);
14462 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2);
14463 assert_eq_m512i(r, e);
14464 }
14465
14466 #[simd_test(enable = "avx512bw,avx512vl")]
14467 unsafe fn test_mm256_mask_madd_epi16() {
14468 let a = _mm256_set1_epi16(1);
14469 let b = _mm256_set1_epi16(1);
14470 let r = _mm256_mask_madd_epi16(a, 0, a, b);
14471 assert_eq_m256i(r, a);
14472 let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b);
14473 let e = _mm256_set_epi32(
14474 1 << 16 | 1,
14475 1 << 16 | 1,
14476 1 << 16 | 1,
14477 1 << 16 | 1,
14478 2,
14479 2,
14480 2,
14481 2,
14482 );
14483 assert_eq_m256i(r, e);
14484 }
14485
14486 #[simd_test(enable = "avx512bw,avx512vl")]
14487 unsafe fn test_mm256_maskz_madd_epi16() {
14488 let a = _mm256_set1_epi16(1);
14489 let b = _mm256_set1_epi16(1);
14490 let r = _mm256_maskz_madd_epi16(0, a, b);
14491 assert_eq_m256i(r, _mm256_setzero_si256());
14492 let r = _mm256_maskz_madd_epi16(0b00001111, a, b);
14493 let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2);
14494 assert_eq_m256i(r, e);
14495 }
14496
14497 #[simd_test(enable = "avx512bw,avx512vl")]
14498 unsafe fn test_mm_mask_madd_epi16() {
14499 let a = _mm_set1_epi16(1);
14500 let b = _mm_set1_epi16(1);
14501 let r = _mm_mask_madd_epi16(a, 0, a, b);
14502 assert_eq_m128i(r, a);
14503 let r = _mm_mask_madd_epi16(a, 0b00001111, a, b);
14504 let e = _mm_set_epi32(2, 2, 2, 2);
14505 assert_eq_m128i(r, e);
14506 }
14507
14508 #[simd_test(enable = "avx512bw,avx512vl")]
14509 unsafe fn test_mm_maskz_madd_epi16() {
14510 let a = _mm_set1_epi16(1);
14511 let b = _mm_set1_epi16(1);
14512 let r = _mm_maskz_madd_epi16(0, a, b);
14513 assert_eq_m128i(r, _mm_setzero_si128());
14514 let r = _mm_maskz_madd_epi16(0b00001111, a, b);
14515 let e = _mm_set_epi32(2, 2, 2, 2);
14516 assert_eq_m128i(r, e);
14517 }
14518
14519 #[simd_test(enable = "avx512bw")]
14520 unsafe fn test_mm512_maddubs_epi16() {
14521 let a = _mm512_set1_epi8(1);
14522 let b = _mm512_set1_epi8(1);
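        // vpmaddubsw multiplies unsigned bytes of a by signed bytes of b and adds adjacent pairs, so each 16-bit lane is 1*1 + 1*1 = 2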
14523 let r = _mm512_maddubs_epi16(a, b);
14524 let e = _mm512_set1_epi16(2);
14525 assert_eq_m512i(r, e);
14526 }
14527
14528 #[simd_test(enable = "avx512bw")]
14529 unsafe fn test_mm512_mask_maddubs_epi16() {
14530 let a = _mm512_set1_epi8(1);
14531 let b = _mm512_set1_epi8(1);
14532 let src = _mm512_set1_epi16(1);
14533 let r = _mm512_mask_maddubs_epi16(src, 0, a, b);
14534 assert_eq_m512i(r, src);
14535 let r = _mm512_mask_maddubs_epi16(src, 0b00000000_00000000_00000000_00000001, a, b);
14536 #[rustfmt::skip]
14537 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14538                          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
14539 assert_eq_m512i(r, e);
14540 }
14541
14542 #[simd_test(enable = "avx512bw")]
14543 unsafe fn test_mm512_maskz_maddubs_epi16() {
14544 let a = _mm512_set1_epi8(1);
14545 let b = _mm512_set1_epi8(1);
14546 let r = _mm512_maskz_maddubs_epi16(0, a, b);
14547 assert_eq_m512i(r, _mm512_setzero_si512());
14548 let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b);
14549 #[rustfmt::skip]
14550 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2,
14551 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
14552 assert_eq_m512i(r, e);
14553 }
14554
14555 #[simd_test(enable = "avx512bw,avx512vl")]
14556 unsafe fn test_mm256_mask_maddubs_epi16() {
14557 let a = _mm256_set1_epi8(1);
14558 let b = _mm256_set1_epi8(1);
14559 let src = _mm256_set1_epi16(1);
14560 let r = _mm256_mask_maddubs_epi16(src, 0, a, b);
14561 assert_eq_m256i(r, src);
14562 let r = _mm256_mask_maddubs_epi16(src, 0b00000000_00000001, a, b);
14563 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2);
14564 assert_eq_m256i(r, e);
14565 }
14566
14567 #[simd_test(enable = "avx512bw,avx512vl")]
14568 unsafe fn test_mm256_maskz_maddubs_epi16() {
14569 let a = _mm256_set1_epi8(1);
14570 let b = _mm256_set1_epi8(1);
14571 let r = _mm256_maskz_maddubs_epi16(0, a, b);
14572 assert_eq_m256i(r, _mm256_setzero_si256());
14573 let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b);
14574 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
14575 assert_eq_m256i(r, e);
14576 }
14577
14578 #[simd_test(enable = "avx512bw,avx512vl")]
14579 unsafe fn test_mm_mask_maddubs_epi16() {
14580 let a = _mm_set1_epi8(1);
14581 let b = _mm_set1_epi8(1);
14582 let src = _mm_set1_epi16(1);
14583 let r = _mm_mask_maddubs_epi16(src, 0, a, b);
14584 assert_eq_m128i(r, src);
14585 let r = _mm_mask_maddubs_epi16(src, 0b00000001, a, b);
14586 let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 2);
14587 assert_eq_m128i(r, e);
14588 }
14589
14590 #[simd_test(enable = "avx512bw,avx512vl")]
14591 unsafe fn test_mm_maskz_maddubs_epi16() {
14592 let a = _mm_set1_epi8(1);
14593 let b = _mm_set1_epi8(1);
14594 let r = _mm_maskz_maddubs_epi16(0, a, b);
14595 assert_eq_m128i(r, _mm_setzero_si128());
14596 let r = _mm_maskz_maddubs_epi16(0b00001111, a, b);
14597 let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2);
14598 assert_eq_m128i(r, e);
14599 }
14600
14601 #[simd_test(enable = "avx512bw")]
14602 unsafe fn test_mm512_packs_epi32() {
14603 let a = _mm512_set1_epi32(i32::MAX);
14604 let b = _mm512_set1_epi32(1);
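        // vpackssdw narrows i32 to i16 with signed saturation; within each 128-bit lane the low four results come from a and the high four from b, so i32::MAX clamps to i16::MAX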
14605 let r = _mm512_packs_epi32(a, b);
14606 #[rustfmt::skip]
14607 let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX,
14608 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14609 assert_eq_m512i(r, e);
14610 }
14611
14612 #[simd_test(enable = "avx512bw")]
14613 unsafe fn test_mm512_mask_packs_epi32() {
14614 let a = _mm512_set1_epi32(i32::MAX);
14615 let b = _mm512_set1_epi32(1 << 16 | 1);
14616 let r = _mm512_mask_packs_epi32(a, 0, a, b);
14617 assert_eq_m512i(r, a);
14618 let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
14619 #[rustfmt::skip]
14620 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14621 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14622 assert_eq_m512i(r, e);
14623 }
14624
14625 #[simd_test(enable = "avx512bw")]
14626 unsafe fn test_mm512_maskz_packs_epi32() {
14627 let a = _mm512_set1_epi32(i32::MAX);
14628 let b = _mm512_set1_epi32(1);
14629 let r = _mm512_maskz_packs_epi32(0, a, b);
14630 assert_eq_m512i(r, _mm512_setzero_si512());
14631 let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b);
14632 #[rustfmt::skip]
14633 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14634 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14635 assert_eq_m512i(r, e);
14636 }
14637
14638 #[simd_test(enable = "avx512bw,avx512vl")]
14639 unsafe fn test_mm256_mask_packs_epi32() {
14640 let a = _mm256_set1_epi32(i32::MAX);
14641 let b = _mm256_set1_epi32(1 << 16 | 1);
14642 let r = _mm256_mask_packs_epi32(a, 0, a, b);
14643 assert_eq_m256i(r, a);
14644 let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b);
14645 #[rustfmt::skip]
14646 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14647 assert_eq_m256i(r, e);
14648 }
14649
14650 #[simd_test(enable = "avx512bw,avx512vl")]
14651 unsafe fn test_mm256_maskz_packs_epi32() {
14652 let a = _mm256_set1_epi32(i32::MAX);
14653 let b = _mm256_set1_epi32(1);
14654 let r = _mm256_maskz_packs_epi32(0, a, b);
14655 assert_eq_m256i(r, _mm256_setzero_si256());
14656 let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b);
14657 #[rustfmt::skip]
14658 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14659 assert_eq_m256i(r, e);
14660 }
14661
14662 #[simd_test(enable = "avx512bw,avx512vl")]
14663 unsafe fn test_mm_mask_packs_epi32() {
14664 let a = _mm_set1_epi32(i32::MAX);
14665 let b = _mm_set1_epi32(1 << 16 | 1);
14666 let r = _mm_mask_packs_epi32(a, 0, a, b);
14667 assert_eq_m128i(r, a);
14668 let r = _mm_mask_packs_epi32(b, 0b00001111, a, b);
14669 let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14670 assert_eq_m128i(r, e);
14671 }
14672
14673 #[simd_test(enable = "avx512bw,avx512vl")]
14674 unsafe fn test_mm_maskz_packs_epi32() {
14675 let a = _mm_set1_epi32(i32::MAX);
14676 let b = _mm_set1_epi32(1);
14677 let r = _mm_maskz_packs_epi32(0, a, b);
14678 assert_eq_m128i(r, _mm_setzero_si128());
14679 let r = _mm_maskz_packs_epi32(0b00001111, a, b);
14680 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
14681 assert_eq_m128i(r, e);
14682 }
14683
14684 #[simd_test(enable = "avx512bw")]
14685 unsafe fn test_mm512_packs_epi16() {
14686 let a = _mm512_set1_epi16(i16::MAX);
14687 let b = _mm512_set1_epi16(1);
14688 let r = _mm512_packs_epi16(a, b);
14689 #[rustfmt::skip]
14690 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
14691 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
14692 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
14693 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14694 assert_eq_m512i(r, e);
14695 }
14696
14697 #[simd_test(enable = "avx512bw")]
14698 unsafe fn test_mm512_mask_packs_epi16() {
14699 let a = _mm512_set1_epi16(i16::MAX);
14700 let b = _mm512_set1_epi16(1 << 8 | 1);
14701 let r = _mm512_mask_packs_epi16(a, 0, a, b);
14702 assert_eq_m512i(r, a);
14703 let r = _mm512_mask_packs_epi16(
14704 b,
14705 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
14706 a,
14707 b,
14708 );
14709 #[rustfmt::skip]
14710 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14711 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14712 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14713 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14714 assert_eq_m512i(r, e);
14715 }
14716
14717 #[simd_test(enable = "avx512bw")]
14718 unsafe fn test_mm512_maskz_packs_epi16() {
14719 let a = _mm512_set1_epi16(i16::MAX);
14720 let b = _mm512_set1_epi16(1);
14721 let r = _mm512_maskz_packs_epi16(0, a, b);
14722 assert_eq_m512i(r, _mm512_setzero_si512());
14723 let r = _mm512_maskz_packs_epi16(
14724 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
14725 a,
14726 b,
14727 );
14728 #[rustfmt::skip]
14729 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14730 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14731 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14732 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14733 assert_eq_m512i(r, e);
14734 }
14735
14736 #[simd_test(enable = "avx512bw,avx512vl")]
14737 unsafe fn test_mm256_mask_packs_epi16() {
14738 let a = _mm256_set1_epi16(i16::MAX);
14739 let b = _mm256_set1_epi16(1 << 8 | 1);
14740 let r = _mm256_mask_packs_epi16(a, 0, a, b);
14741 assert_eq_m256i(r, a);
14742 let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
14743 #[rustfmt::skip]
14744 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14745 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14746 assert_eq_m256i(r, e);
14747 }
14748
14749 #[simd_test(enable = "avx512bw,avx512vl")]
14750 unsafe fn test_mm256_maskz_packs_epi16() {
14751 let a = _mm256_set1_epi16(i16::MAX);
14752 let b = _mm256_set1_epi16(1);
14753 let r = _mm256_maskz_packs_epi16(0, a, b);
14754 assert_eq_m256i(r, _mm256_setzero_si256());
14755 let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b);
14756 #[rustfmt::skip]
14757 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14758 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14759 assert_eq_m256i(r, e);
14760 }
14761
14762 #[simd_test(enable = "avx512bw,avx512vl")]
14763 unsafe fn test_mm_mask_packs_epi16() {
14764 let a = _mm_set1_epi16(i16::MAX);
14765 let b = _mm_set1_epi16(1 << 8 | 1);
14766 let r = _mm_mask_packs_epi16(a, 0, a, b);
14767 assert_eq_m128i(r, a);
14768 let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b);
14769 #[rustfmt::skip]
14770 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14771 assert_eq_m128i(r, e);
14772 }
14773
14774 #[simd_test(enable = "avx512bw,avx512vl")]
14775 unsafe fn test_mm_maskz_packs_epi16() {
14776 let a = _mm_set1_epi16(i16::MAX);
14777 let b = _mm_set1_epi16(1);
14778 let r = _mm_maskz_packs_epi16(0, a, b);
14779 assert_eq_m128i(r, _mm_setzero_si128());
14780 let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b);
14781 #[rustfmt::skip]
14782 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
14783 assert_eq_m128i(r, e);
14784 }
14785
14786 #[simd_test(enable = "avx512bw")]
14787 unsafe fn test_mm512_packus_epi32() {
14788 let a = _mm512_set1_epi32(-1);
14789 let b = _mm512_set1_epi32(1);
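        // vpackusdw narrows i32 to u16 with unsigned saturation, so the -1 lanes from a clamp to 0 and the 1 lanes from b stay 1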
14790 let r = _mm512_packus_epi32(a, b);
14791 #[rustfmt::skip]
14792 let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
14793 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
14794 assert_eq_m512i(r, e);
14795 }
14796
14797 #[simd_test(enable = "avx512bw")]
14798 unsafe fn test_mm512_mask_packus_epi32() {
14799 let a = _mm512_set1_epi32(-1);
14800 let b = _mm512_set1_epi32(1 << 16 | 1);
14801 let r = _mm512_mask_packus_epi32(a, 0, a, b);
14802 assert_eq_m512i(r, a);
14803 let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b);
14804 #[rustfmt::skip]
14805 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14806 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14807 assert_eq_m512i(r, e);
14808 }
14809
14810 #[simd_test(enable = "avx512bw")]
14811 unsafe fn test_mm512_maskz_packus_epi32() {
14812 let a = _mm512_set1_epi32(-1);
14813 let b = _mm512_set1_epi32(1);
14814 let r = _mm512_maskz_packus_epi32(0, a, b);
14815 assert_eq_m512i(r, _mm512_setzero_si512());
14816 let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b);
14817 #[rustfmt::skip]
14818 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14819 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14820 assert_eq_m512i(r, e);
14821 }
14822
14823 #[simd_test(enable = "avx512bw,avx512vl")]
14824 unsafe fn test_mm256_mask_packus_epi32() {
14825 let a = _mm256_set1_epi32(-1);
14826 let b = _mm256_set1_epi32(1 << 16 | 1);
14827 let r = _mm256_mask_packus_epi32(a, 0, a, b);
14828 assert_eq_m256i(r, a);
14829 let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b);
14830 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14831 assert_eq_m256i(r, e);
14832 }
14833
14834 #[simd_test(enable = "avx512bw,avx512vl")]
14835 unsafe fn test_mm256_maskz_packus_epi32() {
14836 let a = _mm256_set1_epi32(-1);
14837 let b = _mm256_set1_epi32(1);
14838 let r = _mm256_maskz_packus_epi32(0, a, b);
14839 assert_eq_m256i(r, _mm256_setzero_si256());
14840 let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b);
14841 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14842 assert_eq_m256i(r, e);
14843 }
14844
14845 #[simd_test(enable = "avx512bw,avx512vl")]
14846 unsafe fn test_mm_mask_packus_epi32() {
14847 let a = _mm_set1_epi32(-1);
14848 let b = _mm_set1_epi32(1 << 16 | 1);
14849 let r = _mm_mask_packus_epi32(a, 0, a, b);
14850 assert_eq_m128i(r, a);
14851 let r = _mm_mask_packus_epi32(b, 0b00001111, a, b);
14852 let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
14853 assert_eq_m128i(r, e);
14854 }
14855
14856 #[simd_test(enable = "avx512bw,avx512vl")]
14857 unsafe fn test_mm_maskz_packus_epi32() {
14858 let a = _mm_set1_epi32(-1);
14859 let b = _mm_set1_epi32(1);
14860 let r = _mm_maskz_packus_epi32(0, a, b);
14861 assert_eq_m128i(r, _mm_setzero_si128());
14862 let r = _mm_maskz_packus_epi32(0b00001111, a, b);
14863 let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
14864 assert_eq_m128i(r, e);
14865 }
14866
14867 #[simd_test(enable = "avx512bw")]
14868 unsafe fn test_mm512_packus_epi16() {
14869 let a = _mm512_set1_epi16(-1);
14870 let b = _mm512_set1_epi16(1);
14871 let r = _mm512_packus_epi16(a, b);
14872 #[rustfmt::skip]
14873 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
14874 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
14875 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
14876 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
14877 assert_eq_m512i(r, e);
14878 }
14879
14880 #[simd_test(enable = "avx512bw")]
14881 unsafe fn test_mm512_mask_packus_epi16() {
14882 let a = _mm512_set1_epi16(-1);
14883 let b = _mm512_set1_epi16(1 << 8 | 1);
14884 let r = _mm512_mask_packus_epi16(a, 0, a, b);
14885 assert_eq_m512i(r, a);
14886 let r = _mm512_mask_packus_epi16(
14887 b,
14888 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
14889 a,
14890 b,
14891 );
14892 #[rustfmt::skip]
14893 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14894 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14895 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14896 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14897 assert_eq_m512i(r, e);
14898 }
14899
14900 #[simd_test(enable = "avx512bw")]
14901 unsafe fn test_mm512_maskz_packus_epi16() {
14902 let a = _mm512_set1_epi16(-1);
14903 let b = _mm512_set1_epi16(1);
14904 let r = _mm512_maskz_packus_epi16(0, a, b);
14905 assert_eq_m512i(r, _mm512_setzero_si512());
14906 let r = _mm512_maskz_packus_epi16(
14907 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
14908 a,
14909 b,
14910 );
14911 #[rustfmt::skip]
14912 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14913 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14914 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14915 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14916 assert_eq_m512i(r, e);
14917 }
14918
14919 #[simd_test(enable = "avx512bw,avx512vl")]
14920 unsafe fn test_mm256_mask_packus_epi16() {
14921 let a = _mm256_set1_epi16(-1);
14922 let b = _mm256_set1_epi16(1 << 8 | 1);
14923 let r = _mm256_mask_packus_epi16(a, 0, a, b);
14924 assert_eq_m256i(r, a);
14925 let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b);
14926 #[rustfmt::skip]
14927 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14928 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14929 assert_eq_m256i(r, e);
14930 }
14931
14932 #[simd_test(enable = "avx512bw,avx512vl")]
14933 unsafe fn test_mm256_maskz_packus_epi16() {
14934 let a = _mm256_set1_epi16(-1);
14935 let b = _mm256_set1_epi16(1);
14936 let r = _mm256_maskz_packus_epi16(0, a, b);
14937 assert_eq_m256i(r, _mm256_setzero_si256());
14938 let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b);
14939 #[rustfmt::skip]
14940 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14941 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14942 assert_eq_m256i(r, e);
14943 }
14944
14945 #[simd_test(enable = "avx512bw,avx512vl")]
14946 unsafe fn test_mm_mask_packus_epi16() {
14947 let a = _mm_set1_epi16(-1);
14948 let b = _mm_set1_epi16(1 << 8 | 1);
14949 let r = _mm_mask_packus_epi16(a, 0, a, b);
14950 assert_eq_m128i(r, a);
14951 let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b);
14952 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
14953 assert_eq_m128i(r, e);
14954 }
14955
14956 #[simd_test(enable = "avx512bw,avx512vl")]
14957 unsafe fn test_mm_maskz_packus_epi16() {
14958 let a = _mm_set1_epi16(-1);
14959 let b = _mm_set1_epi16(1);
14960 let r = _mm_maskz_packus_epi16(0, a, b);
14961 assert_eq_m128i(r, _mm_setzero_si128());
14962 let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b);
14963 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
14964 assert_eq_m128i(r, e);
14965 }
14966
14967 #[simd_test(enable = "avx512bw")]
14968 unsafe fn test_mm512_avg_epu16() {
14969 let a = _mm512_set1_epi16(1);
14970 let b = _mm512_set1_epi16(1);
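        // vpavgw computes (a + b + 1) >> 1 on unsigned 16-bit lanes, so avg(1, 1) rounds to 1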
14971 let r = _mm512_avg_epu16(a, b);
14972 let e = _mm512_set1_epi16(1);
14973 assert_eq_m512i(r, e);
14974 }
14975
14976 #[simd_test(enable = "avx512bw")]
14977 unsafe fn test_mm512_mask_avg_epu16() {
14978 let a = _mm512_set1_epi16(1);
14979 let b = _mm512_set1_epi16(1);
14980 let r = _mm512_mask_avg_epu16(a, 0, a, b);
14981 assert_eq_m512i(r, a);
14982 let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
14983 #[rustfmt::skip]
14984 let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14985 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
14986 assert_eq_m512i(r, e);
14987 }
14988
14989 #[simd_test(enable = "avx512bw")]
14990 unsafe fn test_mm512_maskz_avg_epu16() {
14991 let a = _mm512_set1_epi16(1);
14992 let b = _mm512_set1_epi16(1);
14993 let r = _mm512_maskz_avg_epu16(0, a, b);
14994 assert_eq_m512i(r, _mm512_setzero_si512());
14995 let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b);
14996 #[rustfmt::skip]
14997 let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
14998 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
14999 assert_eq_m512i(r, e);
15000 }
15001
15002 #[simd_test(enable = "avx512bw,avx512vl")]
15003 unsafe fn test_mm256_mask_avg_epu16() {
15004 let a = _mm256_set1_epi16(1);
15005 let b = _mm256_set1_epi16(1);
15006 let r = _mm256_mask_avg_epu16(a, 0, a, b);
15007 assert_eq_m256i(r, a);
15008 let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b);
15009 let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
15010 assert_eq_m256i(r, e);
15011 }
15012
15013 #[simd_test(enable = "avx512bw,avx512vl")]
15014 unsafe fn test_mm256_maskz_avg_epu16() {
15015 let a = _mm256_set1_epi16(1);
15016 let b = _mm256_set1_epi16(1);
15017 let r = _mm256_maskz_avg_epu16(0, a, b);
15018 assert_eq_m256i(r, _mm256_setzero_si256());
15019 let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b);
15020 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
15021 assert_eq_m256i(r, e);
15022 }
15023
15024 #[simd_test(enable = "avx512bw,avx512vl")]
15025 unsafe fn test_mm_mask_avg_epu16() {
15026 let a = _mm_set1_epi16(1);
15027 let b = _mm_set1_epi16(1);
15028 let r = _mm_mask_avg_epu16(a, 0, a, b);
15029 assert_eq_m128i(r, a);
15030 let r = _mm_mask_avg_epu16(a, 0b00001111, a, b);
15031 let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
15032 assert_eq_m128i(r, e);
15033 }
15034
15035 #[simd_test(enable = "avx512bw,avx512vl")]
15036 unsafe fn test_mm_maskz_avg_epu16() {
15037 let a = _mm_set1_epi16(1);
15038 let b = _mm_set1_epi16(1);
15039 let r = _mm_maskz_avg_epu16(0, a, b);
15040 assert_eq_m128i(r, _mm_setzero_si128());
15041 let r = _mm_maskz_avg_epu16(0b00001111, a, b);
15042 let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
15043 assert_eq_m128i(r, e);
15044 }
15045
15046 #[simd_test(enable = "avx512bw")]
15047 unsafe fn test_mm512_avg_epu8() {
15048 let a = _mm512_set1_epi8(1);
15049 let b = _mm512_set1_epi8(1);
15050 let r = _mm512_avg_epu8(a, b);
15051 let e = _mm512_set1_epi8(1);
15052 assert_eq_m512i(r, e);
15053 }
15054
15055 #[simd_test(enable = "avx512bw")]
15056 unsafe fn test_mm512_mask_avg_epu8() {
15057 let a = _mm512_set1_epi8(1);
15058 let b = _mm512_set1_epi8(1);
15059 let r = _mm512_mask_avg_epu8(a, 0, a, b);
15060 assert_eq_m512i(r, a);
15061 let r = _mm512_mask_avg_epu8(
15062 a,
15063 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
15064 a,
15065 b,
15066 );
15067 #[rustfmt::skip]
15068 let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15070 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15071 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
15072 assert_eq_m512i(r, e);
15073 }
15074
15075 #[simd_test(enable = "avx512bw")]
15076 unsafe fn test_mm512_maskz_avg_epu8() {
15077 let a = _mm512_set1_epi8(1);
15078 let b = _mm512_set1_epi8(1);
15079 let r = _mm512_maskz_avg_epu8(0, a, b);
15080 assert_eq_m512i(r, _mm512_setzero_si512());
15081 let r = _mm512_maskz_avg_epu8(
15082 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
15083 a,
15084 b,
15085 );
15086 #[rustfmt::skip]
15087 let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15088 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15089 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15090 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
15091 assert_eq_m512i(r, e);
15092 }
15093
15094 #[simd_test(enable = "avx512bw,avx512vl")]
15095 unsafe fn test_mm256_mask_avg_epu8() {
15096 let a = _mm256_set1_epi8(1);
15097 let b = _mm256_set1_epi8(1);
15098 let r = _mm256_mask_avg_epu8(a, 0, a, b);
15099 assert_eq_m256i(r, a);
15100 let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
15101 #[rustfmt::skip]
15102 let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
15104 assert_eq_m256i(r, e);
15105 }
15106
15107 #[simd_test(enable = "avx512bw,avx512vl")]
15108 unsafe fn test_mm256_maskz_avg_epu8() {
15109 let a = _mm256_set1_epi8(1);
15110 let b = _mm256_set1_epi8(1);
15111 let r = _mm256_maskz_avg_epu8(0, a, b);
15112 assert_eq_m256i(r, _mm256_setzero_si256());
15113 let r = _mm256_maskz_avg_epu8(0b00000000_00000000_00000000_00001111, a, b);
15114 #[rustfmt::skip]
15115 let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15116 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
15117 assert_eq_m256i(r, e);
15118 }
15119
15120 #[simd_test(enable = "avx512bw,avx512vl")]
15121 unsafe fn test_mm_mask_avg_epu8() {
15122 let a = _mm_set1_epi8(1);
15123 let b = _mm_set1_epi8(1);
15124 let r = _mm_mask_avg_epu8(a, 0, a, b);
15125 assert_eq_m128i(r, a);
15126 let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b);
15127 let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
15128 assert_eq_m128i(r, e);
15129 }
15130
15131 #[simd_test(enable = "avx512bw,avx512vl")]
15132 unsafe fn test_mm_maskz_avg_epu8() {
15133 let a = _mm_set1_epi8(1);
15134 let b = _mm_set1_epi8(1);
15135 let r = _mm_maskz_avg_epu8(0, a, b);
15136 assert_eq_m128i(r, _mm_setzero_si128());
15137 let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b);
15138 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
15139 assert_eq_m128i(r, e);
15140 }
15141
15142 #[simd_test(enable = "avx512bw")]
15143 unsafe fn test_mm512_sll_epi16() {
15144 let a = _mm512_set1_epi16(1 << 15);
15145 let count = _mm_set1_epi16(2);
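        // shifting the top bit (1 << 15) left by 2 moves it out of the 16-bit lane, so every lane ends up 0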
15146 let r = _mm512_sll_epi16(a, count);
15147 let e = _mm512_set1_epi16(0);
15148 assert_eq_m512i(r, e);
15149 }
15150
15151 #[simd_test(enable = "avx512bw")]
15152 unsafe fn test_mm512_mask_sll_epi16() {
15153 let a = _mm512_set1_epi16(1 << 15);
15154 let count = _mm_set1_epi16(2);
15155 let r = _mm512_mask_sll_epi16(a, 0, a, count);
15156 assert_eq_m512i(r, a);
15157 let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15158 let e = _mm512_set1_epi16(0);
15159 assert_eq_m512i(r, e);
15160 }
15161
15162 #[simd_test(enable = "avx512bw")]
15163 unsafe fn test_mm512_maskz_sll_epi16() {
15164 let a = _mm512_set1_epi16(1 << 15);
15165 let count = _mm_set1_epi16(2);
15166 let r = _mm512_maskz_sll_epi16(0, a, count);
15167 assert_eq_m512i(r, _mm512_setzero_si512());
15168 let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count);
15169 let e = _mm512_set1_epi16(0);
15170 assert_eq_m512i(r, e);
15171 }
15172
15173 #[simd_test(enable = "avx512bw,avx512vl")]
15174 unsafe fn test_mm256_mask_sll_epi16() {
15175 let a = _mm256_set1_epi16(1 << 15);
15176 let count = _mm_set1_epi16(2);
15177 let r = _mm256_mask_sll_epi16(a, 0, a, count);
15178 assert_eq_m256i(r, a);
15179 let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count);
15180 let e = _mm256_set1_epi16(0);
15181 assert_eq_m256i(r, e);
15182 }
15183
15184 #[simd_test(enable = "avx512bw,avx512vl")]
15185 unsafe fn test_mm256_maskz_sll_epi16() {
15186 let a = _mm256_set1_epi16(1 << 15);
15187 let count = _mm_set1_epi16(2);
15188 let r = _mm256_maskz_sll_epi16(0, a, count);
15189 assert_eq_m256i(r, _mm256_setzero_si256());
15190 let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count);
15191 let e = _mm256_set1_epi16(0);
15192 assert_eq_m256i(r, e);
15193 }
15194
15195 #[simd_test(enable = "avx512bw,avx512vl")]
15196 unsafe fn test_mm_mask_sll_epi16() {
15197 let a = _mm_set1_epi16(1 << 15);
15198 let count = _mm_set1_epi16(2);
15199 let r = _mm_mask_sll_epi16(a, 0, a, count);
15200 assert_eq_m128i(r, a);
15201 let r = _mm_mask_sll_epi16(a, 0b11111111, a, count);
15202 let e = _mm_set1_epi16(0);
15203 assert_eq_m128i(r, e);
15204 }
15205
15206 #[simd_test(enable = "avx512bw,avx512vl")]
15207 unsafe fn test_mm_maskz_sll_epi16() {
15208 let a = _mm_set1_epi16(1 << 15);
15209 let count = _mm_set1_epi16(2);
15210 let r = _mm_maskz_sll_epi16(0, a, count);
15211 assert_eq_m128i(r, _mm_setzero_si128());
15212 let r = _mm_maskz_sll_epi16(0b11111111, a, count);
15213 let e = _mm_set1_epi16(0);
15214 assert_eq_m128i(r, e);
15215 }
15216
15217 #[simd_test(enable = "avx512bw")]
15218 unsafe fn test_mm512_slli_epi16() {
15219 let a = _mm512_set1_epi16(1 << 15);
15220 let r = _mm512_slli_epi16::<1>(a);
15221 let e = _mm512_set1_epi16(0);
15222 assert_eq_m512i(r, e);
15223 }
15224
15225 #[simd_test(enable = "avx512bw")]
15226 unsafe fn test_mm512_mask_slli_epi16() {
15227 let a = _mm512_set1_epi16(1 << 15);
15228 let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
15229 assert_eq_m512i(r, a);
15230 let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a);
15231 let e = _mm512_set1_epi16(0);
15232 assert_eq_m512i(r, e);
15233 }
15234
15235 #[simd_test(enable = "avx512bw")]
15236 unsafe fn test_mm512_maskz_slli_epi16() {
15237 let a = _mm512_set1_epi16(1 << 15);
15238 let r = _mm512_maskz_slli_epi16::<1>(0, a);
15239 assert_eq_m512i(r, _mm512_setzero_si512());
15240 let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a);
15241 let e = _mm512_set1_epi16(0);
15242 assert_eq_m512i(r, e);
15243 }
15244
15245 #[simd_test(enable = "avx512bw,avx512vl")]
15246 unsafe fn test_mm256_mask_slli_epi16() {
15247 let a = _mm256_set1_epi16(1 << 15);
15248 let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
15249 assert_eq_m256i(r, a);
15250 let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a);
15251 let e = _mm256_set1_epi16(0);
15252 assert_eq_m256i(r, e);
15253 }
15254
15255 #[simd_test(enable = "avx512bw,avx512vl")]
15256 unsafe fn test_mm256_maskz_slli_epi16() {
15257 let a = _mm256_set1_epi16(1 << 15);
15258 let r = _mm256_maskz_slli_epi16::<1>(0, a);
15259 assert_eq_m256i(r, _mm256_setzero_si256());
15260 let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a);
15261 let e = _mm256_set1_epi16(0);
15262 assert_eq_m256i(r, e);
15263 }
15264
15265 #[simd_test(enable = "avx512bw,avx512vl")]
15266 unsafe fn test_mm_mask_slli_epi16() {
15267 let a = _mm_set1_epi16(1 << 15);
15268 let r = _mm_mask_slli_epi16::<1>(a, 0, a);
15269 assert_eq_m128i(r, a);
15270 let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a);
15271 let e = _mm_set1_epi16(0);
15272 assert_eq_m128i(r, e);
15273 }
15274
15275 #[simd_test(enable = "avx512bw,avx512vl")]
15276 unsafe fn test_mm_maskz_slli_epi16() {
15277 let a = _mm_set1_epi16(1 << 15);
15278 let r = _mm_maskz_slli_epi16::<1>(0, a);
15279 assert_eq_m128i(r, _mm_setzero_si128());
15280 let r = _mm_maskz_slli_epi16::<1>(0b11111111, a);
15281 let e = _mm_set1_epi16(0);
15282 assert_eq_m128i(r, e);
15283 }
15284
15285 #[simd_test(enable = "avx512bw")]
15286 unsafe fn test_mm512_sllv_epi16() {
15287 let a = _mm512_set1_epi16(1 << 15);
15288 let count = _mm512_set1_epi16(2);
15289 let r = _mm512_sllv_epi16(a, count);
15290 let e = _mm512_set1_epi16(0);
15291 assert_eq_m512i(r, e);
15292 }
15293
15294 #[simd_test(enable = "avx512bw")]
15295 unsafe fn test_mm512_mask_sllv_epi16() {
15296 let a = _mm512_set1_epi16(1 << 15);
15297 let count = _mm512_set1_epi16(2);
15298 let r = _mm512_mask_sllv_epi16(a, 0, a, count);
15299 assert_eq_m512i(r, a);
15300 let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15301 let e = _mm512_set1_epi16(0);
15302 assert_eq_m512i(r, e);
15303 }
15304
15305 #[simd_test(enable = "avx512bw")]
15306 unsafe fn test_mm512_maskz_sllv_epi16() {
15307 let a = _mm512_set1_epi16(1 << 15);
15308 let count = _mm512_set1_epi16(2);
15309 let r = _mm512_maskz_sllv_epi16(0, a, count);
15310 assert_eq_m512i(r, _mm512_setzero_si512());
15311 let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count);
15312 let e = _mm512_set1_epi16(0);
15313 assert_eq_m512i(r, e);
15314 }
15315
15316 #[simd_test(enable = "avx512bw,avx512vl")]
15317 unsafe fn test_mm256_sllv_epi16() {
15318 let a = _mm256_set1_epi16(1 << 15);
15319 let count = _mm256_set1_epi16(2);
15320 let r = _mm256_sllv_epi16(a, count);
15321 let e = _mm256_set1_epi16(0);
15322 assert_eq_m256i(r, e);
15323 }
15324
15325 #[simd_test(enable = "avx512bw,avx512vl")]
15326 unsafe fn test_mm256_mask_sllv_epi16() {
15327 let a = _mm256_set1_epi16(1 << 15);
15328 let count = _mm256_set1_epi16(2);
15329 let r = _mm256_mask_sllv_epi16(a, 0, a, count);
15330 assert_eq_m256i(r, a);
15331 let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count);
15332 let e = _mm256_set1_epi16(0);
15333 assert_eq_m256i(r, e);
15334 }
15335
15336 #[simd_test(enable = "avx512bw,avx512vl")]
15337 unsafe fn test_mm256_maskz_sllv_epi16() {
15338 let a = _mm256_set1_epi16(1 << 15);
15339 let count = _mm256_set1_epi16(2);
15340 let r = _mm256_maskz_sllv_epi16(0, a, count);
15341 assert_eq_m256i(r, _mm256_setzero_si256());
15342 let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count);
15343 let e = _mm256_set1_epi16(0);
15344 assert_eq_m256i(r, e);
15345 }
15346
15347 #[simd_test(enable = "avx512bw,avx512vl")]
15348 unsafe fn test_mm_sllv_epi16() {
15349 let a = _mm_set1_epi16(1 << 15);
15350 let count = _mm_set1_epi16(2);
15351 let r = _mm_sllv_epi16(a, count);
15352 let e = _mm_set1_epi16(0);
15353 assert_eq_m128i(r, e);
15354 }
15355
15356 #[simd_test(enable = "avx512bw,avx512vl")]
15357 unsafe fn test_mm_mask_sllv_epi16() {
15358 let a = _mm_set1_epi16(1 << 15);
15359 let count = _mm_set1_epi16(2);
15360 let r = _mm_mask_sllv_epi16(a, 0, a, count);
15361 assert_eq_m128i(r, a);
15362 let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count);
15363 let e = _mm_set1_epi16(0);
15364 assert_eq_m128i(r, e);
15365 }
15366
15367 #[simd_test(enable = "avx512bw,avx512vl")]
15368 unsafe fn test_mm_maskz_sllv_epi16() {
15369 let a = _mm_set1_epi16(1 << 15);
15370 let count = _mm_set1_epi16(2);
15371 let r = _mm_maskz_sllv_epi16(0, a, count);
15372 assert_eq_m128i(r, _mm_setzero_si128());
15373 let r = _mm_maskz_sllv_epi16(0b11111111, a, count);
15374 let e = _mm_set1_epi16(0);
15375 assert_eq_m128i(r, e);
15376 }
15377
15378 #[simd_test(enable = "avx512bw")]
15379 unsafe fn test_mm512_srl_epi16() {
15380 let a = _mm512_set1_epi16(1 << 1);
15381 let count = _mm_set1_epi16(2);
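        // logical right shift of 1 << 1 by 2 drops the only set bit, so every lane ends up 0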
15382 let r = _mm512_srl_epi16(a, count);
15383 let e = _mm512_set1_epi16(0);
15384 assert_eq_m512i(r, e);
15385 }
15386
15387 #[simd_test(enable = "avx512bw")]
15388 unsafe fn test_mm512_mask_srl_epi16() {
15389 let a = _mm512_set1_epi16(1 << 1);
15390 let count = _mm_set1_epi16(2);
15391 let r = _mm512_mask_srl_epi16(a, 0, a, count);
15392 assert_eq_m512i(r, a);
15393 let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15394 let e = _mm512_set1_epi16(0);
15395 assert_eq_m512i(r, e);
15396 }
15397
15398 #[simd_test(enable = "avx512bw")]
15399 unsafe fn test_mm512_maskz_srl_epi16() {
15400 let a = _mm512_set1_epi16(1 << 1);
15401 let count = _mm_set1_epi16(2);
15402 let r = _mm512_maskz_srl_epi16(0, a, count);
15403 assert_eq_m512i(r, _mm512_setzero_si512());
15404 let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count);
15405 let e = _mm512_set1_epi16(0);
15406 assert_eq_m512i(r, e);
15407 }
15408
15409 #[simd_test(enable = "avx512bw,avx512vl")]
15410 unsafe fn test_mm256_mask_srl_epi16() {
15411 let a = _mm256_set1_epi16(1 << 1);
15412 let count = _mm_set1_epi16(2);
15413 let r = _mm256_mask_srl_epi16(a, 0, a, count);
15414 assert_eq_m256i(r, a);
15415 let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count);
15416 let e = _mm256_set1_epi16(0);
15417 assert_eq_m256i(r, e);
15418 }
15419
15420 #[simd_test(enable = "avx512bw,avx512vl")]
15421 unsafe fn test_mm256_maskz_srl_epi16() {
15422 let a = _mm256_set1_epi16(1 << 1);
15423 let count = _mm_set1_epi16(2);
15424 let r = _mm256_maskz_srl_epi16(0, a, count);
15425 assert_eq_m256i(r, _mm256_setzero_si256());
15426 let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count);
15427 let e = _mm256_set1_epi16(0);
15428 assert_eq_m256i(r, e);
15429 }
15430
15431 #[simd_test(enable = "avx512bw,avx512vl")]
15432 unsafe fn test_mm_mask_srl_epi16() {
15433 let a = _mm_set1_epi16(1 << 1);
15434 let count = _mm_set1_epi16(2);
15435 let r = _mm_mask_srl_epi16(a, 0, a, count);
15436 assert_eq_m128i(r, a);
15437 let r = _mm_mask_srl_epi16(a, 0b11111111, a, count);
15438 let e = _mm_set1_epi16(0);
15439 assert_eq_m128i(r, e);
15440 }
15441
15442 #[simd_test(enable = "avx512bw,avx512vl")]
15443 unsafe fn test_mm_maskz_srl_epi16() {
15444 let a = _mm_set1_epi16(1 << 1);
15445 let count = _mm_set1_epi16(2);
15446 let r = _mm_maskz_srl_epi16(0, a, count);
15447 assert_eq_m128i(r, _mm_setzero_si128());
15448 let r = _mm_maskz_srl_epi16(0b11111111, a, count);
15449 let e = _mm_set1_epi16(0);
15450 assert_eq_m128i(r, e);
15451 }
15452
15453 #[simd_test(enable = "avx512bw")]
15454 unsafe fn test_mm512_srli_epi16() {
15455 let a = _mm512_set1_epi16(1 << 1);
15456 let r = _mm512_srli_epi16::<2>(a);
15457 let e = _mm512_set1_epi16(0);
15458 assert_eq_m512i(r, e);
15459 }
15460
15461 #[simd_test(enable = "avx512bw")]
15462 unsafe fn test_mm512_mask_srli_epi16() {
15463 let a = _mm512_set1_epi16(1 << 1);
15464 let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
15465 assert_eq_m512i(r, a);
15466 let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
15467 let e = _mm512_set1_epi16(0);
15468 assert_eq_m512i(r, e);
15469 }
15470
15471 #[simd_test(enable = "avx512bw")]
15472 unsafe fn test_mm512_maskz_srli_epi16() {
15473 let a = _mm512_set1_epi16(1 << 1);
15474 let r = _mm512_maskz_srli_epi16::<2>(0, a);
15475 assert_eq_m512i(r, _mm512_setzero_si512());
15476 let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
15477 let e = _mm512_set1_epi16(0);
15478 assert_eq_m512i(r, e);
15479 }
15480
15481 #[simd_test(enable = "avx512bw,avx512vl")]
15482 unsafe fn test_mm256_mask_srli_epi16() {
15483 let a = _mm256_set1_epi16(1 << 1);
15484 let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
15485 assert_eq_m256i(r, a);
15486 let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a);
15487 let e = _mm256_set1_epi16(0);
15488 assert_eq_m256i(r, e);
15489 }
15490
15491 #[simd_test(enable = "avx512bw,avx512vl")]
15492 unsafe fn test_mm256_maskz_srli_epi16() {
15493 let a = _mm256_set1_epi16(1 << 1);
15494 let r = _mm256_maskz_srli_epi16::<2>(0, a);
15495 assert_eq_m256i(r, _mm256_setzero_si256());
15496 let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a);
15497 let e = _mm256_set1_epi16(0);
15498 assert_eq_m256i(r, e);
15499 }
15500
15501 #[simd_test(enable = "avx512bw,avx512vl")]
15502 unsafe fn test_mm_mask_srli_epi16() {
15503 let a = _mm_set1_epi16(1 << 1);
15504 let r = _mm_mask_srli_epi16::<2>(a, 0, a);
15505 assert_eq_m128i(r, a);
15506 let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a);
15507 let e = _mm_set1_epi16(0);
15508 assert_eq_m128i(r, e);
15509 }
15510
15511 #[simd_test(enable = "avx512bw,avx512vl")]
15512 unsafe fn test_mm_maskz_srli_epi16() {
15513 let a = _mm_set1_epi16(1 << 1);
15514 let r = _mm_maskz_srli_epi16::<2>(0, a);
15515 assert_eq_m128i(r, _mm_setzero_si128());
15516 let r = _mm_maskz_srli_epi16::<2>(0b11111111, a);
15517 let e = _mm_set1_epi16(0);
15518 assert_eq_m128i(r, e);
15519 }
15520
15521 #[simd_test(enable = "avx512bw")]
15522 unsafe fn test_mm512_srlv_epi16() {
15523 let a = _mm512_set1_epi16(1 << 1);
15524 let count = _mm512_set1_epi16(2);
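        // srlv uses a per-lane shift count (unlike srl's single 64-bit count):
        // (1 << 1) >> 2 == 0 in every lane.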
15525 let r = _mm512_srlv_epi16(a, count);
15526 let e = _mm512_set1_epi16(0);
15527 assert_eq_m512i(r, e);
15528 }
15529
15530 #[simd_test(enable = "avx512bw")]
15531 unsafe fn test_mm512_mask_srlv_epi16() {
15532 let a = _mm512_set1_epi16(1 << 1);
15533 let count = _mm512_set1_epi16(2);
15534 let r = _mm512_mask_srlv_epi16(a, 0, a, count);
15535 assert_eq_m512i(r, a);
15536 let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15537 let e = _mm512_set1_epi16(0);
15538 assert_eq_m512i(r, e);
15539 }
15540
15541 #[simd_test(enable = "avx512bw")]
15542 unsafe fn test_mm512_maskz_srlv_epi16() {
15543 let a = _mm512_set1_epi16(1 << 1);
15544 let count = _mm512_set1_epi16(2);
15545 let r = _mm512_maskz_srlv_epi16(0, a, count);
15546 assert_eq_m512i(r, _mm512_setzero_si512());
15547 let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count);
15548 let e = _mm512_set1_epi16(0);
15549 assert_eq_m512i(r, e);
15550 }
15551
15552 #[simd_test(enable = "avx512bw,avx512vl")]
15553 unsafe fn test_mm256_srlv_epi16() {
15554 let a = _mm256_set1_epi16(1 << 1);
15555 let count = _mm256_set1_epi16(2);
15556 let r = _mm256_srlv_epi16(a, count);
15557 let e = _mm256_set1_epi16(0);
15558 assert_eq_m256i(r, e);
15559 }
15560
15561 #[simd_test(enable = "avx512bw,avx512vl")]
15562 unsafe fn test_mm256_mask_srlv_epi16() {
15563 let a = _mm256_set1_epi16(1 << 1);
15564 let count = _mm256_set1_epi16(2);
15565 let r = _mm256_mask_srlv_epi16(a, 0, a, count);
15566 assert_eq_m256i(r, a);
15567 let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count);
15568 let e = _mm256_set1_epi16(0);
15569 assert_eq_m256i(r, e);
15570 }
15571
15572 #[simd_test(enable = "avx512bw,avx512vl")]
15573 unsafe fn test_mm256_maskz_srlv_epi16() {
15574 let a = _mm256_set1_epi16(1 << 1);
15575 let count = _mm256_set1_epi16(2);
15576 let r = _mm256_maskz_srlv_epi16(0, a, count);
15577 assert_eq_m256i(r, _mm256_setzero_si256());
15578 let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count);
15579 let e = _mm256_set1_epi16(0);
15580 assert_eq_m256i(r, e);
15581 }
15582
15583 #[simd_test(enable = "avx512bw,avx512vl")]
15584 unsafe fn test_mm_srlv_epi16() {
15585 let a = _mm_set1_epi16(1 << 1);
15586 let count = _mm_set1_epi16(2);
15587 let r = _mm_srlv_epi16(a, count);
15588 let e = _mm_set1_epi16(0);
15589 assert_eq_m128i(r, e);
15590 }
15591
15592 #[simd_test(enable = "avx512bw,avx512vl")]
15593 unsafe fn test_mm_mask_srlv_epi16() {
15594 let a = _mm_set1_epi16(1 << 1);
15595 let count = _mm_set1_epi16(2);
15596 let r = _mm_mask_srlv_epi16(a, 0, a, count);
15597 assert_eq_m128i(r, a);
15598 let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count);
15599 let e = _mm_set1_epi16(0);
15600 assert_eq_m128i(r, e);
15601 }
15602
15603 #[simd_test(enable = "avx512bw,avx512vl")]
15604 unsafe fn test_mm_maskz_srlv_epi16() {
15605 let a = _mm_set1_epi16(1 << 1);
15606 let count = _mm_set1_epi16(2);
15607 let r = _mm_maskz_srlv_epi16(0, a, count);
15608 assert_eq_m128i(r, _mm_setzero_si128());
15609 let r = _mm_maskz_srlv_epi16(0b11111111, a, count);
15610 let e = _mm_set1_epi16(0);
15611 assert_eq_m128i(r, e);
15612 }
15613
15614 #[simd_test(enable = "avx512bw")]
15615 unsafe fn test_mm512_sra_epi16() {
15616 let a = _mm512_set1_epi16(8);
15617 let count = _mm_set1_epi16(1);
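        // Like srl, sra reads one shift amount from the low 64 bits of `count`;
        // _mm_set1_epi16(1) yields 0x0001_0001_0001_0001, which exceeds 15, so the
        // positive lanes are arithmetically shifted all the way down to 0.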
15618 let r = _mm512_sra_epi16(a, count);
15619 let e = _mm512_set1_epi16(0);
15620 assert_eq_m512i(r, e);
15621 }
15622
15623 #[simd_test(enable = "avx512bw")]
15624 unsafe fn test_mm512_mask_sra_epi16() {
15625 let a = _mm512_set1_epi16(8);
15626 let count = _mm_set1_epi16(1);
15627 let r = _mm512_mask_sra_epi16(a, 0, a, count);
15628 assert_eq_m512i(r, a);
15629 let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15630 let e = _mm512_set1_epi16(0);
15631 assert_eq_m512i(r, e);
15632 }
15633
15634 #[simd_test(enable = "avx512bw")]
15635 unsafe fn test_mm512_maskz_sra_epi16() {
15636 let a = _mm512_set1_epi16(8);
15637 let count = _mm_set1_epi16(1);
15638 let r = _mm512_maskz_sra_epi16(0, a, count);
15639 assert_eq_m512i(r, _mm512_setzero_si512());
15640 let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count);
15641 let e = _mm512_set1_epi16(0);
15642 assert_eq_m512i(r, e);
15643 }
15644
15645 #[simd_test(enable = "avx512bw,avx512vl")]
15646 unsafe fn test_mm256_mask_sra_epi16() {
15647 let a = _mm256_set1_epi16(8);
15648 let count = _mm_set1_epi16(1);
15649 let r = _mm256_mask_sra_epi16(a, 0, a, count);
15650 assert_eq_m256i(r, a);
15651 let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count);
15652 let e = _mm256_set1_epi16(0);
15653 assert_eq_m256i(r, e);
15654 }
15655
15656 #[simd_test(enable = "avx512bw,avx512vl")]
15657 unsafe fn test_mm256_maskz_sra_epi16() {
15658 let a = _mm256_set1_epi16(8);
15659 let count = _mm_set1_epi16(1);
15660 let r = _mm256_maskz_sra_epi16(0, a, count);
15661 assert_eq_m256i(r, _mm256_setzero_si256());
15662 let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count);
15663 let e = _mm256_set1_epi16(0);
15664 assert_eq_m256i(r, e);
15665 }
15666
15667 #[simd_test(enable = "avx512bw,avx512vl")]
15668 unsafe fn test_mm_mask_sra_epi16() {
15669 let a = _mm_set1_epi16(8);
15670 let count = _mm_set1_epi16(1);
15671 let r = _mm_mask_sra_epi16(a, 0, a, count);
15672 assert_eq_m128i(r, a);
15673 let r = _mm_mask_sra_epi16(a, 0b11111111, a, count);
15674 let e = _mm_set1_epi16(0);
15675 assert_eq_m128i(r, e);
15676 }
15677
15678 #[simd_test(enable = "avx512bw,avx512vl")]
15679 unsafe fn test_mm_maskz_sra_epi16() {
15680 let a = _mm_set1_epi16(8);
15681 let count = _mm_set1_epi16(1);
15682 let r = _mm_maskz_sra_epi16(0, a, count);
15683 assert_eq_m128i(r, _mm_setzero_si128());
15684 let r = _mm_maskz_sra_epi16(0b11111111, a, count);
15685 let e = _mm_set1_epi16(0);
15686 assert_eq_m128i(r, e);
15687 }
15688
15689 #[simd_test(enable = "avx512bw")]
15690 unsafe fn test_mm512_srai_epi16() {
15691 let a = _mm512_set1_epi16(8);
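        // srai shifts right by an immediate with sign extension: 8 >> 2 == 2.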
15692 let r = _mm512_srai_epi16::<2>(a);
15693 let e = _mm512_set1_epi16(2);
15694 assert_eq_m512i(r, e);
15695 }
15696
15697 #[simd_test(enable = "avx512bw")]
15698 unsafe fn test_mm512_mask_srai_epi16() {
15699 let a = _mm512_set1_epi16(8);
15700 let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
15701 assert_eq_m512i(r, a);
15702 let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a);
15703 let e = _mm512_set1_epi16(2);
15704 assert_eq_m512i(r, e);
15705 }
15706
15707 #[simd_test(enable = "avx512bw")]
15708 unsafe fn test_mm512_maskz_srai_epi16() {
15709 let a = _mm512_set1_epi16(8);
15710 let r = _mm512_maskz_srai_epi16::<2>(0, a);
15711 assert_eq_m512i(r, _mm512_setzero_si512());
15712 let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a);
15713 let e = _mm512_set1_epi16(2);
15714 assert_eq_m512i(r, e);
15715 }
15716
15717 #[simd_test(enable = "avx512bw,avx512vl")]
15718 unsafe fn test_mm256_mask_srai_epi16() {
15719 let a = _mm256_set1_epi16(8);
15720 let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
15721 assert_eq_m256i(r, a);
15722 let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a);
15723 let e = _mm256_set1_epi16(2);
15724 assert_eq_m256i(r, e);
15725 }
15726
15727 #[simd_test(enable = "avx512bw,avx512vl")]
15728 unsafe fn test_mm256_maskz_srai_epi16() {
15729 let a = _mm256_set1_epi16(8);
15730 let r = _mm256_maskz_srai_epi16::<2>(0, a);
15731 assert_eq_m256i(r, _mm256_setzero_si256());
15732 let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a);
15733 let e = _mm256_set1_epi16(2);
15734 assert_eq_m256i(r, e);
15735 }
15736
15737 #[simd_test(enable = "avx512bw,avx512vl")]
15738 unsafe fn test_mm_mask_srai_epi16() {
15739 let a = _mm_set1_epi16(8);
15740 let r = _mm_mask_srai_epi16::<2>(a, 0, a);
15741 assert_eq_m128i(r, a);
15742 let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a);
15743 let e = _mm_set1_epi16(2);
15744 assert_eq_m128i(r, e);
15745 }
15746
15747 #[simd_test(enable = "avx512bw,avx512vl")]
15748 unsafe fn test_mm_maskz_srai_epi16() {
15749 let a = _mm_set1_epi16(8);
15750 let r = _mm_maskz_srai_epi16::<2>(0, a);
15751 assert_eq_m128i(r, _mm_setzero_si128());
15752 let r = _mm_maskz_srai_epi16::<2>(0b11111111, a);
15753 let e = _mm_set1_epi16(2);
15754 assert_eq_m128i(r, e);
15755 }
15756
15757 #[simd_test(enable = "avx512bw")]
15758 unsafe fn test_mm512_srav_epi16() {
15759 let a = _mm512_set1_epi16(8);
15760 let count = _mm512_set1_epi16(2);
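        // srav shifts each lane right arithmetically by its per-lane count: 8 >> 2 == 2.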
15761 let r = _mm512_srav_epi16(a, count);
15762 let e = _mm512_set1_epi16(2);
15763 assert_eq_m512i(r, e);
15764 }
15765
15766 #[simd_test(enable = "avx512bw")]
15767 unsafe fn test_mm512_mask_srav_epi16() {
15768 let a = _mm512_set1_epi16(8);
15769 let count = _mm512_set1_epi16(2);
15770 let r = _mm512_mask_srav_epi16(a, 0, a, count);
15771 assert_eq_m512i(r, a);
15772 let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count);
15773 let e = _mm512_set1_epi16(2);
15774 assert_eq_m512i(r, e);
15775 }
15776
15777 #[simd_test(enable = "avx512bw")]
15778 unsafe fn test_mm512_maskz_srav_epi16() {
15779 let a = _mm512_set1_epi16(8);
15780 let count = _mm512_set1_epi16(2);
15781 let r = _mm512_maskz_srav_epi16(0, a, count);
15782 assert_eq_m512i(r, _mm512_setzero_si512());
15783 let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count);
15784 let e = _mm512_set1_epi16(2);
15785 assert_eq_m512i(r, e);
15786 }
15787
15788 #[simd_test(enable = "avx512bw,avx512vl")]
15789 unsafe fn test_mm256_srav_epi16() {
15790 let a = _mm256_set1_epi16(8);
15791 let count = _mm256_set1_epi16(2);
15792 let r = _mm256_srav_epi16(a, count);
15793 let e = _mm256_set1_epi16(2);
15794 assert_eq_m256i(r, e);
15795 }
15796
15797 #[simd_test(enable = "avx512bw,avx512vl")]
15798 unsafe fn test_mm256_mask_srav_epi16() {
15799 let a = _mm256_set1_epi16(8);
15800 let count = _mm256_set1_epi16(2);
15801 let r = _mm256_mask_srav_epi16(a, 0, a, count);
15802 assert_eq_m256i(r, a);
15803 let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count);
15804 let e = _mm256_set1_epi16(2);
15805 assert_eq_m256i(r, e);
15806 }
15807
15808 #[simd_test(enable = "avx512bw,avx512vl")]
15809 unsafe fn test_mm256_maskz_srav_epi16() {
15810 let a = _mm256_set1_epi16(8);
15811 let count = _mm256_set1_epi16(2);
15812 let r = _mm256_maskz_srav_epi16(0, a, count);
15813 assert_eq_m256i(r, _mm256_setzero_si256());
15814 let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count);
15815 let e = _mm256_set1_epi16(2);
15816 assert_eq_m256i(r, e);
15817 }
15818
15819 #[simd_test(enable = "avx512bw,avx512vl")]
15820 unsafe fn test_mm_srav_epi16() {
15821 let a = _mm_set1_epi16(8);
15822 let count = _mm_set1_epi16(2);
15823 let r = _mm_srav_epi16(a, count);
15824 let e = _mm_set1_epi16(2);
15825 assert_eq_m128i(r, e);
15826 }
15827
15828 #[simd_test(enable = "avx512bw,avx512vl")]
15829 unsafe fn test_mm_mask_srav_epi16() {
15830 let a = _mm_set1_epi16(8);
15831 let count = _mm_set1_epi16(2);
15832 let r = _mm_mask_srav_epi16(a, 0, a, count);
15833 assert_eq_m128i(r, a);
15834 let r = _mm_mask_srav_epi16(a, 0b11111111, a, count);
15835 let e = _mm_set1_epi16(2);
15836 assert_eq_m128i(r, e);
15837 }
15838
15839 #[simd_test(enable = "avx512bw,avx512vl")]
15840 unsafe fn test_mm_maskz_srav_epi16() {
15841 let a = _mm_set1_epi16(8);
15842 let count = _mm_set1_epi16(2);
15843 let r = _mm_maskz_srav_epi16(0, a, count);
15844 assert_eq_m128i(r, _mm_setzero_si128());
15845 let r = _mm_maskz_srav_epi16(0b11111111, a, count);
15846 let e = _mm_set1_epi16(2);
15847 assert_eq_m128i(r, e);
15848 }
15849
15850 #[simd_test(enable = "avx512bw")]
15851 unsafe fn test_mm512_permutex2var_epi16() {
15852 #[rustfmt::skip]
15853 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15854 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
15855 #[rustfmt::skip]
15856 let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
15857 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
15858 let b = _mm512_set1_epi16(100);
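        // Each index uses its low 5 bits to pick a lane and bit 5 (value 32) to pick the
        // source: an index of 1 selects from `a`, an index of 1<<5 selects lane 0 of `b`.
        // _mm512_set_epi16 lists elements from highest to lowest, so a's element 1 is 30.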
15859 let r = _mm512_permutex2var_epi16(a, idx, b);
15860 #[rustfmt::skip]
15861 let e = _mm512_set_epi16(
15862 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
15863 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
15864 );
15865 assert_eq_m512i(r, e);
15866 }
15867
15868 #[simd_test(enable = "avx512bw")]
15869 unsafe fn test_mm512_mask_permutex2var_epi16() {
15870 #[rustfmt::skip]
15871 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15872 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
15873 #[rustfmt::skip]
15874 let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
15875 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
15876 let b = _mm512_set1_epi16(100);
15877 let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b);
15878 assert_eq_m512i(r, a);
15879 let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b);
15880 #[rustfmt::skip]
15881 let e = _mm512_set_epi16(
15882 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
15883 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
15884 );
15885 assert_eq_m512i(r, e);
15886 }
15887
15888 #[simd_test(enable = "avx512bw")]
15889 unsafe fn test_mm512_maskz_permutex2var_epi16() {
15890 #[rustfmt::skip]
15891 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15892 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
15893 #[rustfmt::skip]
15894 let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
15895 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
15896 let b = _mm512_set1_epi16(100);
15897 let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b);
15898 assert_eq_m512i(r, _mm512_setzero_si512());
15899 let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b);
15900 #[rustfmt::skip]
15901 let e = _mm512_set_epi16(
15902 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
15903 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
15904 );
15905 assert_eq_m512i(r, e);
15906 }
15907
15908 #[simd_test(enable = "avx512bw")]
15909 unsafe fn test_mm512_mask2_permutex2var_epi16() {
15910 #[rustfmt::skip]
15911 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
15912 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
15913 #[rustfmt::skip]
15914 let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5,
15915 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5);
15916 let b = _mm512_set1_epi16(100);
15917 let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b);
15918 assert_eq_m512i(r, idx);
15919 let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b);
15920 #[rustfmt::skip]
15921 let e = _mm512_set_epi16(
15922 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100,
15923 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100,
15924 );
15925 assert_eq_m512i(r, e);
15926 }
15927
15928 #[simd_test(enable = "avx512bw,avx512vl")]
15929 unsafe fn test_mm256_permutex2var_epi16() {
15930 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15931 #[rustfmt::skip]
15932 let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
15933 let b = _mm256_set1_epi16(100);
15934 let r = _mm256_permutex2var_epi16(a, idx, b);
15935 let e = _mm256_set_epi16(
15936 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
15937 );
15938 assert_eq_m256i(r, e);
15939 }
15940
15941 #[simd_test(enable = "avx512bw,avx512vl")]
15942 unsafe fn test_mm256_mask_permutex2var_epi16() {
15943 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15944 #[rustfmt::skip]
15945 let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
15946 let b = _mm256_set1_epi16(100);
15947 let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b);
15948 assert_eq_m256i(r, a);
15949 let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b);
15950 let e = _mm256_set_epi16(
15951 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
15952 );
15953 assert_eq_m256i(r, e);
15954 }
15955
15956 #[simd_test(enable = "avx512bw,avx512vl")]
15957 unsafe fn test_mm256_maskz_permutex2var_epi16() {
15958 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15959 #[rustfmt::skip]
15960 let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
15961 let b = _mm256_set1_epi16(100);
15962 let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b);
15963 assert_eq_m256i(r, _mm256_setzero_si256());
15964 let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b);
15965 let e = _mm256_set_epi16(
15966 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
15967 );
15968 assert_eq_m256i(r, e);
15969 }
15970
15971 #[simd_test(enable = "avx512bw,avx512vl")]
15972 unsafe fn test_mm256_mask2_permutex2var_epi16() {
15973 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
15974 #[rustfmt::skip]
15975 let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
15976 let b = _mm256_set1_epi16(100);
15977 let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b);
15978 assert_eq_m256i(r, idx);
15979 let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b);
15980 #[rustfmt::skip]
15981 let e = _mm256_set_epi16(
15982 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
15983 );
15984 assert_eq_m256i(r, e);
15985 }
15986
15987 #[simd_test(enable = "avx512bw,avx512vl")]
15988 unsafe fn test_mm_permutex2var_epi16() {
15989 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
15990 let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
15991 let b = _mm_set1_epi16(100);
15992 let r = _mm_permutex2var_epi16(a, idx, b);
15993 let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
15994 assert_eq_m128i(r, e);
15995 }
15996
15997 #[simd_test(enable = "avx512bw,avx512vl")]
15998 unsafe fn test_mm_mask_permutex2var_epi16() {
15999 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16000 let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
16001 let b = _mm_set1_epi16(100);
16002 let r = _mm_mask_permutex2var_epi16(a, 0, idx, b);
16003 assert_eq_m128i(r, a);
16004 let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b);
16005 let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
16006 assert_eq_m128i(r, e);
16007 }
16008
16009 #[simd_test(enable = "avx512bw,avx512vl")]
16010 unsafe fn test_mm_maskz_permutex2var_epi16() {
16011 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16012 let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
16013 let b = _mm_set1_epi16(100);
16014 let r = _mm_maskz_permutex2var_epi16(0, a, idx, b);
16015 assert_eq_m128i(r, _mm_setzero_si128());
16016 let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b);
16017 let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
16018 assert_eq_m128i(r, e);
16019 }
16020
16021 #[simd_test(enable = "avx512bw,avx512vl")]
16022 unsafe fn test_mm_mask2_permutex2var_epi16() {
16023 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16024 let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
16025 let b = _mm_set1_epi16(100);
16026 let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b);
16027 assert_eq_m128i(r, idx);
16028 let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b);
16029 let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100);
16030 assert_eq_m128i(r, e);
16031 }
16032
16033 #[simd_test(enable = "avx512bw")]
16034 unsafe fn test_mm512_permutexvar_epi16() {
16035 let idx = _mm512_set1_epi16(1);
16036 #[rustfmt::skip]
16037 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16038 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
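        // Index 1 selects element 1 of `a` for every lane; since _mm512_set_epi16 lists
        // elements from highest to lowest, element 1 holds the value 30.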
16039 let r = _mm512_permutexvar_epi16(idx, a);
16040 let e = _mm512_set1_epi16(30);
16041 assert_eq_m512i(r, e);
16042 }
16043
16044 #[simd_test(enable = "avx512bw")]
16045 unsafe fn test_mm512_mask_permutexvar_epi16() {
16046 let idx = _mm512_set1_epi16(1);
16047 #[rustfmt::skip]
16048 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16049 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16050 let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
16051 assert_eq_m512i(r, a);
16052 let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
16053 let e = _mm512_set1_epi16(30);
16054 assert_eq_m512i(r, e);
16055 }
16056
16057 #[simd_test(enable = "avx512bw")]
16058 unsafe fn test_mm512_maskz_permutexvar_epi16() {
16059 let idx = _mm512_set1_epi16(1);
16060 #[rustfmt::skip]
16061 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16062 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
16063 let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
16064 assert_eq_m512i(r, _mm512_setzero_si512());
16065 let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
16066 let e = _mm512_set1_epi16(30);
16067 assert_eq_m512i(r, e);
16068 }
16069
16070 #[simd_test(enable = "avx512bw,avx512vl")]
16071 unsafe fn test_mm256_permutexvar_epi16() {
16072 let idx = _mm256_set1_epi16(1);
16073 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16074 let r = _mm256_permutexvar_epi16(idx, a);
16075 let e = _mm256_set1_epi16(14);
16076 assert_eq_m256i(r, e);
16077 }
16078
16079 #[simd_test(enable = "avx512bw,avx512vl")]
16080 unsafe fn test_mm256_mask_permutexvar_epi16() {
16081 let idx = _mm256_set1_epi16(1);
16082 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16083 let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
16084 assert_eq_m256i(r, a);
16085 let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
16086 let e = _mm256_set1_epi16(14);
16087 assert_eq_m256i(r, e);
16088 }
16089
16090 #[simd_test(enable = "avx512bw,avx512vl")]
16091 unsafe fn test_mm256_maskz_permutexvar_epi16() {
16092 let idx = _mm256_set1_epi16(1);
16093 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16094 let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
16095 assert_eq_m256i(r, _mm256_setzero_si256());
16096 let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
16097 let e = _mm256_set1_epi16(14);
16098 assert_eq_m256i(r, e);
16099 }
16100
16101 #[simd_test(enable = "avx512bw,avx512vl")]
16102 unsafe fn test_mm_permutexvar_epi16() {
16103 let idx = _mm_set1_epi16(1);
16104 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16105 let r = _mm_permutexvar_epi16(idx, a);
16106 let e = _mm_set1_epi16(6);
16107 assert_eq_m128i(r, e);
16108 }
16109
16110 #[simd_test(enable = "avx512bw,avx512vl")]
16111 unsafe fn test_mm_mask_permutexvar_epi16() {
16112 let idx = _mm_set1_epi16(1);
16113 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16114 let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
16115 assert_eq_m128i(r, a);
16116 let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
16117 let e = _mm_set1_epi16(6);
16118 assert_eq_m128i(r, e);
16119 }
16120
16121 #[simd_test(enable = "avx512bw,avx512vl")]
16122 unsafe fn test_mm_maskz_permutexvar_epi16() {
16123 let idx = _mm_set1_epi16(1);
16124 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
16125 let r = _mm_maskz_permutexvar_epi16(0, idx, a);
16126 assert_eq_m128i(r, _mm_setzero_si128());
16127 let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
16128 let e = _mm_set1_epi16(6);
16129 assert_eq_m128i(r, e);
16130 }
16131
16132 #[simd_test(enable = "avx512bw")]
16133 unsafe fn test_mm512_mask_blend_epi16() {
16134 let a = _mm512_set1_epi16(1);
16135 let b = _mm512_set1_epi16(2);
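        // A set mask bit selects the lane from `b`; a clear bit keeps the lane from `a`.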
16136 let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
16137 #[rustfmt::skip]
16138 let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
16139 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16140 assert_eq_m512i(r, e);
16141 }
16142
16143 #[simd_test(enable = "avx512bw,avx512vl")]
16144 unsafe fn test_mm256_mask_blend_epi16() {
16145 let a = _mm256_set1_epi16(1);
16146 let b = _mm256_set1_epi16(2);
16147 let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
16148 let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16149 assert_eq_m256i(r, e);
16150 }
16151
16152 #[simd_test(enable = "avx512bw,avx512vl")]
16153 unsafe fn test_mm_mask_blend_epi16() {
16154 let a = _mm_set1_epi16(1);
16155 let b = _mm_set1_epi16(2);
16156 let r = _mm_mask_blend_epi16(0b11110000, a, b);
16157 let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
16158 assert_eq_m128i(r, e);
16159 }
16160
16161 #[simd_test(enable = "avx512bw")]
16162 unsafe fn test_mm512_mask_blend_epi8() {
16163 let a = _mm512_set1_epi8(1);
16164 let b = _mm512_set1_epi8(2);
16165 let r = _mm512_mask_blend_epi8(
16166 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
16167 a,
16168 b,
16169 );
16170 #[rustfmt::skip]
16171 let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
16172 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
16173 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
16174 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16175 assert_eq_m512i(r, e);
16176 }
16177
16178 #[simd_test(enable = "avx512bw,avx512vl")]
16179 unsafe fn test_mm256_mask_blend_epi8() {
16180 let a = _mm256_set1_epi8(1);
16181 let b = _mm256_set1_epi8(2);
16182 let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
16183 #[rustfmt::skip]
16184 let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
16185 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16186 assert_eq_m256i(r, e);
16187 }
16188
16189 #[simd_test(enable = "avx512bw,avx512vl")]
16190 unsafe fn test_mm_mask_blend_epi8() {
16191 let a = _mm_set1_epi8(1);
16192 let b = _mm_set1_epi8(2);
16193 let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
16194 let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
16195 assert_eq_m128i(r, e);
16196 }
16197
16198 #[simd_test(enable = "avx512bw")]
16199 unsafe fn test_mm512_broadcastw_epi16() {
16200 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
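        // broadcastw replicates the lowest 16-bit element of `a` (24, the last argument
        // of _mm_set_epi16) into every lane of the result.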
16201 let r = _mm512_broadcastw_epi16(a);
16202 let e = _mm512_set1_epi16(24);
16203 assert_eq_m512i(r, e);
16204 }
16205
16206 #[simd_test(enable = "avx512bw")]
16207 unsafe fn test_mm512_mask_broadcastw_epi16() {
16208 let src = _mm512_set1_epi16(1);
16209 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
16210 let r = _mm512_mask_broadcastw_epi16(src, 0, a);
16211 assert_eq_m512i(r, src);
16212 let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
16213 let e = _mm512_set1_epi16(24);
16214 assert_eq_m512i(r, e);
16215 }
16216
16217 #[simd_test(enable = "avx512bw")]
16218 unsafe fn test_mm512_maskz_broadcastw_epi16() {
16219 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
16220 let r = _mm512_maskz_broadcastw_epi16(0, a);
16221 assert_eq_m512i(r, _mm512_setzero_si512());
16222 let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
16223 let e = _mm512_set1_epi16(24);
16224 assert_eq_m512i(r, e);
16225 }
16226
16227 #[simd_test(enable = "avx512bw,avx512vl")]
16228 unsafe fn test_mm256_mask_broadcastw_epi16() {
16229 let src = _mm256_set1_epi16(1);
16230 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
16231 let r = _mm256_mask_broadcastw_epi16(src, 0, a);
16232 assert_eq_m256i(r, src);
16233 let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
16234 let e = _mm256_set1_epi16(24);
16235 assert_eq_m256i(r, e);
16236 }
16237
16238 #[simd_test(enable = "avx512bw,avx512vl")]
16239 unsafe fn test_mm256_maskz_broadcastw_epi16() {
16240 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
16241 let r = _mm256_maskz_broadcastw_epi16(0, a);
16242 assert_eq_m256i(r, _mm256_setzero_si256());
16243 let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
16244 let e = _mm256_set1_epi16(24);
16245 assert_eq_m256i(r, e);
16246 }
16247
16248 #[simd_test(enable = "avx512bw,avx512vl")]
16249 unsafe fn test_mm_mask_broadcastw_epi16() {
16250 let src = _mm_set1_epi16(1);
16251 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
16252 let r = _mm_mask_broadcastw_epi16(src, 0, a);
16253 assert_eq_m128i(r, src);
16254 let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
16255 let e = _mm_set1_epi16(24);
16256 assert_eq_m128i(r, e);
16257 }
16258
16259 #[simd_test(enable = "avx512bw,avx512vl")]
16260 unsafe fn test_mm_maskz_broadcastw_epi16() {
16261 let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
16262 let r = _mm_maskz_broadcastw_epi16(0, a);
16263 assert_eq_m128i(r, _mm_setzero_si128());
16264 let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
16265 let e = _mm_set1_epi16(24);
16266 assert_eq_m128i(r, e);
16267 }
16268
16269 #[simd_test(enable = "avx512bw")]
16270 unsafe fn test_mm512_broadcastb_epi8() {
16271 let a = _mm_set_epi8(
16272 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16273 );
16274 let r = _mm512_broadcastb_epi8(a);
16275 let e = _mm512_set1_epi8(32);
16276 assert_eq_m512i(r, e);
16277 }
16278
16279 #[simd_test(enable = "avx512bw")]
16280 unsafe fn test_mm512_mask_broadcastb_epi8() {
16281 let src = _mm512_set1_epi8(1);
16282 let a = _mm_set_epi8(
16283 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16284 );
16285 let r = _mm512_mask_broadcastb_epi8(src, 0, a);
16286 assert_eq_m512i(r, src);
16287 let r = _mm512_mask_broadcastb_epi8(
16288 src,
16289 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16290 a,
16291 );
16292 let e = _mm512_set1_epi8(32);
16293 assert_eq_m512i(r, e);
16294 }
16295
16296 #[simd_test(enable = "avx512bw")]
16297 unsafe fn test_mm512_maskz_broadcastb_epi8() {
16298 let a = _mm_set_epi8(
16299 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16300 );
16301 let r = _mm512_maskz_broadcastb_epi8(0, a);
16302 assert_eq_m512i(r, _mm512_setzero_si512());
16303 let r = _mm512_maskz_broadcastb_epi8(
16304 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16305 a,
16306 );
16307 let e = _mm512_set1_epi8(32);
16308 assert_eq_m512i(r, e);
16309 }
16310
16311 #[simd_test(enable = "avx512bw,avx512vl")]
16312 unsafe fn test_mm256_mask_broadcastb_epi8() {
16313 let src = _mm256_set1_epi8(1);
16314 let a = _mm_set_epi8(
16315 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16316 );
16317 let r = _mm256_mask_broadcastb_epi8(src, 0, a);
16318 assert_eq_m256i(r, src);
16319 let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
16320 let e = _mm256_set1_epi8(32);
16321 assert_eq_m256i(r, e);
16322 }
16323
16324 #[simd_test(enable = "avx512bw,avx512vl")]
16325 unsafe fn test_mm256_maskz_broadcastb_epi8() {
16326 let a = _mm_set_epi8(
16327 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16328 );
16329 let r = _mm256_maskz_broadcastb_epi8(0, a);
16330 assert_eq_m256i(r, _mm256_setzero_si256());
16331 let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
16332 let e = _mm256_set1_epi8(32);
16333 assert_eq_m256i(r, e);
16334 }
16335
16336 #[simd_test(enable = "avx512bw,avx512vl")]
16337 unsafe fn test_mm_mask_broadcastb_epi8() {
16338 let src = _mm_set1_epi8(1);
16339 let a = _mm_set_epi8(
16340 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16341 );
16342 let r = _mm_mask_broadcastb_epi8(src, 0, a);
16343 assert_eq_m128i(r, src);
16344 let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
16345 let e = _mm_set1_epi8(32);
16346 assert_eq_m128i(r, e);
16347 }
16348
16349 #[simd_test(enable = "avx512bw,avx512vl")]
16350 unsafe fn test_mm_maskz_broadcastb_epi8() {
16351 let a = _mm_set_epi8(
16352 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16353 );
16354 let r = _mm_maskz_broadcastb_epi8(0, a);
16355 assert_eq_m128i(r, _mm_setzero_si128());
16356 let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
16357 let e = _mm_set1_epi8(32);
16358 assert_eq_m128i(r, e);
16359 }
16360
16361 #[simd_test(enable = "avx512bw")]
16362 unsafe fn test_mm512_unpackhi_epi16() {
16363 #[rustfmt::skip]
16364 let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16365 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16366 #[rustfmt::skip]
16367 let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16368 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
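        // unpackhi interleaves the upper four 16-bit elements of each 128-bit lane of
        // `a` and `b`, which is why the expected values are grouped per 128-bit lane.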
16369 let r = _mm512_unpackhi_epi16(a, b);
16370 #[rustfmt::skip]
16371 let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
16372 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
16373 assert_eq_m512i(r, e);
16374 }
16375
16376 #[simd_test(enable = "avx512bw")]
16377 unsafe fn test_mm512_mask_unpackhi_epi16() {
16378 #[rustfmt::skip]
16379 let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16380 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16381 #[rustfmt::skip]
16382 let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16383 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16384 let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
16385 assert_eq_m512i(r, a);
16386 let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
16387 #[rustfmt::skip]
16388 let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
16389 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
16390 assert_eq_m512i(r, e);
16391 }
16392
16393 #[simd_test(enable = "avx512bw")]
16394 unsafe fn test_mm512_maskz_unpackhi_epi16() {
16395 #[rustfmt::skip]
16396 let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16397 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16398 #[rustfmt::skip]
16399 let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16400 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16401 let r = _mm512_maskz_unpackhi_epi16(0, a, b);
16402 assert_eq_m512i(r, _mm512_setzero_si512());
16403 let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
16404 #[rustfmt::skip]
16405 let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
16406 49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
16407 assert_eq_m512i(r, e);
16408 }
16409
16410 #[simd_test(enable = "avx512bw,avx512vl")]
16411 unsafe fn test_mm256_mask_unpackhi_epi16() {
16412 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16413 let b = _mm256_set_epi16(
16414 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16415 );
16416 let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
16417 assert_eq_m256i(r, a);
16418 let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
16419 let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
16420 assert_eq_m256i(r, e);
16421 }
16422
16423 #[simd_test(enable = "avx512bw,avx512vl")]
16424 unsafe fn test_mm256_maskz_unpackhi_epi16() {
16425 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16426 let b = _mm256_set_epi16(
16427 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16428 );
16429 let r = _mm256_maskz_unpackhi_epi16(0, a, b);
16430 assert_eq_m256i(r, _mm256_setzero_si256());
16431 let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
16432 let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
16433 assert_eq_m256i(r, e);
16434 }
16435
16436 #[simd_test(enable = "avx512bw,avx512vl")]
16437 unsafe fn test_mm_mask_unpackhi_epi16() {
16438 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
16439 let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
16440 let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
16441 assert_eq_m128i(r, a);
16442 let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
16443 let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
16444 assert_eq_m128i(r, e);
16445 }
16446
16447 #[simd_test(enable = "avx512bw,avx512vl")]
16448 unsafe fn test_mm_maskz_unpackhi_epi16() {
16449 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
16450 let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
16451 let r = _mm_maskz_unpackhi_epi16(0, a, b);
16452 assert_eq_m128i(r, _mm_setzero_si128());
16453 let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
16454 let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
16455 assert_eq_m128i(r, e);
16456 }
16457
16458 #[simd_test(enable = "avx512bw")]
16459 unsafe fn test_mm512_unpackhi_epi8() {
16460 #[rustfmt::skip]
16461 let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16462 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16463 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16464 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16465 #[rustfmt::skip]
16466 let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16467 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
16468 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16469 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16470 let r = _mm512_unpackhi_epi8(a, b);
16471 #[rustfmt::skip]
16472 let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
16473 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
16474 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
16475 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
16476 assert_eq_m512i(r, e);
16477 }
16478
16479 #[simd_test(enable = "avx512bw")]
16480 unsafe fn test_mm512_mask_unpackhi_epi8() {
16481 #[rustfmt::skip]
16482 let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16483 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16484 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16485 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16486 #[rustfmt::skip]
16487 let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16488 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
16489 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16490 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16491 let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
16492 assert_eq_m512i(r, a);
16493 let r = _mm512_mask_unpackhi_epi8(
16494 a,
16495 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16496 a,
16497 b,
16498 );
16499 #[rustfmt::skip]
16500 let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
16501 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
16502 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
16503 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
16504 assert_eq_m512i(r, e);
16505 }
16506
16507 #[simd_test(enable = "avx512bw")]
16508 unsafe fn test_mm512_maskz_unpackhi_epi8() {
16509 #[rustfmt::skip]
16510 let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16511 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16512 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16513 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16514 #[rustfmt::skip]
16515 let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16516 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
16517 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16518 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16519 let r = _mm512_maskz_unpackhi_epi8(0, a, b);
16520 assert_eq_m512i(r, _mm512_setzero_si512());
16521 let r = _mm512_maskz_unpackhi_epi8(
16522 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16523 a,
16524 b,
16525 );
16526 #[rustfmt::skip]
16527 let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
16528 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
16529 97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
16530 113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
16531 assert_eq_m512i(r, e);
16532 }
16533
16534 #[simd_test(enable = "avx512bw,avx512vl")]
16535 unsafe fn test_mm256_mask_unpackhi_epi8() {
16536 #[rustfmt::skip]
16537 let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16538 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16539 #[rustfmt::skip]
16540 let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16541 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96);
16542 let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
16543 assert_eq_m256i(r, a);
16544 let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
16545 #[rustfmt::skip]
16546 let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
16547 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
16548 assert_eq_m256i(r, e);
16549 }
16550
16551 #[simd_test(enable = "avx512bw,avx512vl")]
16552 unsafe fn test_mm256_maskz_unpackhi_epi8() {
16553 #[rustfmt::skip]
16554 let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16555 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16556 #[rustfmt::skip]
16557 let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16558 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96);
16559 let r = _mm256_maskz_unpackhi_epi8(0, a, b);
16560 assert_eq_m256i(r, _mm256_setzero_si256());
16561 let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
16562 #[rustfmt::skip]
16563 let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
16564 81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
16565 assert_eq_m256i(r, e);
16566 }
16567
16568 #[simd_test(enable = "avx512bw,avx512vl")]
16569 unsafe fn test_mm_mask_unpackhi_epi8() {
16570 let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16571 let b = _mm_set_epi8(
16572 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16573 );
16574 let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
16575 assert_eq_m128i(r, a);
16576 let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
16577 let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
16578 assert_eq_m128i(r, e);
16579 }
16580
16581 #[simd_test(enable = "avx512bw,avx512vl")]
16582 unsafe fn test_mm_maskz_unpackhi_epi8() {
16583 let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16584 let b = _mm_set_epi8(
16585 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16586 );
16587 let r = _mm_maskz_unpackhi_epi8(0, a, b);
16588 assert_eq_m128i(r, _mm_setzero_si128());
16589 let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
16590 let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
16591 assert_eq_m128i(r, e);
16592 }
16593
16594 #[simd_test(enable = "avx512bw")]
16595 unsafe fn test_mm512_unpacklo_epi16() {
16596 #[rustfmt::skip]
16597 let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16598 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16599 #[rustfmt::skip]
16600 let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16601 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
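        // unpacklo interleaves the lower four 16-bit elements of each 128-bit lane of
        // `a` and `b`.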
16602 let r = _mm512_unpacklo_epi16(a, b);
16603 #[rustfmt::skip]
16604 let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
16605 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
16606 assert_eq_m512i(r, e);
16607 }
16608
16609 #[simd_test(enable = "avx512bw")]
16610 unsafe fn test_mm512_mask_unpacklo_epi16() {
16611 #[rustfmt::skip]
16612 let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16613 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16614 #[rustfmt::skip]
16615 let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16616 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16617 let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
16618 assert_eq_m512i(r, a);
16619 let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
16620 #[rustfmt::skip]
16621 let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
16622 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
16623 assert_eq_m512i(r, e);
16624 }
16625
16626 #[simd_test(enable = "avx512bw")]
16627 unsafe fn test_mm512_maskz_unpacklo_epi16() {
16628 #[rustfmt::skip]
16629 let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16630 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16631 #[rustfmt::skip]
16632 let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16633 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16634 let r = _mm512_maskz_unpacklo_epi16(0, a, b);
16635 assert_eq_m512i(r, _mm512_setzero_si512());
16636 let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
16637 #[rustfmt::skip]
16638 let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
16639 53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
16640 assert_eq_m512i(r, e);
16641 }
16642
16643 #[simd_test(enable = "avx512bw,avx512vl")]
16644 unsafe fn test_mm256_mask_unpacklo_epi16() {
16645 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16646 let b = _mm256_set_epi16(
16647 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16648 );
16649 let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
16650 assert_eq_m256i(r, a);
16651 let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
16652 let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
16653 assert_eq_m256i(r, e);
16654 }
16655
16656 #[simd_test(enable = "avx512bw,avx512vl")]
16657 unsafe fn test_mm256_maskz_unpacklo_epi16() {
16658 let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16659 let b = _mm256_set_epi16(
16660 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16661 );
16662 let r = _mm256_maskz_unpacklo_epi16(0, a, b);
16663 assert_eq_m256i(r, _mm256_setzero_si256());
16664 let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
16665 let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
16666 assert_eq_m256i(r, e);
16667 }
16668
16669 #[simd_test(enable = "avx512bw,avx512vl")]
16670 unsafe fn test_mm_mask_unpacklo_epi16() {
16671 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
16672 let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
16673 let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
16674 assert_eq_m128i(r, a);
16675 let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
16676 let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
16677 assert_eq_m128i(r, e);
16678 }
16679
16680 #[simd_test(enable = "avx512bw,avx512vl")]
16681 unsafe fn test_mm_maskz_unpacklo_epi16() {
16682 let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
16683 let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
16684 let r = _mm_maskz_unpacklo_epi16(0, a, b);
16685 assert_eq_m128i(r, _mm_setzero_si128());
16686 let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
16687 let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
16688 assert_eq_m128i(r, e);
16689 }
16690
16691 #[simd_test(enable = "avx512bw")]
16692 unsafe fn test_mm512_unpacklo_epi8() {
16693 #[rustfmt::skip]
16694 let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16695 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16696 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16697 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16698 #[rustfmt::skip]
16699 let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16700 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
16701 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16702 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16703 let r = _mm512_unpacklo_epi8(a, b);
16704 #[rustfmt::skip]
16705 let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16706 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32,
16707 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
16708 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64);
16709 assert_eq_m512i(r, e);
16710 }
16711
16712 #[simd_test(enable = "avx512bw")]
16713 unsafe fn test_mm512_mask_unpacklo_epi8() {
16714 #[rustfmt::skip]
16715 let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16716 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16717 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16718 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16719 #[rustfmt::skip]
16720 let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16721 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
16722 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16723 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16724 let r = _mm512_mask_unpacklo_epi8(a, 0, a, b);
16725 assert_eq_m512i(r, a);
16726 let r = _mm512_mask_unpacklo_epi8(
16727 a,
16728 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16729 a,
16730 b,
16731 );
16732 #[rustfmt::skip]
16733 let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16734 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32,
16735 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
16736 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64);
16737 assert_eq_m512i(r, e);
16738 }
16739
16740 #[simd_test(enable = "avx512bw")]
16741 unsafe fn test_mm512_maskz_unpacklo_epi8() {
16742 #[rustfmt::skip]
16743 let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16744 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
16745 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
16746 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
16747 #[rustfmt::skip]
16748 let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16749 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
16750 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
16751 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
16752 let r = _mm512_maskz_unpacklo_epi8(0, a, b);
16753 assert_eq_m512i(r, _mm512_setzero_si512());
16754 let r = _mm512_maskz_unpacklo_epi8(
16755 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16756 a,
16757 b,
16758 );
16759 #[rustfmt::skip]
16760 let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16761 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32,
16762 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48,
16763 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64);
16764 assert_eq_m512i(r, e);
16765 }
16766
16767 #[simd_test(enable = "avx512bw,avx512vl")]
16768 unsafe fn test_mm256_mask_unpacklo_epi8() {
16769 #[rustfmt::skip]
16770 let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16771 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16772 #[rustfmt::skip]
16773 let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16774 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96);
16775 let r = _mm256_mask_unpacklo_epi8(a, 0, a, b);
16776 assert_eq_m256i(r, a);
16777 let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
16778 #[rustfmt::skip]
16779 let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16780 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32);
16781 assert_eq_m256i(r, e);
16782 }
16783
16784 #[simd_test(enable = "avx512bw,avx512vl")]
16785 unsafe fn test_mm256_maskz_unpacklo_epi8() {
16786 #[rustfmt::skip]
16787 let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
16788 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
16789 #[rustfmt::skip]
16790 let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16791 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96);
16792 let r = _mm256_maskz_unpacklo_epi8(0, a, b);
16793 assert_eq_m256i(r, _mm256_setzero_si256());
16794 let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b);
16795 #[rustfmt::skip]
16796 let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16797 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32);
16798 assert_eq_m256i(r, e);
16799 }
16800
16801 #[simd_test(enable = "avx512bw,avx512vl")]
16802 unsafe fn test_mm_mask_unpacklo_epi8() {
16803 let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16804 let b = _mm_set_epi8(
16805 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16806 );
16807 let r = _mm_mask_unpacklo_epi8(a, 0, a, b);
16808 assert_eq_m128i(r, a);
16809 let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b);
16810 let e = _mm_set_epi8(
16811 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16812 );
16813 assert_eq_m128i(r, e);
16814 }
16815
16816 #[simd_test(enable = "avx512bw,avx512vl")]
16817 unsafe fn test_mm_maskz_unpacklo_epi8() {
16818 let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16819 let b = _mm_set_epi8(
16820 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
16821 );
16822 let r = _mm_maskz_unpacklo_epi8(0, a, b);
16823 assert_eq_m128i(r, _mm_setzero_si128());
16824 let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b);
16825 let e = _mm_set_epi8(
16826 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16,
16827 );
16828 assert_eq_m128i(r, e);
16829 }
16830
16831 #[simd_test(enable = "avx512bw")]
16832 unsafe fn test_mm512_mask_mov_epi16() {
16833 let src = _mm512_set1_epi16(1);
16834 let a = _mm512_set1_epi16(2);
16835 let r = _mm512_mask_mov_epi16(src, 0, a);
16836 assert_eq_m512i(r, src);
16837 let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a);
16838 assert_eq_m512i(r, a);
16839 }
16840
16841 #[simd_test(enable = "avx512bw")]
16842 unsafe fn test_mm512_maskz_mov_epi16() {
16843 let a = _mm512_set1_epi16(2);
16844 let r = _mm512_maskz_mov_epi16(0, a);
16845 assert_eq_m512i(r, _mm512_setzero_si512());
16846 let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a);
16847 assert_eq_m512i(r, a);
16848 }
16849
16850 #[simd_test(enable = "avx512bw,avx512vl")]
16851 unsafe fn test_mm256_mask_mov_epi16() {
16852 let src = _mm256_set1_epi16(1);
16853 let a = _mm256_set1_epi16(2);
16854 let r = _mm256_mask_mov_epi16(src, 0, a);
16855 assert_eq_m256i(r, src);
16856 let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a);
16857 assert_eq_m256i(r, a);
16858 }
16859
16860 #[simd_test(enable = "avx512bw,avx512vl")]
16861 unsafe fn test_mm256_maskz_mov_epi16() {
16862 let a = _mm256_set1_epi16(2);
16863 let r = _mm256_maskz_mov_epi16(0, a);
16864 assert_eq_m256i(r, _mm256_setzero_si256());
16865 let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a);
16866 assert_eq_m256i(r, a);
16867 }
16868
16869 #[simd_test(enable = "avx512bw,avx512vl")]
16870 unsafe fn test_mm_mask_mov_epi16() {
16871 let src = _mm_set1_epi16(1);
16872 let a = _mm_set1_epi16(2);
16873 let r = _mm_mask_mov_epi16(src, 0, a);
16874 assert_eq_m128i(r, src);
16875 let r = _mm_mask_mov_epi16(src, 0b11111111, a);
16876 assert_eq_m128i(r, a);
16877 }
16878
16879 #[simd_test(enable = "avx512bw,avx512vl")]
16880 unsafe fn test_mm_maskz_mov_epi16() {
16881 let a = _mm_set1_epi16(2);
16882 let r = _mm_maskz_mov_epi16(0, a);
16883 assert_eq_m128i(r, _mm_setzero_si128());
16884 let r = _mm_maskz_mov_epi16(0b11111111, a);
16885 assert_eq_m128i(r, a);
16886 }
16887
16888 #[simd_test(enable = "avx512bw")]
16889 unsafe fn test_mm512_mask_mov_epi8() {
16890 let src = _mm512_set1_epi8(1);
16891 let a = _mm512_set1_epi8(2);
16892 let r = _mm512_mask_mov_epi8(src, 0, a);
16893 assert_eq_m512i(r, src);
16894 let r = _mm512_mask_mov_epi8(
16895 src,
16896 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16897 a,
16898 );
16899 assert_eq_m512i(r, a);
16900 }
16901
16902 #[simd_test(enable = "avx512bw")]
16903 unsafe fn test_mm512_maskz_mov_epi8() {
16904 let a = _mm512_set1_epi8(2);
16905 let r = _mm512_maskz_mov_epi8(0, a);
16906 assert_eq_m512i(r, _mm512_setzero_si512());
16907 let r = _mm512_maskz_mov_epi8(
16908 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
16909 a,
16910 );
16911 assert_eq_m512i(r, a);
16912 }
16913
16914 #[simd_test(enable = "avx512bw,avx512vl")]
16915 unsafe fn test_mm256_mask_mov_epi8() {
16916 let src = _mm256_set1_epi8(1);
16917 let a = _mm256_set1_epi8(2);
16918 let r = _mm256_mask_mov_epi8(src, 0, a);
16919 assert_eq_m256i(r, src);
16920 let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a);
16921 assert_eq_m256i(r, a);
16922 }
16923
16924 #[simd_test(enable = "avx512bw,avx512vl")]
16925 unsafe fn test_mm256_maskz_mov_epi8() {
16926 let a = _mm256_set1_epi8(2);
16927 let r = _mm256_maskz_mov_epi8(0, a);
16928 assert_eq_m256i(r, _mm256_setzero_si256());
16929 let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a);
16930 assert_eq_m256i(r, a);
16931 }
16932
16933 #[simd_test(enable = "avx512bw,avx512vl")]
16934 unsafe fn test_mm_mask_mov_epi8() {
16935 let src = _mm_set1_epi8(1);
16936 let a = _mm_set1_epi8(2);
16937 let r = _mm_mask_mov_epi8(src, 0, a);
16938 assert_eq_m128i(r, src);
16939 let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a);
16940 assert_eq_m128i(r, a);
16941 }
16942
16943 #[simd_test(enable = "avx512bw,avx512vl")]
16944 unsafe fn test_mm_maskz_mov_epi8() {
16945 let a = _mm_set1_epi8(2);
16946 let r = _mm_maskz_mov_epi8(0, a);
16947 assert_eq_m128i(r, _mm_setzero_si128());
16948 let r = _mm_maskz_mov_epi8(0b11111111_11111111, a);
16949 assert_eq_m128i(r, a);
16950 }
16951
16952 #[simd_test(enable = "avx512bw")]
16953 unsafe fn test_mm512_mask_set1_epi16() {
16954 let src = _mm512_set1_epi16(2);
16955 let a: i16 = 11;
16956 let r = _mm512_mask_set1_epi16(src, 0, a);
16957 assert_eq_m512i(r, src);
16958 let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a);
16959 let e = _mm512_set1_epi16(11);
16960 assert_eq_m512i(r, e);
16961 }
16962
16963 #[simd_test(enable = "avx512bw")]
16964 unsafe fn test_mm512_maskz_set1_epi16() {
16965 let a: i16 = 11;
16966 let r = _mm512_maskz_set1_epi16(0, a);
16967 assert_eq_m512i(r, _mm512_setzero_si512());
16968 let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a);
16969 let e = _mm512_set1_epi16(11);
16970 assert_eq_m512i(r, e);
16971 }
16972
16973 #[simd_test(enable = "avx512bw,avx512vl")]
16974 unsafe fn test_mm256_mask_set1_epi16() {
16975 let src = _mm256_set1_epi16(2);
16976 let a: i16 = 11;
16977 let r = _mm256_mask_set1_epi16(src, 0, a);
16978 assert_eq_m256i(r, src);
16979 let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a);
16980 let e = _mm256_set1_epi16(11);
16981 assert_eq_m256i(r, e);
16982 }
16983
16984 #[simd_test(enable = "avx512bw,avx512vl")]
16985 unsafe fn test_mm256_maskz_set1_epi16() {
16986 let a: i16 = 11;
16987 let r = _mm256_maskz_set1_epi16(0, a);
16988 assert_eq_m256i(r, _mm256_setzero_si256());
16989 let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a);
16990 let e = _mm256_set1_epi16(11);
16991 assert_eq_m256i(r, e);
16992 }
16993
16994 #[simd_test(enable = "avx512bw,avx512vl")]
16995 unsafe fn test_mm_mask_set1_epi16() {
16996 let src = _mm_set1_epi16(2);
16997 let a: i16 = 11;
16998 let r = _mm_mask_set1_epi16(src, 0, a);
16999 assert_eq_m128i(r, src);
17000 let r = _mm_mask_set1_epi16(src, 0b11111111, a);
17001 let e = _mm_set1_epi16(11);
17002 assert_eq_m128i(r, e);
17003 }
17004
17005 #[simd_test(enable = "avx512bw,avx512vl")]
17006 unsafe fn test_mm_maskz_set1_epi16() {
17007 let a: i16 = 11;
17008 let r = _mm_maskz_set1_epi16(0, a);
17009 assert_eq_m128i(r, _mm_setzero_si128());
17010 let r = _mm_maskz_set1_epi16(0b11111111, a);
17011 let e = _mm_set1_epi16(11);
17012 assert_eq_m128i(r, e);
17013 }
17014
17015 #[simd_test(enable = "avx512bw")]
17016 unsafe fn test_mm512_mask_set1_epi8() {
17017 let src = _mm512_set1_epi8(2);
17018 let a: i8 = 11;
17019 let r = _mm512_mask_set1_epi8(src, 0, a);
17020 assert_eq_m512i(r, src);
17021 let r = _mm512_mask_set1_epi8(
17022 src,
17023 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
17024 a,
17025 );
17026 let e = _mm512_set1_epi8(11);
17027 assert_eq_m512i(r, e);
17028 }
17029
17030 #[simd_test(enable = "avx512bw")]
17031 unsafe fn test_mm512_maskz_set1_epi8() {
17032 let a: i8 = 11;
17033 let r = _mm512_maskz_set1_epi8(0, a);
17034 assert_eq_m512i(r, _mm512_setzero_si512());
17035 let r = _mm512_maskz_set1_epi8(
17036 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
17037 a,
17038 );
17039 let e = _mm512_set1_epi8(11);
17040 assert_eq_m512i(r, e);
17041 }
17042
17043 #[simd_test(enable = "avx512bw,avx512vl")]
17044 unsafe fn test_mm256_mask_set1_epi8() {
17045 let src = _mm256_set1_epi8(2);
17046 let a: i8 = 11;
17047 let r = _mm256_mask_set1_epi8(src, 0, a);
17048 assert_eq_m256i(r, src);
17049 let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a);
17050 let e = _mm256_set1_epi8(11);
17051 assert_eq_m256i(r, e);
17052 }
17053
17054 #[simd_test(enable = "avx512bw,avx512vl")]
17055 unsafe fn test_mm256_maskz_set1_epi8() {
17056 let a: i8 = 11;
17057 let r = _mm256_maskz_set1_epi8(0, a);
17058 assert_eq_m256i(r, _mm256_setzero_si256());
17059 let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a);
17060 let e = _mm256_set1_epi8(11);
17061 assert_eq_m256i(r, e);
17062 }
17063
17064 #[simd_test(enable = "avx512bw,avx512vl")]
17065 unsafe fn test_mm_mask_set1_epi8() {
17066 let src = _mm_set1_epi8(2);
17067 let a: i8 = 11;
17068 let r = _mm_mask_set1_epi8(src, 0, a);
17069 assert_eq_m128i(r, src);
17070 let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a);
17071 let e = _mm_set1_epi8(11);
17072 assert_eq_m128i(r, e);
17073 }
17074
17075 #[simd_test(enable = "avx512bw,avx512vl")]
17076 unsafe fn test_mm_maskz_set1_epi8() {
17077 let a: i8 = 11;
17078 let r = _mm_maskz_set1_epi8(0, a);
17079 assert_eq_m128i(r, _mm_setzero_si128());
17080 let r = _mm_maskz_set1_epi8(0b11111111_11111111, a);
17081 let e = _mm_set1_epi8(11);
17082 assert_eq_m128i(r, e);
17083 }
17084
17085 #[simd_test(enable = "avx512bw")]
17086 unsafe fn test_mm512_shufflelo_epi16() {
17087 #[rustfmt::skip]
17088 let a = _mm512_set_epi16(
17089 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17090 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17091 );
17092 #[rustfmt::skip]
17093 let e = _mm512_set_epi16(
17094 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
17095 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
17096 );
17097 let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a);
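      // IMM8 = 0b00_01_01_11 rearranges each lane's low four words to (src[3], src[1], src[1], src[0]);
      // the upper four words of every 128-bit lane pass through unchanged.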
17098 assert_eq_m512i(r, e);
17099 }
17100
17101 #[simd_test(enable = "avx512bw")]
17102 unsafe fn test_mm512_mask_shufflelo_epi16() {
17103 #[rustfmt::skip]
17104 let a = _mm512_set_epi16(
17105 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17106 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17107 );
17108 let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
17109 assert_eq_m512i(r, a);
17110 let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(
17111 a,
17112 0b11111111_11111111_11111111_11111111,
17113 a,
17114 );
17115 #[rustfmt::skip]
17116 let e = _mm512_set_epi16(
17117 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
17118 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
17119 );
17120 assert_eq_m512i(r, e);
17121 }
17122
17123 #[simd_test(enable = "avx512bw")]
17124 unsafe fn test_mm512_maskz_shufflelo_epi16() {
17125 #[rustfmt::skip]
17126 let a = _mm512_set_epi16(
17127 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17128 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17129 );
17130 let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
17131 assert_eq_m512i(r, _mm512_setzero_si512());
17132 let r =
17133 _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
17134 #[rustfmt::skip]
17135 let e = _mm512_set_epi16(
17136 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12,
17137 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28,
17138 );
17139 assert_eq_m512i(r, e);
17140 }
17141
17142 #[simd_test(enable = "avx512bw,avx512vl")]
17143 unsafe fn test_mm256_mask_shufflelo_epi16() {
17144 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17145 let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
17146 assert_eq_m256i(r, a);
17147 let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
17148 let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
17149 assert_eq_m256i(r, e);
17150 }
17151
17152 #[simd_test(enable = "avx512bw,avx512vl")]
17153 unsafe fn test_mm256_maskz_shufflelo_epi16() {
17154 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17155 let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
17156 assert_eq_m256i(r, _mm256_setzero_si256());
17157 let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
17158 let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12);
17159 assert_eq_m256i(r, e);
17160 }
17161
17162 #[simd_test(enable = "avx512bw,avx512vl")]
17163 unsafe fn test_mm_mask_shufflelo_epi16() {
17164 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17165 let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
17166 assert_eq_m128i(r, a);
17167 let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a);
17168 let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
17169 assert_eq_m128i(r, e);
17170 }
17171
17172 #[simd_test(enable = "avx512bw,avx512vl")]
17173 unsafe fn test_mm_maskz_shufflelo_epi16() {
17174 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17175 let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
17176 assert_eq_m128i(r, _mm_setzero_si128());
17177 let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a);
17178 let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4);
17179 assert_eq_m128i(r, e);
17180 }
17181
17182 #[simd_test(enable = "avx512bw")]
17183 unsafe fn test_mm512_shufflehi_epi16() {
17184 #[rustfmt::skip]
17185 let a = _mm512_set_epi16(
17186 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17187 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17188 );
17189 #[rustfmt::skip]
17190 let e = _mm512_set_epi16(
17191 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
17192 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
17193 );
17194 let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a);
17195 assert_eq_m512i(r, e);
17196 }
17197
17198 #[simd_test(enable = "avx512bw")]
17199 unsafe fn test_mm512_mask_shufflehi_epi16() {
17200 #[rustfmt::skip]
17201 let a = _mm512_set_epi16(
17202 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17203 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17204 );
17205 let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
17206 assert_eq_m512i(r, a);
17207 let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(
17208 a,
17209 0b11111111_11111111_11111111_11111111,
17210 a,
17211 );
17212 #[rustfmt::skip]
17213 let e = _mm512_set_epi16(
17214 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
17215 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
17216 );
17217 assert_eq_m512i(r, e);
17218 }
17219
17220 #[simd_test(enable = "avx512bw")]
17221 unsafe fn test_mm512_maskz_shufflehi_epi16() {
17222 #[rustfmt::skip]
17223 let a = _mm512_set_epi16(
17224 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17225 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17226 );
17227 let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
17228 assert_eq_m512i(r, _mm512_setzero_si512());
17229 let r =
17230 _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a);
17231 #[rustfmt::skip]
17232 let e = _mm512_set_epi16(
17233 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15,
17234 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31,
17235 );
17236 assert_eq_m512i(r, e);
17237 }
17238
17239 #[simd_test(enable = "avx512bw,avx512vl")]
17240 unsafe fn test_mm256_mask_shufflehi_epi16() {
17241 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17242 let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
17243 assert_eq_m256i(r, a);
17244 let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a);
17245 let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
17246 assert_eq_m256i(r, e);
17247 }
17248
17249 #[simd_test(enable = "avx512bw,avx512vl")]
17250 unsafe fn test_mm256_maskz_shufflehi_epi16() {
17251 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17252 let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
17253 assert_eq_m256i(r, _mm256_setzero_si256());
17254 let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a);
17255 let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15);
17256 assert_eq_m256i(r, e);
17257 }
17258
17259 #[simd_test(enable = "avx512bw,avx512vl")]
17260 unsafe fn test_mm_mask_shufflehi_epi16() {
17261 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17262 let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
17263 assert_eq_m128i(r, a);
17264 let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a);
17265 let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
17266 assert_eq_m128i(r, e);
17267 }
17268
17269 #[simd_test(enable = "avx512bw,avx512vl")]
17270 unsafe fn test_mm_maskz_shufflehi_epi16() {
17271 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
17272 let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
17273 assert_eq_m128i(r, _mm_setzero_si128());
17274 let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a);
17275 let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7);
17276 assert_eq_m128i(r, e);
17277 }
17278
17279 #[simd_test(enable = "avx512bw")]
17280 unsafe fn test_mm512_shuffle_epi8() {
17281 #[rustfmt::skip]
17282 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17283 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17284 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
17285 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
17286 let b = _mm512_set1_epi8(1);
17287 let r = _mm512_shuffle_epi8(a, b);
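      // With every control byte equal to 1, each destination byte copies byte index 1 of its own 128-bit lane of a;
      // since set_epi8 lists elements high-to-low, those bytes hold 62, 46, 30 and 14 in lanes 0 through 3.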
17288 #[rustfmt::skip]
17289 let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17290 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
17291 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
17292 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
17293 assert_eq_m512i(r, e);
17294 }
17295
17296 #[simd_test(enable = "avx512bw")]
17297 unsafe fn test_mm512_mask_shuffle_epi8() {
17298 #[rustfmt::skip]
17299 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17300 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17301 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
17302 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
17303 let b = _mm512_set1_epi8(1);
17304 let r = _mm512_mask_shuffle_epi8(a, 0, a, b);
17305 assert_eq_m512i(r, a);
17306 let r = _mm512_mask_shuffle_epi8(
17307 a,
17308 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
17309 a,
17310 b,
17311 );
17312 #[rustfmt::skip]
17313 let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17314 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
17315 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
17316 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
17317 assert_eq_m512i(r, e);
17318 }
17319
17320 #[simd_test(enable = "avx512bw")]
17321 unsafe fn test_mm512_maskz_shuffle_epi8() {
17322 #[rustfmt::skip]
17323 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17324 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
17325 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
17326 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
17327 let b = _mm512_set1_epi8(1);
17328 let r = _mm512_maskz_shuffle_epi8(0, a, b);
17329 assert_eq_m512i(r, _mm512_setzero_si512());
17330 let r = _mm512_maskz_shuffle_epi8(
17331 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
17332 a,
17333 b,
17334 );
17335 #[rustfmt::skip]
17336 let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17337 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
17338 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
17339 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62);
17340 assert_eq_m512i(r, e);
17341 }
17342
17343 #[simd_test(enable = "avx512bw,avx512vl")]
17344 unsafe fn test_mm256_mask_shuffle_epi8() {
17345 #[rustfmt::skip]
17346 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17347 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
17348 let b = _mm256_set1_epi8(1);
17349 let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
17350 assert_eq_m256i(r, a);
17351 let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
17352 #[rustfmt::skip]
17353 let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17354 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
17355 assert_eq_m256i(r, e);
17356 }
17357
17358 #[simd_test(enable = "avx512bw,avx512vl")]
17359 unsafe fn test_mm256_maskz_shuffle_epi8() {
17360 #[rustfmt::skip]
17361 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17362 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
17363 let b = _mm256_set1_epi8(1);
17364 let r = _mm256_maskz_shuffle_epi8(0, a, b);
17365 assert_eq_m256i(r, _mm256_setzero_si256());
17366 let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
17367 #[rustfmt::skip]
17368 let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17369 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
17370 assert_eq_m256i(r, e);
17371 }
17372
17373 #[simd_test(enable = "avx512bw,avx512vl")]
17374 unsafe fn test_mm_mask_shuffle_epi8() {
17375 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17376 let b = _mm_set1_epi8(1);
17377 let r = _mm_mask_shuffle_epi8(a, 0, a, b);
17378 assert_eq_m128i(r, a);
17379 let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
17380 let e = _mm_set_epi8(
17381 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17382 );
17383 assert_eq_m128i(r, e);
17384 }
17385
17386 #[simd_test(enable = "avx512bw,avx512vl")]
17387 unsafe fn test_mm_maskz_shuffle_epi8() {
17388 #[rustfmt::skip]
17389 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17390 let b = _mm_set1_epi8(1);
17391 let r = _mm_maskz_shuffle_epi8(0, a, b);
17392 assert_eq_m128i(r, _mm_setzero_si128());
17393 let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b);
17394 let e = _mm_set_epi8(
17395 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
17396 );
17397 assert_eq_m128i(r, e);
17398 }
17399
17400 #[simd_test(enable = "avx512bw")]
17401 unsafe fn test_mm512_test_epi16_mask() {
17402 let a = _mm512_set1_epi16(1 << 0);
17403 let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
17404 let r = _mm512_test_epi16_mask(a, b);
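      // test_epi16_mask sets a mask bit wherever a & b is non-zero; every element shares bit 0, so the full mask is set.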
17405 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
17406 assert_eq!(r, e);
17407 }
17408
17409 #[simd_test(enable = "avx512bw")]
17410 unsafe fn test_mm512_mask_test_epi16_mask() {
17411 let a = _mm512_set1_epi16(1 << 0);
17412 let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
17413 let r = _mm512_mask_test_epi16_mask(0, a, b);
17414 assert_eq!(r, 0);
17415 let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
17416 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
17417 assert_eq!(r, e);
17418 }
17419
17420 #[simd_test(enable = "avx512bw,avx512vl")]
17421 unsafe fn test_mm256_test_epi16_mask() {
17422 let a = _mm256_set1_epi16(1 << 0);
17423 let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
17424 let r = _mm256_test_epi16_mask(a, b);
17425 let e: __mmask16 = 0b11111111_11111111;
17426 assert_eq!(r, e);
17427 }
17428
17429 #[simd_test(enable = "avx512bw,avx512vl")]
17430 unsafe fn test_mm256_mask_test_epi16_mask() {
17431 let a = _mm256_set1_epi16(1 << 0);
17432 let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
17433 let r = _mm256_mask_test_epi16_mask(0, a, b);
17434 assert_eq!(r, 0);
17435 let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
17436 let e: __mmask16 = 0b11111111_11111111;
17437 assert_eq!(r, e);
17438 }
17439
17440 #[simd_test(enable = "avx512bw,avx512vl")]
17441 unsafe fn test_mm_test_epi16_mask() {
17442 let a = _mm_set1_epi16(1 << 0);
17443 let b = _mm_set1_epi16(1 << 0 | 1 << 1);
17444 let r = _mm_test_epi16_mask(a, b);
17445 let e: __mmask8 = 0b11111111;
17446 assert_eq!(r, e);
17447 }
17448
17449 #[simd_test(enable = "avx512bw,avx512vl")]
17450 unsafe fn test_mm_mask_test_epi16_mask() {
17451 let a = _mm_set1_epi16(1 << 0);
17452 let b = _mm_set1_epi16(1 << 0 | 1 << 1);
17453 let r = _mm_mask_test_epi16_mask(0, a, b);
17454 assert_eq!(r, 0);
17455 let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
17456 let e: __mmask8 = 0b11111111;
17457 assert_eq!(r, e);
17458 }
17459
17460 #[simd_test(enable = "avx512bw")]
17461 unsafe fn test_mm512_test_epi8_mask() {
17462 let a = _mm512_set1_epi8(1 << 0);
17463 let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
17464 let r = _mm512_test_epi8_mask(a, b);
17465 let e: __mmask64 =
17466 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
17467 assert_eq!(r, e);
17468 }
17469
17470 #[simd_test(enable = "avx512bw")]
17471 unsafe fn test_mm512_mask_test_epi8_mask() {
17472 let a = _mm512_set1_epi8(1 << 0);
17473 let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
17474 let r = _mm512_mask_test_epi8_mask(0, a, b);
17475 assert_eq!(r, 0);
17476 let r = _mm512_mask_test_epi8_mask(
17477 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
17478 a,
17479 b,
17480 );
17481 let e: __mmask64 =
17482 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
17483 assert_eq!(r, e);
17484 }
17485
17486 #[simd_test(enable = "avx512bw,avx512vl")]
17487 unsafe fn test_mm256_test_epi8_mask() {
17488 let a = _mm256_set1_epi8(1 << 0);
17489 let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
17490 let r = _mm256_test_epi8_mask(a, b);
17491 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
17492 assert_eq!(r, e);
17493 }
17494
17495 #[simd_test(enable = "avx512bw,avx512vl")]
17496 unsafe fn test_mm256_mask_test_epi8_mask() {
17497 let a = _mm256_set1_epi8(1 << 0);
17498 let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
17499 let r = _mm256_mask_test_epi8_mask(0, a, b);
17500 assert_eq!(r, 0);
17501 let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
17502 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
17503 assert_eq!(r, e);
17504 }
17505
17506 #[simd_test(enable = "avx512bw,avx512vl")]
17507 unsafe fn test_mm_test_epi8_mask() {
17508 let a = _mm_set1_epi8(1 << 0);
17509 let b = _mm_set1_epi8(1 << 0 | 1 << 1);
17510 let r = _mm_test_epi8_mask(a, b);
17511 let e: __mmask16 = 0b11111111_11111111;
17512 assert_eq!(r, e);
17513 }
17514
17515 #[simd_test(enable = "avx512bw,avx512vl")]
17516 unsafe fn test_mm_mask_test_epi8_mask() {
17517 let a = _mm_set1_epi8(1 << 0);
17518 let b = _mm_set1_epi8(1 << 0 | 1 << 1);
17519 let r = _mm_mask_test_epi8_mask(0, a, b);
17520 assert_eq!(r, 0);
17521 let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
17522 let e: __mmask16 = 0b11111111_11111111;
17523 assert_eq!(r, e);
17524 }
17525
17526 #[simd_test(enable = "avx512bw")]
17527 unsafe fn test_mm512_testn_epi16_mask() {
17528 let a = _mm512_set1_epi16(1 << 0);
17529 let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
17530 let r = _mm512_testn_epi16_mask(a, b);
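      // testn is the complementary test: a mask bit is set only where a & b == 0, so here the result is all zeros.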
17531 let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
17532 assert_eq!(r, e);
17533 }
17534
17535 #[simd_test(enable = "avx512bw")]
17536 unsafe fn test_mm512_mask_testn_epi16_mask() {
17537 let a = _mm512_set1_epi16(1 << 0);
17538 let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
17539 let r = _mm512_mask_testn_epi16_mask(0, a, b);
17540 assert_eq!(r, 0);
17541 let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
17542 let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
17543 assert_eq!(r, e);
17544 }
17545
17546 #[simd_test(enable = "avx512bw,avx512vl")]
17547 unsafe fn test_mm256_testn_epi16_mask() {
17548 let a = _mm256_set1_epi16(1 << 0);
17549 let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
17550 let r = _mm256_testn_epi16_mask(a, b);
17551 let e: __mmask16 = 0b00000000_00000000;
17552 assert_eq!(r, e);
17553 }
17554
17555 #[simd_test(enable = "avx512bw,avx512vl")]
17556 unsafe fn test_mm256_mask_testn_epi16_mask() {
17557 let a = _mm256_set1_epi16(1 << 0);
17558 let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
17559 let r = _mm256_mask_testn_epi16_mask(0, a, b);
17560 assert_eq!(r, 0);
17561 let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
17562 let e: __mmask16 = 0b00000000_00000000;
17563 assert_eq!(r, e);
17564 }
17565
17566 #[simd_test(enable = "avx512bw,avx512vl")]
17567 unsafe fn test_mm_testn_epi16_mask() {
17568 let a = _mm_set1_epi16(1 << 0);
17569 let b = _mm_set1_epi16(1 << 0 | 1 << 1);
17570 let r = _mm_testn_epi16_mask(a, b);
17571 let e: __mmask8 = 0b00000000;
17572 assert_eq!(r, e);
17573 }
17574
17575 #[simd_test(enable = "avx512bw,avx512vl")]
17576 unsafe fn test_mm_mask_testn_epi16_mask() {
17577 let a = _mm_set1_epi16(1 << 0);
17578 let b = _mm_set1_epi16(1 << 0 | 1 << 1);
17579 let r = _mm_mask_testn_epi16_mask(0, a, b);
17580 assert_eq!(r, 0);
17581 let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
17582 let e: __mmask8 = 0b00000000;
17583 assert_eq!(r, e);
17584 }
17585
17586 #[simd_test(enable = "avx512bw")]
17587 unsafe fn test_mm512_testn_epi8_mask() {
17588 let a = _mm512_set1_epi8(1 << 0);
17589 let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
17590 let r = _mm512_testn_epi8_mask(a, b);
17591 let e: __mmask64 =
17592 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
17593 assert_eq!(r, e);
17594 }
17595
17596 #[simd_test(enable = "avx512bw")]
17597 unsafe fn test_mm512_mask_testn_epi8_mask() {
17598 let a = _mm512_set1_epi8(1 << 0);
17599 let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
17600 let r = _mm512_mask_testn_epi8_mask(0, a, b);
17601 assert_eq!(r, 0);
17602 let r = _mm512_mask_testn_epi8_mask(
17603 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
17604 a,
17605 b,
17606 );
17607 let e: __mmask64 =
17608 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
17609 assert_eq!(r, e);
17610 }
17611
17612 #[simd_test(enable = "avx512bw,avx512vl")]
17613 unsafe fn test_mm256_testn_epi8_mask() {
17614 let a = _mm256_set1_epi8(1 << 0);
17615 let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
17616 let r = _mm256_testn_epi8_mask(a, b);
17617 let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
17618 assert_eq!(r, e);
17619 }
17620
17621 #[simd_test(enable = "avx512bw,avx512vl")]
17622 unsafe fn test_mm256_mask_testn_epi8_mask() {
17623 let a = _mm256_set1_epi8(1 << 0);
17624 let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
17625 let r = _mm256_mask_testn_epi8_mask(0, a, b);
17626 assert_eq!(r, 0);
17627 let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
17628 let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
17629 assert_eq!(r, e);
17630 }
17631
17632 #[simd_test(enable = "avx512bw,avx512vl")]
17633 unsafe fn test_mm_testn_epi8_mask() {
17634 let a = _mm_set1_epi8(1 << 0);
17635 let b = _mm_set1_epi8(1 << 0 | 1 << 1);
17636 let r = _mm_testn_epi8_mask(a, b);
17637 let e: __mmask16 = 0b00000000_00000000;
17638 assert_eq!(r, e);
17639 }
17640
17641 #[simd_test(enable = "avx512bw,avx512vl")]
17642 unsafe fn test_mm_mask_testn_epi8_mask() {
17643 let a = _mm_set1_epi8(1 << 0);
17644 let b = _mm_set1_epi8(1 << 0 | 1 << 1);
17645 let r = _mm_mask_testn_epi8_mask(0, a, b);
17646 assert_eq!(r, 0);
17647 let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
17648 let e: __mmask16 = 0b00000000_00000000;
17649 assert_eq!(r, e);
17650 }
17651
17652 #[simd_test(enable = "avx512bw")]
17653 unsafe fn test_store_mask64() {
17654 let a: __mmask64 =
17655 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
17656 let mut r = 0;
17657 _store_mask64(&mut r as *mut _ as *mut u64, a);
17658 assert_eq!(r, a);
17659 }
17660
17661 #[simd_test(enable = "avx512bw")]
17662 unsafe fn test_store_mask32() {
17663 let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
17664 let mut r = 0;
17665 _store_mask32(&mut r as *mut _ as *mut u32, a);
17666 assert_eq!(r, a);
17667 }
17668
17669 #[simd_test(enable = "avx512bw")]
17670 unsafe fn test_load_mask64() {
17671 let p: __mmask64 =
17672 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
17673 let r = _load_mask64(&p);
17674 let e: __mmask64 =
17675 0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
17676 assert_eq!(r, e);
17677 }
17678
17679 #[simd_test(enable = "avx512bw")]
17680 unsafe fn test_load_mask32() {
17681 let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
17682 let r = _load_mask32(&p);
17683 let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
17684 assert_eq!(r, e);
17685 }
17686
17687 #[simd_test(enable = "avx512bw")]
17688 unsafe fn test_mm512_sad_epu8() {
17689 let a = _mm512_set1_epi8(2);
17690 let b = _mm512_set1_epi8(4);
17691 let r = _mm512_sad_epu8(a, b);
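      // Each 64-bit result is the sum of absolute differences over 8 byte pairs: 8 * |2 - 4| = 16.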
17692 let e = _mm512_set1_epi64(16);
17693 assert_eq_m512i(r, e);
17694 }
17695
17696 #[simd_test(enable = "avx512bw")]
17697 unsafe fn test_mm512_dbsad_epu8() {
17698 let a = _mm512_set1_epi8(2);
17699 let b = _mm512_set1_epi8(4);
17700 let r = _mm512_dbsad_epu8::<0>(a, b);
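      // With IMM8 = 0 every 16-bit result sums four absolute byte differences: 4 * |2 - 4| = 8.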
17701 let e = _mm512_set1_epi16(8);
17702 assert_eq_m512i(r, e);
17703 }
17704
17705 #[simd_test(enable = "avx512bw")]
17706 unsafe fn test_mm512_mask_dbsad_epu8() {
17707 let src = _mm512_set1_epi16(1);
17708 let a = _mm512_set1_epi8(2);
17709 let b = _mm512_set1_epi8(4);
17710 let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
17711 assert_eq_m512i(r, src);
17712 let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
17713 let e = _mm512_set1_epi16(8);
17714 assert_eq_m512i(r, e);
17715 }
17716
17717 #[simd_test(enable = "avx512bw")]
17718 unsafe fn test_mm512_maskz_dbsad_epu8() {
17719 let a = _mm512_set1_epi8(2);
17720 let b = _mm512_set1_epi8(4);
17721 let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
17722 assert_eq_m512i(r, _mm512_setzero_si512());
17723 let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
17724 let e = _mm512_set1_epi16(8);
17725 assert_eq_m512i(r, e);
17726 }
17727
17728 #[simd_test(enable = "avx512bw,avx512vl")]
17729 unsafe fn test_mm256_dbsad_epu8() {
17730 let a = _mm256_set1_epi8(2);
17731 let b = _mm256_set1_epi8(4);
17732 let r = _mm256_dbsad_epu8::<0>(a, b);
17733 let e = _mm256_set1_epi16(8);
17734 assert_eq_m256i(r, e);
17735 }
17736
17737 #[simd_test(enable = "avx512bw,avx512vl")]
17738 unsafe fn test_mm256_mask_dbsad_epu8() {
17739 let src = _mm256_set1_epi16(1);
17740 let a = _mm256_set1_epi8(2);
17741 let b = _mm256_set1_epi8(4);
17742 let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
17743 assert_eq_m256i(r, src);
17744 let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
17745 let e = _mm256_set1_epi16(8);
17746 assert_eq_m256i(r, e);
17747 }
17748
17749 #[simd_test(enable = "avx512bw,avx512vl")]
17750 unsafe fn test_mm256_maskz_dbsad_epu8() {
17751 let a = _mm256_set1_epi8(2);
17752 let b = _mm256_set1_epi8(4);
17753 let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
17754 assert_eq_m256i(r, _mm256_setzero_si256());
17755 let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
17756 let e = _mm256_set1_epi16(8);
17757 assert_eq_m256i(r, e);
17758 }
17759
17760 #[simd_test(enable = "avx512bw,avx512vl")]
17761 unsafe fn test_mm_dbsad_epu8() {
17762 let a = _mm_set1_epi8(2);
17763 let b = _mm_set1_epi8(4);
17764 let r = _mm_dbsad_epu8::<0>(a, b);
17765 let e = _mm_set1_epi16(8);
17766 assert_eq_m128i(r, e);
17767 }
17768
17769 #[simd_test(enable = "avx512bw,avx512vl")]
17770 unsafe fn test_mm_mask_dbsad_epu8() {
17771 let src = _mm_set1_epi16(1);
17772 let a = _mm_set1_epi8(2);
17773 let b = _mm_set1_epi8(4);
17774 let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
17775 assert_eq_m128i(r, src);
17776 let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
17777 let e = _mm_set1_epi16(8);
17778 assert_eq_m128i(r, e);
17779 }
17780
17781 #[simd_test(enable = "avx512bw,avx512vl")]
17782 unsafe fn test_mm_maskz_dbsad_epu8() {
17783 let a = _mm_set1_epi8(2);
17784 let b = _mm_set1_epi8(4);
17785 let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
17786 assert_eq_m128i(r, _mm_setzero_si128());
17787 let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
17788 let e = _mm_set1_epi16(8);
17789 assert_eq_m128i(r, e);
17790 }
17791
17792 #[simd_test(enable = "avx512bw")]
17793 unsafe fn test_mm512_movepi16_mask() {
17794 let a = _mm512_set1_epi16(1 << 15);
17795 let r = _mm512_movepi16_mask(a);
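      // movepi16_mask collects the sign bit of each 16-bit element, so 1 << 15 in every element yields an all-ones mask.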
17796 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
17797 assert_eq!(r, e);
17798 }
17799
17800 #[simd_test(enable = "avx512bw,avx512vl")]
17801 unsafe fn test_mm256_movepi16_mask() {
17802 let a = _mm256_set1_epi16(1 << 15);
17803 let r = _mm256_movepi16_mask(a);
17804 let e: __mmask16 = 0b11111111_11111111;
17805 assert_eq!(r, e);
17806 }
17807
17808 #[simd_test(enable = "avx512bw,avx512vl")]
17809 unsafe fn test_mm_movepi16_mask() {
17810 let a = _mm_set1_epi16(1 << 15);
17811 let r = _mm_movepi16_mask(a);
17812 let e: __mmask8 = 0b11111111;
17813 assert_eq!(r, e);
17814 }
17815
17816 #[simd_test(enable = "avx512bw")]
17817 unsafe fn test_mm512_movepi8_mask() {
17818 let a = _mm512_set1_epi8(1 << 7);
17819 let r = _mm512_movepi8_mask(a);
17820 let e: __mmask64 =
17821 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
17822 assert_eq!(r, e);
17823 }
17824
17825 #[simd_test(enable = "avx512bw,avx512vl")]
17826 unsafe fn test_mm256_movepi8_mask() {
17827 let a = _mm256_set1_epi8(1 << 7);
17828 let r = _mm256_movepi8_mask(a);
17829 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
17830 assert_eq!(r, e);
17831 }
17832
17833 #[simd_test(enable = "avx512bw,avx512vl")]
17834 unsafe fn test_mm_movepi8_mask() {
17835 let a = _mm_set1_epi8(1 << 7);
17836 let r = _mm_movepi8_mask(a);
17837 let e: __mmask16 = 0b11111111_11111111;
17838 assert_eq!(r, e);
17839 }
17840
17841 #[simd_test(enable = "avx512bw")]
17842 unsafe fn test_mm512_movm_epi16() {
17843 let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
17844 let r = _mm512_movm_epi16(a);
17845 let e = _mm512_set1_epi16(
17846 1 << 15
17847 | 1 << 14
17848 | 1 << 13
17849 | 1 << 12
17850 | 1 << 11
17851 | 1 << 10
17852 | 1 << 9
17853 | 1 << 8
17854 | 1 << 7
17855 | 1 << 6
17856 | 1 << 5
17857 | 1 << 4
17858 | 1 << 3
17859 | 1 << 2
17860 | 1 << 1
17861 | 1 << 0,
17862 );
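      // OR-ing all 16 bit positions gives 0xFFFF, i.e. every element of e is -1, matching the all-ones mask expansion.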
17863 assert_eq_m512i(r, e);
17864 }
17865
17866 #[simd_test(enable = "avx512bw,avx512vl")]
17867 unsafe fn test_mm256_movm_epi16() {
17868 let a: __mmask16 = 0b11111111_11111111;
17869 let r = _mm256_movm_epi16(a);
17870 let e = _mm256_set1_epi16(
17871 1 << 15
17872 | 1 << 14
17873 | 1 << 13
17874 | 1 << 12
17875 | 1 << 11
17876 | 1 << 10
17877 | 1 << 9
17878 | 1 << 8
17879 | 1 << 7
17880 | 1 << 6
17881 | 1 << 5
17882 | 1 << 4
17883 | 1 << 3
17884 | 1 << 2
17885 | 1 << 1
17886 | 1 << 0,
17887 );
17888 assert_eq_m256i(r, e);
17889 }
17890
17891 #[simd_test(enable = "avx512bw,avx512vl")]
17892 unsafe fn test_mm_movm_epi16() {
17893 let a: __mmask8 = 0b11111111;
17894 let r = _mm_movm_epi16(a);
17895 let e = _mm_set1_epi16(
17896 1 << 15
17897 | 1 << 14
17898 | 1 << 13
17899 | 1 << 12
17900 | 1 << 11
17901 | 1 << 10
17902 | 1 << 9
17903 | 1 << 8
17904 | 1 << 7
17905 | 1 << 6
17906 | 1 << 5
17907 | 1 << 4
17908 | 1 << 3
17909 | 1 << 2
17910 | 1 << 1
17911 | 1 << 0,
17912 );
17913 assert_eq_m128i(r, e);
17914 }
17915
17916 #[simd_test(enable = "avx512bw")]
17917 unsafe fn test_mm512_movm_epi8() {
17918 let a: __mmask64 =
17919 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
17920 let r = _mm512_movm_epi8(a);
17921 let e =
17922 _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
17923 assert_eq_m512i(r, e);
17924 }
17925
17926 #[simd_test(enable = "avx512bw,avx512vl")]
17927 unsafe fn test_mm256_movm_epi8() {
17928 let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
17929 let r = _mm256_movm_epi8(a);
17930 let e =
17931 _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
17932 assert_eq_m256i(r, e);
17933 }
17934
17935 #[simd_test(enable = "avx512bw,avx512vl")]
17936 unsafe fn test_mm_movm_epi8() {
17937 let a: __mmask16 = 0b11111111_11111111;
17938 let r = _mm_movm_epi8(a);
17939 let e =
17940 _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0);
17941 assert_eq_m128i(r, e);
17942 }
17943
17944 #[simd_test(enable = "avx512bw")]
17945 unsafe fn test_kadd_mask32() {
17946 let a: __mmask32 = 11;
17947 let b: __mmask32 = 22;
17948 let r = _kadd_mask32(a, b);
17949 let e: __mmask32 = 33;
17950 assert_eq!(r, e);
17951 }
17952
17953 #[simd_test(enable = "avx512bw")]
17954 unsafe fn test_kadd_mask64() {
17955 let a: __mmask64 = 11;
17956 let b: __mmask64 = 22;
17957 let r = _kadd_mask64(a, b);
17958 let e: __mmask64 = 33;
17959 assert_eq!(r, e);
17960 }
17961
17962 #[simd_test(enable = "avx512bw")]
17963 unsafe fn test_kand_mask32() {
17964 let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
17965 let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
17966 let r = _kand_mask32(a, b);
17967 let e: __mmask32 = 0b11001100_00110011_11001100_00110011;
17968 assert_eq!(r, e);
17969 }
17970
17971 #[simd_test(enable = "avx512bw")]
17972 unsafe fn test_kand_mask64() {
17973 let a: __mmask64 =
17974 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17975 let b: __mmask64 =
17976 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17977 let r = _kand_mask64(a, b);
17978 let e: __mmask64 =
17979 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17980 assert_eq!(r, e);
17981 }
17982
17983 #[simd_test(enable = "avx512bw")]
17984 unsafe fn test_knot_mask32() {
17985 let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
17986 let r = _knot_mask32(a);
17987 let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
17988 assert_eq!(r, e);
17989 }
17990
17991 #[simd_test(enable = "avx512bw")]
17992 unsafe fn test_knot_mask64() {
17993 let a: __mmask64 =
17994 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
17995 let r = _knot_mask64(a);
17996 let e: __mmask64 =
17997 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
17998 assert_eq!(r, e);
17999 }
18000
18001 #[simd_test(enable = "avx512bw")]
18002 unsafe fn test_kandn_mask32() {
18003 let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
18004 let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
18005 let r = _kandn_mask32(a, b);
18006 let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
18007 assert_eq!(r, e);
18008 }
18009
18010 #[simd_test(enable = "avx512bw")]
18011 unsafe fn test_kandn_mask64() {
18012 let a: __mmask64 =
18013 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
18014 let b: __mmask64 =
18015 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
18016 let r = _kandn_mask64(a, b);
18017 let e: __mmask64 =
18018 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
18019 assert_eq!(r, e);
18020 }
18021
18022 #[simd_test(enable = "avx512bw")]
18023 unsafe fn test_kor_mask32() {
18024 let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
18025 let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
18026 let r = _kor_mask32(a, b);
18027 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
18028 assert_eq!(r, e);
18029 }
18030
18031 #[simd_test(enable = "avx512bw")]
18032 unsafe fn test_kor_mask64() {
18033 let a: __mmask64 =
18034 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
18035 let b: __mmask64 =
18036 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
18037 let r = _kor_mask64(a, b);
18038 let e: __mmask64 =
18039 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
18040 assert_eq!(r, e);
18041 }
18042
18043 #[simd_test(enable = "avx512bw")]
18044 unsafe fn test_kxor_mask32() {
18045 let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
18046 let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
18047 let r = _kxor_mask32(a, b);
18048 let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
18049 assert_eq!(r, e);
18050 }
18051
18052 #[simd_test(enable = "avx512bw")]
18053 unsafe fn test_kxor_mask64() {
18054 let a: __mmask64 =
18055 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
18056 let b: __mmask64 =
18057 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
18058 let r = _kxor_mask64(a, b);
18059 let e: __mmask64 =
18060 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
18061 assert_eq!(r, e);
18062 }
18063
18064 #[simd_test(enable = "avx512bw")]
18065 unsafe fn test_kxnor_mask32() {
18066 let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
18067 let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
18068 let r = _kxnor_mask32(a, b);
18069 let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
18070 assert_eq!(r, e);
18071 }
18072
18073 #[simd_test(enable = "avx512bw")]
18074 unsafe fn test_kxnor_mask64() {
18075 let a: __mmask64 =
18076 0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
18077 let b: __mmask64 =
18078 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
18079 let r = _kxnor_mask64(a, b);
18080 let e: __mmask64 =
18081 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
18082 assert_eq!(r, e);
18083 }
18084
18085 #[simd_test(enable = "avx512bw")]
18086 unsafe fn test_mm512_cvtepi16_epi8() {
18087 let a = _mm512_set1_epi16(2);
18088 let r = _mm512_cvtepi16_epi8(a);
18089 let e = _mm256_set1_epi8(2);
18090 assert_eq_m256i(r, e);
18091 }
18092
18093 #[simd_test(enable = "avx512bw")]
18094 unsafe fn test_mm512_mask_cvtepi16_epi8() {
18095 let src = _mm256_set1_epi8(1);
18096 let a = _mm512_set1_epi16(2);
18097 let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
18098 assert_eq_m256i(r, src);
18099 let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18100 let e = _mm256_set1_epi8(2);
18101 assert_eq_m256i(r, e);
18102 }
18103
18104 #[simd_test(enable = "avx512bw")]
18105 unsafe fn test_mm512_maskz_cvtepi16_epi8() {
18106 let a = _mm512_set1_epi16(2);
18107 let r = _mm512_maskz_cvtepi16_epi8(0, a);
18108 assert_eq_m256i(r, _mm256_setzero_si256());
18109 let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
18110 let e = _mm256_set1_epi8(2);
18111 assert_eq_m256i(r, e);
18112 }
18113
18114 #[simd_test(enable = "avx512bw,avx512vl")]
18115 unsafe fn test_mm256_cvtepi16_epi8() {
18116 let a = _mm256_set1_epi16(2);
18117 let r = _mm256_cvtepi16_epi8(a);
18118 let e = _mm_set1_epi8(2);
18119 assert_eq_m128i(r, e);
18120 }
18121
18122 #[simd_test(enable = "avx512bw,avx512vl")]
18123 unsafe fn test_mm256_mask_cvtepi16_epi8() {
18124 let src = _mm_set1_epi8(1);
18125 let a = _mm256_set1_epi16(2);
18126 let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
18127 assert_eq_m128i(r, src);
18128 let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
18129 let e = _mm_set1_epi8(2);
18130 assert_eq_m128i(r, e);
18131 }
18132
18133 #[simd_test(enable = "avx512bw,avx512vl")]
18134 unsafe fn test_mm256_maskz_cvtepi16_epi8() {
18135 let a = _mm256_set1_epi16(2);
18136 let r = _mm256_maskz_cvtepi16_epi8(0, a);
18137 assert_eq_m128i(r, _mm_setzero_si128());
18138 let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
18139 let e = _mm_set1_epi8(2);
18140 assert_eq_m128i(r, e);
18141 }
18142
18143 #[simd_test(enable = "avx512bw,avx512vl")]
18144 unsafe fn test_mm_cvtepi16_epi8() {
18145 let a = _mm_set1_epi16(2);
18146 let r = _mm_cvtepi16_epi8(a);
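      // The 128-bit variant narrows the 8 words into the low 8 bytes of the result and zeroes the upper 8 bytes.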
18147 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
18148 assert_eq_m128i(r, e);
18149 }
18150
18151 #[simd_test(enable = "avx512bw,avx512vl")]
18152 unsafe fn test_mm_mask_cvtepi16_epi8() {
18153 let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
18154 let a = _mm_set1_epi16(2);
18155 let r = _mm_mask_cvtepi16_epi8(src, 0, a);
18156 assert_eq_m128i(r, src);
18157 let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a);
18158 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
18159 assert_eq_m128i(r, e);
18160 }
18161
18162 #[simd_test(enable = "avx512bw,avx512vl")]
18163 unsafe fn test_mm_maskz_cvtepi16_epi8() {
18164 let a = _mm_set1_epi16(2);
18165 let r = _mm_maskz_cvtepi16_epi8(0, a);
18166 assert_eq_m128i(r, _mm_setzero_si128());
18167 let r = _mm_maskz_cvtepi16_epi8(0b11111111, a);
18168 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
18169 assert_eq_m128i(r, e);
18170 }
18171
18172 #[simd_test(enable = "avx512bw")]
18173 unsafe fn test_mm512_cvtsepi16_epi8() {
18174 let a = _mm512_set1_epi16(i16::MAX);
18175 let r = _mm512_cvtsepi16_epi8(a);
18176 let e = _mm256_set1_epi8(i8::MAX);
18177 assert_eq_m256i(r, e);
18178 }
18179
18180 #[simd_test(enable = "avx512bw")]
18181 unsafe fn test_mm512_mask_cvtsepi16_epi8() {
18182 let src = _mm256_set1_epi8(1);
18183 let a = _mm512_set1_epi16(i16::MAX);
18184 let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
18185 assert_eq_m256i(r, src);
18186 let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18187 let e = _mm256_set1_epi8(i8::MAX);
18188 assert_eq_m256i(r, e);
18189 }
18190
18191 #[simd_test(enable = "avx512bw,avx512vl")]
18192 unsafe fn test_mm256_cvtsepi16_epi8() {
18193 let a = _mm256_set1_epi16(i16::MAX);
18194 let r = _mm256_cvtsepi16_epi8(a);
18195 let e = _mm_set1_epi8(i8::MAX);
18196 assert_eq_m128i(r, e);
18197 }
18198
18199 #[simd_test(enable = "avx512bw,avx512vl")]
18200 unsafe fn test_mm256_mask_cvtsepi16_epi8() {
18201 let src = _mm_set1_epi8(1);
18202 let a = _mm256_set1_epi16(i16::MAX);
18203 let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
18204 assert_eq_m128i(r, src);
18205 let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a);
18206 let e = _mm_set1_epi8(i8::MAX);
18207 assert_eq_m128i(r, e);
18208 }
18209
18210 #[simd_test(enable = "avx512bw,avx512vl")]
18211 unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
18212 let a = _mm256_set1_epi16(i16::MAX);
18213 let r = _mm256_maskz_cvtsepi16_epi8(0, a);
18214 assert_eq_m128i(r, _mm_setzero_si128());
18215 let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a);
18216 let e = _mm_set1_epi8(i8::MAX);
18217 assert_eq_m128i(r, e);
18218 }
18219
18220 #[simd_test(enable = "avx512bw,avx512vl")]
18221 unsafe fn test_mm_cvtsepi16_epi8() {
18222 let a = _mm_set1_epi16(i16::MAX);
18223 let r = _mm_cvtsepi16_epi8(a);
18224 #[rustfmt::skip]
18225 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
18226 assert_eq_m128i(r, e);
18227 }
18228
18229 #[simd_test(enable = "avx512bw,avx512vl")]
18230 unsafe fn test_mm_mask_cvtsepi16_epi8() {
18231 let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
18232 let a = _mm_set1_epi16(i16::MAX);
18233 let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
18234 assert_eq_m128i(r, src);
18235 let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a);
18236 #[rustfmt::skip]
18237 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
18238 assert_eq_m128i(r, e);
18239 }
18240
18241 #[simd_test(enable = "avx512bw,avx512vl")]
18242 unsafe fn test_mm_maskz_cvtsepi16_epi8() {
18243 let a = _mm_set1_epi16(i16::MAX);
18244 let r = _mm_maskz_cvtsepi16_epi8(0, a);
18245 assert_eq_m128i(r, _mm_setzero_si128());
18246 let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a);
18247 #[rustfmt::skip]
18248 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX);
18249 assert_eq_m128i(r, e);
18250 }
18251
18252 #[simd_test(enable = "avx512bw")]
18253 unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
18254 let a = _mm512_set1_epi16(i16::MAX);
18255 let r = _mm512_maskz_cvtsepi16_epi8(0, a);
18256 assert_eq_m256i(r, _mm256_setzero_si256());
18257 let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a);
18258 let e = _mm256_set1_epi8(i8::MAX);
18259 assert_eq_m256i(r, e);
18260 }
18261
18262 #[simd_test(enable = "avx512bw")]
18263 unsafe fn test_mm512_cvtusepi16_epi8() {
18264 let a = _mm512_set1_epi16(i16::MIN);
18265 let r = _mm512_cvtusepi16_epi8(a);
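      // Unsigned saturation treats i16::MIN as 0x8000, which clamps to 0xFF; read back as a signed byte that is -1.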
18266 let e = _mm256_set1_epi8(-1);
18267 assert_eq_m256i(r, e);
18268 }
18269
18270 #[simd_test(enable = "avx512bw")]
18271 unsafe fn test_mm512_mask_cvtusepi16_epi8() {
18272 let src = _mm256_set1_epi8(1);
18273 let a = _mm512_set1_epi16(i16::MIN);
18274 let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
18275 assert_eq_m256i(r, src);
18276 let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
18277 let e = _mm256_set1_epi8(-1);
18278 assert_eq_m256i(r, e);
18279 }
18280
18281 #[simd_test(enable = "avx512bw")]
18282 unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
18283 let a = _mm512_set1_epi16(i16::MIN);
18284 let r = _mm512_maskz_cvtusepi16_epi8(0, a);
18285 assert_eq_m256i(r, _mm256_setzero_si256());
18286 let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a);
18287 let e = _mm256_set1_epi8(-1);
18288 assert_eq_m256i(r, e);
18289 }
18290
18291 #[simd_test(enable = "avx512bw,avx512vl")]
18292 unsafe fn test_mm256_cvtusepi16_epi8() {
18293 let a = _mm256_set1_epi16(i16::MIN);
18294 let r = _mm256_cvtusepi16_epi8(a);
18295 let e = _mm_set1_epi8(-1);
18296 assert_eq_m128i(r, e);
18297 }
18298
18299 #[simd_test(enable = "avx512bw,avx512vl")]
18300 unsafe fn test_mm256_mask_cvtusepi16_epi8() {
18301 let src = _mm_set1_epi8(1);
18302 let a = _mm256_set1_epi16(i16::MIN);
18303 let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
18304 assert_eq_m128i(r, src);
18305 let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a);
18306 let e = _mm_set1_epi8(-1);
18307 assert_eq_m128i(r, e);
18308 }
18309
18310 #[simd_test(enable = "avx512bw,avx512vl")]
18311 unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
18312 let a = _mm256_set1_epi16(i16::MIN);
18313 let r = _mm256_maskz_cvtusepi16_epi8(0, a);
18314 assert_eq_m128i(r, _mm_setzero_si128());
18315 let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a);
18316 let e = _mm_set1_epi8(-1);
18317 assert_eq_m128i(r, e);
18318 }
18319
18320 #[simd_test(enable = "avx512bw,avx512vl")]
18321 unsafe fn test_mm_cvtusepi16_epi8() {
18322 let a = _mm_set1_epi16(i16::MIN);
18323 let r = _mm_cvtusepi16_epi8(a);
18324 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
18325 assert_eq_m128i(r, e);
18326 }
18327
18328 #[simd_test(enable = "avx512bw,avx512vl")]
18329 unsafe fn test_mm_mask_cvtusepi16_epi8() {
18330 let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
18331 let a = _mm_set1_epi16(i16::MIN);
18332 let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
18333 assert_eq_m128i(r, src);
18334 let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a);
18335 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
18336 assert_eq_m128i(r, e);
18337 }
18338
18339 #[simd_test(enable = "avx512bw,avx512vl")]
18340 unsafe fn test_mm_maskz_cvtusepi16_epi8() {
18341 let a = _mm_set1_epi16(i16::MIN);
18342 let r = _mm_maskz_cvtusepi16_epi8(0, a);
18343 assert_eq_m128i(r, _mm_setzero_si128());
18344 let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a);
18345 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
18346 assert_eq_m128i(r, e);
18347 }
18348
18349 #[simd_test(enable = "avx512bw")]
18350 unsafe fn test_mm512_cvtepi8_epi16() {
18351 let a = _mm256_set1_epi8(2);
18352 let r = _mm512_cvtepi8_epi16(a);
18353 let e = _mm512_set1_epi16(2);
18354 assert_eq_m512i(r, e);
18355 }
18356
18357 #[simd_test(enable = "avx512bw")]
18358 unsafe fn test_mm512_mask_cvtepi8_epi16() {
18359 let src = _mm512_set1_epi16(1);
18360 let a = _mm256_set1_epi8(2);
18361 let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
18362 assert_eq_m512i(r, src);
18363 let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
18364 let e = _mm512_set1_epi16(2);
18365 assert_eq_m512i(r, e);
18366 }
18367
18368 #[simd_test(enable = "avx512bw")]
18369 unsafe fn test_mm512_maskz_cvtepi8_epi16() {
18370 let a = _mm256_set1_epi8(2);
18371 let r = _mm512_maskz_cvtepi8_epi16(0, a);
18372 assert_eq_m512i(r, _mm512_setzero_si512());
18373 let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a);
18374 let e = _mm512_set1_epi16(2);
18375 assert_eq_m512i(r, e);
18376 }
18377
18378 #[simd_test(enable = "avx512bw,avx512vl")]
18379 unsafe fn test_mm256_mask_cvtepi8_epi16() {
18380 let src = _mm256_set1_epi16(1);
18381 let a = _mm_set1_epi8(2);
18382 let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
18383 assert_eq_m256i(r, src);
18384 let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a);
18385 let e = _mm256_set1_epi16(2);
18386 assert_eq_m256i(r, e);
18387 }
18388
18389 #[simd_test(enable = "avx512bw,avx512vl")]
18390 unsafe fn test_mm256_maskz_cvtepi8_epi16() {
18391 let a = _mm_set1_epi8(2);
18392 let r = _mm256_maskz_cvtepi8_epi16(0, a);
18393 assert_eq_m256i(r, _mm256_setzero_si256());
18394 let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a);
18395 let e = _mm256_set1_epi16(2);
18396 assert_eq_m256i(r, e);
18397 }
18398
18399 #[simd_test(enable = "avx512bw,avx512vl")]
18400 unsafe fn test_mm_mask_cvtepi8_epi16() {
18401 let src = _mm_set1_epi16(1);
18402 let a = _mm_set1_epi8(2);
18403 let r = _mm_mask_cvtepi8_epi16(src, 0, a);
18404 assert_eq_m128i(r, src);
18405 let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a);
18406 let e = _mm_set1_epi16(2);
18407 assert_eq_m128i(r, e);
18408 }
18409
18410 #[simd_test(enable = "avx512bw,avx512vl")]
18411 unsafe fn test_mm_maskz_cvtepi8_epi16() {
18412 let a = _mm_set1_epi8(2);
18413 let r = _mm_maskz_cvtepi8_epi16(0, a);
18414 assert_eq_m128i(r, _mm_setzero_si128());
18415 let r = _mm_maskz_cvtepi8_epi16(0b11111111, a);
18416 let e = _mm_set1_epi16(2);
18417 assert_eq_m128i(r, e);
18418 }
18419
18420 #[simd_test(enable = "avx512bw")]
18421 unsafe fn test_mm512_cvtepu8_epi16() {
18422 let a = _mm256_set1_epi8(2);
18423 let r = _mm512_cvtepu8_epi16(a);
18424 let e = _mm512_set1_epi16(2);
18425 assert_eq_m512i(r, e);
18426 }
18427
18428 #[simd_test(enable = "avx512bw")]
18429 unsafe fn test_mm512_mask_cvtepu8_epi16() {
18430 let src = _mm512_set1_epi16(1);
18431 let a = _mm256_set1_epi8(2);
18432 let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
18433 assert_eq_m512i(r, src);
18434 let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a);
18435 let e = _mm512_set1_epi16(2);
18436 assert_eq_m512i(r, e);
18437 }
18438
18439 #[simd_test(enable = "avx512bw")]
18440 unsafe fn test_mm512_maskz_cvtepu8_epi16() {
18441 let a = _mm256_set1_epi8(2);
18442 let r = _mm512_maskz_cvtepu8_epi16(0, a);
18443 assert_eq_m512i(r, _mm512_setzero_si512());
18444 let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a);
18445 let e = _mm512_set1_epi16(2);
18446 assert_eq_m512i(r, e);
18447 }
18448
18449 #[simd_test(enable = "avx512bw,avx512vl")]
18450 unsafe fn test_mm256_mask_cvtepu8_epi16() {
18451 let src = _mm256_set1_epi16(1);
18452 let a = _mm_set1_epi8(2);
18453 let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
18454 assert_eq_m256i(r, src);
18455 let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a);
18456 let e = _mm256_set1_epi16(2);
18457 assert_eq_m256i(r, e);
18458 }
18459
18460 #[simd_test(enable = "avx512bw,avx512vl")]
18461 unsafe fn test_mm256_maskz_cvtepu8_epi16() {
18462 let a = _mm_set1_epi8(2);
18463 let r = _mm256_maskz_cvtepu8_epi16(0, a);
18464 assert_eq_m256i(r, _mm256_setzero_si256());
18465 let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a);
18466 let e = _mm256_set1_epi16(2);
18467 assert_eq_m256i(r, e);
18468 }
18469
18470 #[simd_test(enable = "avx512bw,avx512vl")]
18471 unsafe fn test_mm_mask_cvtepu8_epi16() {
18472 let src = _mm_set1_epi16(1);
18473 let a = _mm_set1_epi8(2);
18474 let r = _mm_mask_cvtepu8_epi16(src, 0, a);
18475 assert_eq_m128i(r, src);
18476 let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a);
18477 let e = _mm_set1_epi16(2);
18478 assert_eq_m128i(r, e);
18479 }
18480
18481 #[simd_test(enable = "avx512bw,avx512vl")]
18482 unsafe fn test_mm_maskz_cvtepu8_epi16() {
18483 let a = _mm_set1_epi8(2);
18484 let r = _mm_maskz_cvtepu8_epi16(0, a);
18485 assert_eq_m128i(r, _mm_setzero_si128());
18486 let r = _mm_maskz_cvtepu8_epi16(0b11111111, a);
18487 let e = _mm_set1_epi16(2);
18488 assert_eq_m128i(r, e);
18489 }
18490
18491 #[simd_test(enable = "avx512bw")]
18492 unsafe fn test_mm512_bslli_epi128() {
18493 #[rustfmt::skip]
18494 let a = _mm512_set_epi8(
18495 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18496 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18497 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18498 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18499 );
17df50a5 18500 let r = _mm512_bslli_epi128::<9>(a);
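// Each 128-bit lane is shifted left by 9 bytes; bytes shifted out are discarded and zeros shift in, so only byte 3 of each lane (the lowest set byte) survives, landing at byte 12.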
18501 #[rustfmt::skip]
18502 let e = _mm512_set_epi8(
18503 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18504 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18505 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18506 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18507 );
18508 assert_eq_m512i(r, e);
18509 }
18510
18511 #[simd_test(enable = "avx512bw")]
18512 unsafe fn test_mm512_bsrli_epi128() {
18513 #[rustfmt::skip]
18514 let a = _mm512_set_epi8(
18515 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
18516 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
18517 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
18518 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
fc512014 18519 );
17df50a5 18520 let r = _mm512_bsrli_epi128::<3>(a);
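// Each 128-bit lane is shifted right by 3 bytes, with zeros filling the vacated high bytes of the lane.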
18521 #[rustfmt::skip]
18522 let e = _mm512_set_epi8(
18523 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
18524 0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
18525 0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
18526 0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
18527 );
18528 assert_eq_m512i(r, e);
18529 }
18530
18531 #[simd_test(enable = "avx512bw")]
18532 unsafe fn test_mm512_alignr_epi8() {
18533 #[rustfmt::skip]
18534 let a = _mm512_set_epi8(
18535 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18536 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18537 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18538 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18539 );
18540 let b = _mm512_set1_epi8(1);
17df50a5 18541 let r = _mm512_alignr_epi8::<14>(a, b);
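// For each 128-bit lane, the lane of a is concatenated above the corresponding lane of b, the 32-byte value is shifted right by 14 bytes, and the low 16 bytes form the result.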
18542 #[rustfmt::skip]
18543 let e = _mm512_set_epi8(
18544 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18545 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18546 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18547 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18548 );
18549 assert_eq_m512i(r, e);
18550 }
18551
18552 #[simd_test(enable = "avx512bw")]
18553 unsafe fn test_mm512_mask_alignr_epi8() {
18554 #[rustfmt::skip]
18555 let a = _mm512_set_epi8(
18556 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18557 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18558 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18559 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18560 );
18561 let b = _mm512_set1_epi8(1);
17df50a5 18562 let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b);
fc512014 18563 assert_eq_m512i(r, a);
17df50a5 18564 let r = _mm512_mask_alignr_epi8::<14>(
18565 a,
18566 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18567 a,
18568 b,
18569 );
18570 #[rustfmt::skip]
18571 let e = _mm512_set_epi8(
18572 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18573 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18574 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18575 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18576 );
18577 assert_eq_m512i(r, e);
18578 }
18579
18580 #[simd_test(enable = "avx512bw")]
18581 unsafe fn test_mm512_maskz_alignr_epi8() {
18582 #[rustfmt::skip]
18583 let a = _mm512_set_epi8(
18584 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18585 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18586 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18587 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18588 );
18589 let b = _mm512_set1_epi8(1);
17df50a5 18590 let r = _mm512_maskz_alignr_epi8::<14>(0, a, b);
fc512014 18591 assert_eq_m512i(r, _mm512_setzero_si512());
17df50a5 18592 let r = _mm512_maskz_alignr_epi8::<14>(
18593 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
18594 a,
18595 b,
18596 );
18597 #[rustfmt::skip]
18598 let e = _mm512_set_epi8(
18599 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18600 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18601 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18602 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18603 );
18604 assert_eq_m512i(r, e);
18605 }
18606
18607 #[simd_test(enable = "avx512bw,avx512vl")]
18608 unsafe fn test_mm256_mask_alignr_epi8() {
18609 #[rustfmt::skip]
18610 let a = _mm256_set_epi8(
18611 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18612 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18613 );
18614 let b = _mm256_set1_epi8(1);
17df50a5 18615 let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b);
cdc7bbd5 18616 assert_eq_m256i(r, a);
17df50a5 18617 let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b);
18618 #[rustfmt::skip]
18619 let e = _mm256_set_epi8(
18620 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18621 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18622 );
18623 assert_eq_m256i(r, e);
18624 }
18625
18626 #[simd_test(enable = "avx512bw,avx512vl")]
18627 unsafe fn test_mm256_maskz_alignr_epi8() {
18628 #[rustfmt::skip]
18629 let a = _mm256_set_epi8(
18630 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18631 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
18632 );
18633 let b = _mm256_set1_epi8(1);
17df50a5 18634 let r = _mm256_maskz_alignr_epi8::<14>(0, a, b);
cdc7bbd5 18635 assert_eq_m256i(r, _mm256_setzero_si256());
17df50a5 18636 let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b);
18637 #[rustfmt::skip]
18638 let e = _mm256_set_epi8(
18639 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18640 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1,
18641 );
18642 assert_eq_m256i(r, e);
18643 }
18644
18645 #[simd_test(enable = "avx512bw,avx512vl")]
18646 unsafe fn test_mm_mask_alignr_epi8() {
18647 let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
18648 let b = _mm_set1_epi8(1);
17df50a5 18649 let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
cdc7bbd5 18650 assert_eq_m128i(r, a);
17df50a5 18651 let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b);
18652 let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
18653 assert_eq_m128i(r, e);
18654 }
18655
18656 #[simd_test(enable = "avx512bw,avx512vl")]
18657 unsafe fn test_mm_maskz_alignr_epi8() {
18658 let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
18659 let b = _mm_set1_epi8(1);
17df50a5 18660 let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
cdc7bbd5 18661 assert_eq_m128i(r, _mm_setzero_si128());
17df50a5 18662 let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b);
18663 let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1);
18664 assert_eq_m128i(r, e);
18665 }
18666
18667 #[simd_test(enable = "avx512bw")]
18668 unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
18669 let a = _mm512_set1_epi16(i16::MAX);
18670 let mut r = _mm256_undefined_si256();
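// Converts with signed saturation and stores the 32 mask-selected bytes directly to unaligned memory at the given address.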
18671 _mm512_mask_cvtsepi16_storeu_epi8(
18672 &mut r as *mut _ as *mut i8,
18673 0b11111111_11111111_11111111_11111111,
18674 a,
18675 );
18676 let e = _mm256_set1_epi8(i8::MAX);
18677 assert_eq_m256i(r, e);
18678 }
18679
18680 #[simd_test(enable = "avx512bw,avx512vl")]
18681 unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
18682 let a = _mm256_set1_epi16(i16::MAX);
18683 let mut r = _mm_undefined_si128();
18684 _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
18685 let e = _mm_set1_epi8(i8::MAX);
18686 assert_eq_m128i(r, e);
18687 }
18688
18689 #[simd_test(enable = "avx512bw,avx512vl")]
18690 unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
18691 let a = _mm_set1_epi16(i16::MAX);
18692 let mut r = _mm_set1_epi8(0);
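// The 128-bit store variant writes only the 8 converted bytes, so the upper half of r keeps its zero initialization.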
18693 _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
18694 #[rustfmt::skip]
18695 let e = _mm_set_epi8(
18696 0, 0, 0, 0, 0, 0, 0, 0,
18697 i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX,
18698 );
18699 assert_eq_m128i(r, e);
18700 }
18701
18702 #[simd_test(enable = "avx512bw")]
18703 unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
18704 let a = _mm512_set1_epi16(8);
18705 let mut r = _mm256_undefined_si256();
18706 _mm512_mask_cvtepi16_storeu_epi8(
18707 &mut r as *mut _ as *mut i8,
18708 0b11111111_11111111_11111111_11111111,
18709 a,
18710 );
18711 let e = _mm256_set1_epi8(8);
18712 assert_eq_m256i(r, e);
18713 }
18714
18715 #[simd_test(enable = "avx512bw,avx512vl")]
18716 unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
18717 let a = _mm256_set1_epi16(8);
18718 let mut r = _mm_undefined_si128();
18719 _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
18720 let e = _mm_set1_epi8(8);
18721 assert_eq_m128i(r, e);
18722 }
18723
18724 #[simd_test(enable = "avx512bw,avx512vl")]
18725 unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
18726 let a = _mm_set1_epi16(8);
18727 let mut r = _mm_set1_epi8(0);
18728 _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
18729 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
18730 assert_eq_m128i(r, e);
18731 }
18732
18733 #[simd_test(enable = "avx512bw")]
18734 unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
18735 let a = _mm512_set1_epi16(i16::MAX);
18736 let mut r = _mm256_undefined_si256();
18737 _mm512_mask_cvtusepi16_storeu_epi8(
18738 &mut r as *mut _ as *mut i8,
18739 0b11111111_11111111_11111111_11111111,
18740 a,
18741 );
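// i16::MAX (0x7FFF) exceeds u8::MAX, so unsigned saturation clamps every stored byte to 0xFF.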
18742 let e = _mm256_set1_epi8(u8::MAX as i8);
18743 assert_eq_m256i(r, e);
18744 }
18745
18746 #[simd_test(enable = "avx512bw,avx512vl")]
18747 unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
18748 let a = _mm256_set1_epi16(i16::MAX);
18749 let mut r = _mm_undefined_si128();
18750 _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
18751 let e = _mm_set1_epi8(u8::MAX as i8);
18752 assert_eq_m128i(r, e);
18753 }
18754
18755 #[simd_test(enable = "avx512bw,avx512vl")]
18756 unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
18757 let a = _mm_set1_epi16(i16::MAX);
18758 let mut r = _mm_set1_epi8(0);
18759 _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
18760 #[rustfmt::skip]
18761 let e = _mm_set_epi8(
18762 0, 0, 0, 0,
18763 0, 0, 0, 0,
18764 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
18765 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
18766 );
18767 assert_eq_m128i(r, e);
18768 }
fc512014 18769}