]> git.proxmox.com Git - rustc.git/blob - library/stdarch/crates/core_arch/src/x86/avx512vbmi2.rs
New upstream version 1.53.0+dfsg1
[rustc.git] / library / stdarch / crates / core_arch / src / x86 / avx512vbmi2.rs
1 use crate::core_arch::{simd::*, simd_llvm::*, x86::*};
2
3 #[cfg(test)]
4 use stdarch_test::assert_instr;
5
6 /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
7 ///
8 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compress_epi16&expand=1192)
9 #[inline]
10 #[target_feature(enable = "avx512vbmi2")]
11 #[cfg_attr(test, assert_instr(vpcompressw))]
12 pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
13 transmute(vpcompressw(a.as_i16x32(), src.as_i16x32(), k))
14 }
15
16 /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
17 ///
18 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_compress_epi16&expand=1193)
19 #[inline]
20 #[target_feature(enable = "avx512vbmi2")]
21 #[cfg_attr(test, assert_instr(vpcompressw))]
22 pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i {
23 transmute(vpcompressw(
24 a.as_i16x32(),
25 _mm512_setzero_si512().as_i16x32(),
26 k,
27 ))
28 }
29
30 /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
31 ///
32 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compress_epi16&expand=1190)
33 #[inline]
34 #[target_feature(enable = "avx512vbmi2,avx512vl")]
35 #[cfg_attr(test, assert_instr(vpcompressw))]
36 pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
37 transmute(vpcompressw256(a.as_i16x16(), src.as_i16x16(), k))
38 }
39
40 /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
41 ///
42 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_compress_epi16&expand=1191)
43 #[inline]
44 #[target_feature(enable = "avx512vbmi2,avx512vl")]
45 #[cfg_attr(test, assert_instr(vpcompressw))]
46 pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i {
47 transmute(vpcompressw256(
48 a.as_i16x16(),
49 _mm256_setzero_si256().as_i16x16(),
50 k,
51 ))
52 }
53
54 /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
55 ///
56 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compress_epi16&expand=1188)
57 #[inline]
58 #[target_feature(enable = "avx512vbmi2,avx512vl")]
59 #[cfg_attr(test, assert_instr(vpcompressw))]
60 pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
61 transmute(vpcompressw128(a.as_i16x8(), src.as_i16x8(), k))
62 }
63
64 /// Contiguously store the active 16-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
65 ///
66 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_compress_epi16&expand=1189)
67 #[inline]
68 #[target_feature(enable = "avx512vbmi2,avx512vl")]
69 #[cfg_attr(test, assert_instr(vpcompressw))]
70 pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i {
71 transmute(vpcompressw128(
72 a.as_i16x8(),
73 _mm_setzero_si128().as_i16x8(),
74 k,
75 ))
76 }
77
78 /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
79 ///
80 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compress_epi8&expand=1210)
81 #[inline]
82 #[target_feature(enable = "avx512vbmi2")]
83 #[cfg_attr(test, assert_instr(vpcompressb))]
84 pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
85 transmute(vpcompressb(a.as_i8x64(), src.as_i8x64(), k))
86 }
87
88 /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
89 ///
90 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_compress_epi8&expand=1211)
91 #[inline]
92 #[target_feature(enable = "avx512vbmi2")]
93 #[cfg_attr(test, assert_instr(vpcompressb))]
94 pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i {
95 transmute(vpcompressb(
96 a.as_i8x64(),
97 _mm512_setzero_si512().as_i8x64(),
98 k,
99 ))
100 }
101
102 /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
103 ///
104 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compress_epi8&expand=1208)
105 #[inline]
106 #[target_feature(enable = "avx512vbmi2,avx512vl")]
107 #[cfg_attr(test, assert_instr(vpcompressb))]
108 pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
109 transmute(vpcompressb256(a.as_i8x32(), src.as_i8x32(), k))
110 }
111
112 /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
113 ///
114 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_compress_epi8&expand=1209)
115 #[inline]
116 #[target_feature(enable = "avx512vbmi2,avx512vl")]
117 #[cfg_attr(test, assert_instr(vpcompressb))]
118 pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i {
119 transmute(vpcompressb256(
120 a.as_i8x32(),
121 _mm256_setzero_si256().as_i8x32(),
122 k,
123 ))
124 }
125
126 /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
127 ///
128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compress_epi8&expand=1206)
129 #[inline]
130 #[target_feature(enable = "avx512vbmi2,avx512vl")]
131 #[cfg_attr(test, assert_instr(vpcompressb))]
132 pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
133 transmute(vpcompressb128(a.as_i8x16(), src.as_i8x16(), k))
134 }
135
136 /// Contiguously store the active 8-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
137 ///
138 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_compress_epi8&expand=1207)
139 #[inline]
140 #[target_feature(enable = "avx512vbmi2,avx512vl")]
141 #[cfg_attr(test, assert_instr(vpcompressb))]
142 pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i {
143 transmute(vpcompressb128(
144 a.as_i8x16(),
145 _mm_setzero_si128().as_i8x16(),
146 k,
147 ))
148 }
149
150 /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
151 ///
152 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi16&expand=2310)
153 #[inline]
154 #[target_feature(enable = "avx512vbmi2")]
155 #[cfg_attr(test, assert_instr(vpexpandw))]
156 pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
157 transmute(vpexpandw(a.as_i16x32(), src.as_i16x32(), k))
158 }
159
160 /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
161 ///
162 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_epi16&expand=2311)
163 #[inline]
164 #[target_feature(enable = "avx512vbmi2")]
165 #[cfg_attr(test, assert_instr(vpexpandw))]
166 pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i {
167 transmute(vpexpandw(
168 a.as_i16x32(),
169 _mm512_setzero_si512().as_i16x32(),
170 k,
171 ))
172 }
173
174 /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
175 ///
176 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_expand_epi16&expand=2308)
177 #[inline]
178 #[target_feature(enable = "avx512vbmi2,avx512vl")]
179 #[cfg_attr(test, assert_instr(vpexpandw))]
180 pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
181 transmute(vpexpandw256(a.as_i16x16(), src.as_i16x16(), k))
182 }
183
184 /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
185 ///
186 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_expand_epi16&expand=2309)
187 #[inline]
188 #[target_feature(enable = "avx512vbmi2,avx512vl")]
189 #[cfg_attr(test, assert_instr(vpexpandw))]
190 pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i {
191 transmute(vpexpandw256(
192 a.as_i16x16(),
193 _mm256_setzero_si256().as_i16x16(),
194 k,
195 ))
196 }
197
198 /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
199 ///
200 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_expand_epi16&expand=2306)
201 #[inline]
202 #[target_feature(enable = "avx512vbmi2,avx512vl")]
203 #[cfg_attr(test, assert_instr(vpexpandw))]
204 pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
205 transmute(vpexpandw128(a.as_i16x8(), src.as_i16x8(), k))
206 }
207
208 /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
209 ///
210 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_expand_epi16&expand=2307)
211 #[inline]
212 #[target_feature(enable = "avx512vbmi2,avx512vl")]
213 #[cfg_attr(test, assert_instr(vpexpandw))]
214 pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i {
215 transmute(vpexpandw128(
216 a.as_i16x8(),
217 _mm_setzero_si128().as_i16x8(),
218 k,
219 ))
220 }
221
222 /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
223 ///
224 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi8&expand=2328)
225 #[inline]
226 #[target_feature(enable = "avx512vbmi2")]
227 #[cfg_attr(test, assert_instr(vpexpandb))]
228 pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
229 transmute(vpexpandb(a.as_i8x64(), src.as_i8x64(), k))
230 }
231
232 /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
233 ///
234 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_epi8&expand=2329)
235 #[inline]
236 #[target_feature(enable = "avx512vbmi2")]
237 #[cfg_attr(test, assert_instr(vpexpandb))]
238 pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i {
239 transmute(vpexpandb(
240 a.as_i8x64(),
241 _mm512_setzero_si512().as_i8x64(),
242 k,
243 ))
244 }
245
246 /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
247 ///
248 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_expand_epi8&expand=2326)
249 #[inline]
250 #[target_feature(enable = "avx512vbmi2,avx512vl")]
251 #[cfg_attr(test, assert_instr(vpexpandb))]
252 pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
253 transmute(vpexpandb256(a.as_i8x32(), src.as_i8x32(), k))
254 }
255
256 /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
257 ///
258 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_expand_epi8&expand=2327)
259 #[inline]
260 #[target_feature(enable = "avx512vbmi2,avx512vl")]
261 #[cfg_attr(test, assert_instr(vpexpandb))]
262 pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i {
263 transmute(vpexpandb256(
264 a.as_i8x32(),
265 _mm256_setzero_si256().as_i8x32(),
266 k,
267 ))
268 }
269
270 /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
271 ///
272 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_expand_epi8&expand=2324)
273 #[inline]
274 #[target_feature(enable = "avx512vbmi2,avx512vl")]
275 #[cfg_attr(test, assert_instr(vpexpandb))]
276 pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
277 transmute(vpexpandb128(a.as_i8x16(), src.as_i8x16(), k))
278 }
279
280 /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
281 ///
282 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_expand_epi8&expand=2325)
283 #[inline]
284 #[target_feature(enable = "avx512vbmi2,avx512vl")]
285 #[cfg_attr(test, assert_instr(vpexpandb))]
286 pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
287 transmute(vpexpandb128(
288 a.as_i8x16(),
289 _mm_setzero_si128().as_i8x16(),
290 k,
291 ))
292 }
293
294 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
295 ///
296 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldv_epi64&expand=5087)
297 #[inline]
298 #[target_feature(enable = "avx512vbmi2")]
299 #[cfg_attr(test, assert_instr(vpshldvq))]
300 pub unsafe fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
301 transmute(vpshldvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8()))
302 }
303
304 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
305 ///
306 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldv_epi64&expand=5085)
307 #[inline]
308 #[target_feature(enable = "avx512vbmi2")]
309 #[cfg_attr(test, assert_instr(vpshldvq))]
310 pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
311 let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
312 transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
313 }
314
315 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
316 ///
317 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldv_epi64&expand=5086)
318 #[inline]
319 #[target_feature(enable = "avx512vbmi2")]
320 #[cfg_attr(test, assert_instr(vpshldvq))]
321 pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
322 let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
323 let zero = _mm512_setzero_si512().as_i64x8();
324 transmute(simd_select_bitmask(k, shf, zero))
325 }
326
327 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
328 ///
329 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldv_epi64&expand=5084)
330 #[inline]
331 #[target_feature(enable = "avx512vbmi2,avx512vl")]
332 #[cfg_attr(test, assert_instr(vpshldvq))]
333 pub unsafe fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
334 transmute(vpshldvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
335 }
336
337 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
338 ///
339 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldv_epi64&expand=5082)
340 #[inline]
341 #[target_feature(enable = "avx512vbmi2,avx512vl")]
342 #[cfg_attr(test, assert_instr(vpshldvq))]
343 pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
344 let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
345 transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
346 }
347
348 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
349 ///
350 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldv_epi64&expand=5083)
351 #[inline]
352 #[target_feature(enable = "avx512vbmi2,avx512vl")]
353 #[cfg_attr(test, assert_instr(vpshldvq))]
354 pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
355 let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
356 let zero = _mm256_setzero_si256().as_i64x4();
357 transmute(simd_select_bitmask(k, shf, zero))
358 }
359
360 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst.
361 ///
362 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldv_epi64&expand=5081)
363 #[inline]
364 #[target_feature(enable = "avx512vbmi2,avx512vl")]
365 #[cfg_attr(test, assert_instr(vpshldvq))]
366 pub unsafe fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
367 transmute(vpshldvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
368 }
369
370 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
371 ///
372 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldv_epi64&expand=5079)
373 #[inline]
374 #[target_feature(enable = "avx512vbmi2,avx512vl")]
375 #[cfg_attr(test, assert_instr(vpshldvq))]
376 pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
377 let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
378 transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
379 }
380
381 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
382 ///
383 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldv_epi64&expand=5080)
384 #[inline]
385 #[target_feature(enable = "avx512vbmi2,avx512vl")]
386 #[cfg_attr(test, assert_instr(vpshldvq))]
387 pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
388 let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
389 let zero = _mm_setzero_si128().as_i64x2();
390 transmute(simd_select_bitmask(k, shf, zero))
391 }
392
393 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
394 ///
395 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldv_epi32&expand=5078)
396 #[inline]
397 #[target_feature(enable = "avx512vbmi2")]
398 #[cfg_attr(test, assert_instr(vpshldvd))]
399 pub unsafe fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
400 transmute(vpshldvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
401 }
402
403 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
404 ///
405 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldv_epi32&expand=5076)
406 #[inline]
407 #[target_feature(enable = "avx512vbmi2")]
408 #[cfg_attr(test, assert_instr(vpshldvd))]
409 pub unsafe fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
410 let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
411 transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
412 }
413
414 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
415 ///
416 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldv_epi32&expand=5077)
417 #[inline]
418 #[target_feature(enable = "avx512vbmi2")]
419 #[cfg_attr(test, assert_instr(vpshldvd))]
420 pub unsafe fn _mm512_maskz_shldv_epi32(
421 k: __mmask16,
422 a: __m512i,
423 b: __m512i,
424 c: __m512i,
425 ) -> __m512i {
426 let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
427 let zero = _mm512_setzero_si512().as_i32x16();
428 transmute(simd_select_bitmask(k, shf, zero))
429 }
430
431 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
432 ///
433 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldv_epi32&expand=5075)
434 #[inline]
435 #[target_feature(enable = "avx512vbmi2,avx512vl")]
436 #[cfg_attr(test, assert_instr(vpshldvd))]
437 pub unsafe fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
438 transmute(vpshldvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
439 }
440
441 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
442 ///
443 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldv_epi32&expand=5073)
444 #[inline]
445 #[target_feature(enable = "avx512vbmi2,avx512vl")]
446 #[cfg_attr(test, assert_instr(vpshldvd))]
447 pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
448 let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
449 transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
450 }
451
452 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
453 ///
454 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldv_epi32&expand=5074)
455 #[inline]
456 #[target_feature(enable = "avx512vbmi2,avx512vl")]
457 #[cfg_attr(test, assert_instr(vpshldvd))]
458 pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
459 let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
460 let zero = _mm256_setzero_si256().as_i32x8();
461 transmute(simd_select_bitmask(k, shf, zero))
462 }
463
464 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst.
465 ///
466 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldv_epi32&expand=5072)
467 #[inline]
468 #[target_feature(enable = "avx512vbmi2,avx512vl")]
469 #[cfg_attr(test, assert_instr(vpshldvd))]
470 pub unsafe fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
471 transmute(vpshldvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
472 }
473
474 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
475 ///
476 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldv_epi32&expand=5070)
477 #[inline]
478 #[target_feature(enable = "avx512vbmi2,avx512vl")]
479 #[cfg_attr(test, assert_instr(vpshldvd))]
480 pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
481 let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
482 transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
483 }
484
485 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
486 ///
487 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldv_epi32&expand=5071)
488 #[inline]
489 #[target_feature(enable = "avx512vbmi2,avx512vl")]
490 #[cfg_attr(test, assert_instr(vpshldvd))]
491 pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
492 let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
493 let zero = _mm_setzero_si128().as_i32x4();
494 transmute(simd_select_bitmask(k, shf, zero))
495 }
496
497 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
498 ///
499 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldv_epi16&expand=5069)
500 #[inline]
501 #[target_feature(enable = "avx512vbmi2")]
502 #[cfg_attr(test, assert_instr(vpshldvw))]
503 pub unsafe fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
504 transmute(vpshldvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
505 }
506
507 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
508 ///
509 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldv_epi16&expand=5067)
510 #[inline]
511 #[target_feature(enable = "avx512vbmi2")]
512 #[cfg_attr(test, assert_instr(vpshldvw))]
513 pub unsafe fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
514 let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
515 transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
516 }
517
518 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
519 ///
520 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldv_epi16&expand=5068)
521 #[inline]
522 #[target_feature(enable = "avx512vbmi2")]
523 #[cfg_attr(test, assert_instr(vpshldvw))]
524 pub unsafe fn _mm512_maskz_shldv_epi16(
525 k: __mmask32,
526 a: __m512i,
527 b: __m512i,
528 c: __m512i,
529 ) -> __m512i {
530 let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
531 let zero = _mm512_setzero_si512().as_i16x32();
532 transmute(simd_select_bitmask(k, shf, zero))
533 }
534
535 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
536 ///
537 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldv_epi16&expand=5066)
538 #[inline]
539 #[target_feature(enable = "avx512vbmi2,avx512vl")]
540 #[cfg_attr(test, assert_instr(vpshldvw))]
541 pub unsafe fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
542 transmute(vpshldvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
543 }
544
545 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
546 ///
547 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldv_epi16&expand=5064)
548 #[inline]
549 #[target_feature(enable = "avx512vbmi2,avx512vl")]
550 #[cfg_attr(test, assert_instr(vpshldvw))]
551 pub unsafe fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
552 let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
553 transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
554 }
555
556 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
557 ///
558 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldv_epi16&expand=5065)
559 #[inline]
560 #[target_feature(enable = "avx512vbmi2,avx512vl")]
561 #[cfg_attr(test, assert_instr(vpshldvw))]
562 pub unsafe fn _mm256_maskz_shldv_epi16(
563 k: __mmask16,
564 a: __m256i,
565 b: __m256i,
566 c: __m256i,
567 ) -> __m256i {
568 let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
569 let zero = _mm256_setzero_si256().as_i16x16();
570 transmute(simd_select_bitmask(k, shf, zero))
571 }
572
573 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst.
574 ///
575 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldv_epi16&expand=5063)
576 #[inline]
577 #[target_feature(enable = "avx512vbmi2,avx512vl")]
578 #[cfg_attr(test, assert_instr(vpshldvw))]
579 pub unsafe fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
580 transmute(vpshldvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
581 }
582
583 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
584 ///
585 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldv_epi16&expand=5061)
586 #[inline]
587 #[target_feature(enable = "avx512vbmi2,avx512vl")]
588 #[cfg_attr(test, assert_instr(vpshldvw))]
589 pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
590 let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
591 transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
592 }
593
594 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
595 ///
596 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldv_epi16&expand=5062)
597 #[inline]
598 #[target_feature(enable = "avx512vbmi2,avx512vl")]
599 #[cfg_attr(test, assert_instr(vpshldvw))]
600 pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
601 let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
602 let zero = _mm_setzero_si128().as_i16x8();
603 transmute(simd_select_bitmask(k, shf, zero))
604 }
605
606 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
607 ///
608 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdv_epi64&expand=5141)
609 #[inline]
610 #[target_feature(enable = "avx512vbmi2")]
611 #[cfg_attr(test, assert_instr(vpshrdvq))]
612 pub unsafe fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
613 transmute(vpshrdvq(a.as_i64x8(), b.as_i64x8(), c.as_i64x8()))
614 }
615
616 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
617 ///
618 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdv_epi64&expand=5139)
619 #[inline]
620 #[target_feature(enable = "avx512vbmi2")]
621 #[cfg_attr(test, assert_instr(vpshrdvq))]
622 pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
623 let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
624 transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
625 }
626
627 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
628 ///
629 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdv_epi64&expand=5140)
630 #[inline]
631 #[target_feature(enable = "avx512vbmi2")]
632 #[cfg_attr(test, assert_instr(vpshrdvq))]
633 pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
634 let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
635 let zero = _mm512_setzero_si512().as_i64x8();
636 transmute(simd_select_bitmask(k, shf, zero))
637 }
638
639 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
640 ///
641 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdv_epi64&expand=5138)
642 #[inline]
643 #[target_feature(enable = "avx512vbmi2,avx512vl")]
644 #[cfg_attr(test, assert_instr(vpshrdvq))]
645 pub unsafe fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
646 transmute(vpshrdvq256(a.as_i64x4(), b.as_i64x4(), c.as_i64x4()))
647 }
648
649 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
650 ///
651 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdv_epi64&expand=5136)
652 #[inline]
653 #[target_feature(enable = "avx512vbmi2,avx512vl")]
654 #[cfg_attr(test, assert_instr(vpshrdvq))]
655 pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
656 let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
657 transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
658 }
659
660 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
661 ///
662 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdv_epi64&expand=5137)
663 #[inline]
664 #[target_feature(enable = "avx512vbmi2,avx512vl")]
665 #[cfg_attr(test, assert_instr(vpshrdvq))]
666 pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
667 let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
668 let zero = _mm256_setzero_si256().as_i64x4();
669 transmute(simd_select_bitmask(k, shf, zero))
670 }
671
672 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst.
673 ///
674 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdv_epi64&expand=5135)
675 #[inline]
676 #[target_feature(enable = "avx512vbmi2,avx512vl")]
677 #[cfg_attr(test, assert_instr(vpshrdvq))]
678 pub unsafe fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
679 transmute(vpshrdvq128(a.as_i64x2(), b.as_i64x2(), c.as_i64x2()))
680 }
681
682 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
683 ///
684 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdv_epi64&expand=5133)
685 #[inline]
686 #[target_feature(enable = "avx512vbmi2,avx512vl")]
687 #[cfg_attr(test, assert_instr(vpshrdvq))]
688 pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
689 let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
690 transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
691 }
692
693 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
694 ///
695 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdv_epi64&expand=5134)
696 #[inline]
697 #[target_feature(enable = "avx512vbmi2,avx512vl")]
698 #[cfg_attr(test, assert_instr(vpshrdvq))]
699 pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
700 let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
701 let zero = _mm_setzero_si128().as_i64x2();
702 transmute(simd_select_bitmask(k, shf, zero))
703 }
704
705 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
706 ///
707 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdv_epi32&expand=5132)
708 #[inline]
709 #[target_feature(enable = "avx512vbmi2")]
710 #[cfg_attr(test, assert_instr(vpshrdvd))]
711 pub unsafe fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
712 transmute(vpshrdvd(a.as_i32x16(), b.as_i32x16(), c.as_i32x16()))
713 }
714
715 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
716 ///
717 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdv_epi32&expand=5130)
718 #[inline]
719 #[target_feature(enable = "avx512vbmi2")]
720 #[cfg_attr(test, assert_instr(vpshrdvd))]
721 pub unsafe fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
722 let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
723 transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
724 }
725
726 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
727 ///
728 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdv_epi32&expand=5131)
729 #[inline]
730 #[target_feature(enable = "avx512vbmi2")]
731 #[cfg_attr(test, assert_instr(vpshrdvd))]
732 pub unsafe fn _mm512_maskz_shrdv_epi32(
733 k: __mmask16,
734 a: __m512i,
735 b: __m512i,
736 c: __m512i,
737 ) -> __m512i {
738 let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
739 let zero = _mm512_setzero_si512().as_i32x16();
740 transmute(simd_select_bitmask(k, shf, zero))
741 }
742
743 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
744 ///
745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdv_epi32&expand=5129)
746 #[inline]
747 #[target_feature(enable = "avx512vbmi2,avx512vl")]
748 #[cfg_attr(test, assert_instr(vpshrdvd))]
749 pub unsafe fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
750 transmute(vpshrdvd256(a.as_i32x8(), b.as_i32x8(), c.as_i32x8()))
751 }
752
753 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
754 ///
755 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdv_epi32&expand=5127)
756 #[inline]
757 #[target_feature(enable = "avx512vbmi2,avx512vl")]
758 #[cfg_attr(test, assert_instr(vpshrdvd))]
759 pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
760 let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
761 transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
762 }
763
764 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
765 ///
766 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdv_epi32&expand=5128)
767 #[inline]
768 #[target_feature(enable = "avx512vbmi2,avx512vl")]
769 #[cfg_attr(test, assert_instr(vpshrdvd))]
770 pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
771 let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
772 let zero = _mm256_setzero_si256().as_i32x8();
773 transmute(simd_select_bitmask(k, shf, zero))
774 }
775
776 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst.
777 ///
778 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdv_epi32&expand=5126)
779 #[inline]
780 #[target_feature(enable = "avx512vbmi2,avx512vl")]
781 #[cfg_attr(test, assert_instr(vpshrdvd))]
782 pub unsafe fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
783 transmute(vpshrdvd128(a.as_i32x4(), b.as_i32x4(), c.as_i32x4()))
784 }
785
786 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
787 ///
788 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdv_epi32&expand=5124)
789 #[inline]
790 #[target_feature(enable = "avx512vbmi2,avx512vl")]
791 #[cfg_attr(test, assert_instr(vpshrdvd))]
792 pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
793 let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
794 transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
795 }
796
797 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
798 ///
799 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdv_epi32&expand=5125)
800 #[inline]
801 #[target_feature(enable = "avx512vbmi2,avx512vl")]
802 #[cfg_attr(test, assert_instr(vpshrdvd))]
803 pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
804 let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
805 let zero = _mm_setzero_si128().as_i32x4();
806 transmute(simd_select_bitmask(k, shf, zero))
807 }
808
809 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
810 ///
811 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdv_epi16&expand=5123)
812 #[inline]
813 #[target_feature(enable = "avx512vbmi2")]
814 #[cfg_attr(test, assert_instr(vpshrdvw))]
815 pub unsafe fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
816 transmute(vpshrdvw(a.as_i16x32(), b.as_i16x32(), c.as_i16x32()))
817 }
818
819 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
820 ///
821 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdv_epi16&expand=5121)
822 #[inline]
823 #[target_feature(enable = "avx512vbmi2")]
824 #[cfg_attr(test, assert_instr(vpshrdvw))]
825 pub unsafe fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
826 let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
827 transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
828 }
829
830 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
831 ///
832 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdv_epi16&expand=5122)
833 #[inline]
834 #[target_feature(enable = "avx512vbmi2")]
835 #[cfg_attr(test, assert_instr(vpshrdvw))]
836 pub unsafe fn _mm512_maskz_shrdv_epi16(
837 k: __mmask32,
838 a: __m512i,
839 b: __m512i,
840 c: __m512i,
841 ) -> __m512i {
842 let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
843 let zero = _mm512_setzero_si512().as_i16x32();
844 transmute(simd_select_bitmask(k, shf, zero))
845 }
846
847 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
848 ///
849 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdv_epi16&expand=5120)
850 #[inline]
851 #[target_feature(enable = "avx512vbmi2,avx512vl")]
852 #[cfg_attr(test, assert_instr(vpshrdvw))]
853 pub unsafe fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
854 transmute(vpshrdvw256(a.as_i16x16(), b.as_i16x16(), c.as_i16x16()))
855 }
856
857 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
858 ///
859 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdv_epi16&expand=5118)
860 #[inline]
861 #[target_feature(enable = "avx512vbmi2,avx512vl")]
862 #[cfg_attr(test, assert_instr(vpshrdvw))]
863 pub unsafe fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
864 let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
865 transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
866 }
867
868 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
869 ///
870 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdv_epi16&expand=5119)
871 #[inline]
872 #[target_feature(enable = "avx512vbmi2,avx512vl")]
873 #[cfg_attr(test, assert_instr(vpshrdvw))]
874 pub unsafe fn _mm256_maskz_shrdv_epi16(
875 k: __mmask16,
876 a: __m256i,
877 b: __m256i,
878 c: __m256i,
879 ) -> __m256i {
880 let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
881 let zero = _mm256_setzero_si256().as_i16x16();
882 transmute(simd_select_bitmask(k, shf, zero))
883 }
884
885 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst.
886 ///
887 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdv_epi16&expand=5117)
888 #[inline]
889 #[target_feature(enable = "avx512vbmi2,avx512vl")]
890 #[cfg_attr(test, assert_instr(vpshrdvw))]
891 pub unsafe fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
892 transmute(vpshrdvw128(a.as_i16x8(), b.as_i16x8(), c.as_i16x8()))
893 }
894
895 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
896 ///
897 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdv_epi16&expand=5115)
898 #[inline]
899 #[target_feature(enable = "avx512vbmi2,avx512vl")]
900 #[cfg_attr(test, assert_instr(vpshrdvw))]
901 pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
902 let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
903 transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
904 }
905
906 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
907 ///
908 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdv_epi16&expand=5116)
909 #[inline]
910 #[target_feature(enable = "avx512vbmi2,avx512vl")]
911 #[cfg_attr(test, assert_instr(vpshrdvw))]
912 pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
913 let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
914 let zero = _mm_setzero_si128().as_i16x8();
915 transmute(simd_select_bitmask(k, shf, zero))
916 }
917
918 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst).
919 ///
920 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi64&expand=5060)
921 #[inline]
922 #[target_feature(enable = "avx512vbmi2")]
923 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
924 #[rustc_args_required_const(2)]
925 pub unsafe fn _mm512_shldi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
926 assert!(imm8 >= 0 && imm8 <= 255);
927 transmute(vpshldvq(
928 a.as_i64x8(),
929 b.as_i64x8(),
930 _mm512_set1_epi64(imm8 as i64).as_i64x8(),
931 ))
932 }
933
934 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
935 ///
936 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi64&expand=5058)
937 #[inline]
938 #[target_feature(enable = "avx512vbmi2")]
939 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
940 #[rustc_args_required_const(4)]
941 pub unsafe fn _mm512_mask_shldi_epi64(
942 src: __m512i,
943 k: __mmask8,
944 a: __m512i,
945 b: __m512i,
946 imm8: i32,
947 ) -> __m512i {
948 assert!(imm8 >= 0 && imm8 <= 255);
949 let shf: i64x8 = vpshldvq(
950 a.as_i64x8(),
951 b.as_i64x8(),
952 _mm512_set1_epi64(imm8 as i64).as_i64x8(),
953 );
954 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
955 }
956
957 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
958 ///
959 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi64&expand=5059)
960 #[inline]
961 #[target_feature(enable = "avx512vbmi2")]
962 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
963 #[rustc_args_required_const(3)]
964 pub unsafe fn _mm512_maskz_shldi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
965 assert!(imm8 >= 0 && imm8 <= 255);
966 let shf: i64x8 = vpshldvq(
967 a.as_i64x8(),
968 b.as_i64x8(),
969 _mm512_set1_epi64(imm8 as i64).as_i64x8(),
970 );
971 let zero = _mm512_setzero_si512().as_i64x8();
972 transmute(simd_select_bitmask(k, shf, zero))
973 }
974
975 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
976 ///
977 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi64&expand=5057)
978 #[inline]
979 #[target_feature(enable = "avx512vbmi2,avx512vl")]
980 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
981 #[rustc_args_required_const(2)]
982 pub unsafe fn _mm256_shldi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
983 assert!(imm8 >= 0 && imm8 <= 255);
984 transmute(vpshldvq256(
985 a.as_i64x4(),
986 b.as_i64x4(),
987 _mm256_set1_epi64x(imm8 as i64).as_i64x4(),
988 ))
989 }
990
991 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
992 ///
993 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi64&expand=5055)
994 #[inline]
995 #[target_feature(enable = "avx512vbmi2,avx512vl")]
996 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
997 #[rustc_args_required_const(4)]
998 pub unsafe fn _mm256_mask_shldi_epi64(
999 src: __m256i,
1000 k: __mmask8,
1001 a: __m256i,
1002 b: __m256i,
1003 imm8: i32,
1004 ) -> __m256i {
1005 assert!(imm8 >= 0 && imm8 <= 255);
1006 let shf: i64x4 = vpshldvq256(
1007 a.as_i64x4(),
1008 b.as_i64x4(),
1009 _mm256_set1_epi64x(imm8 as i64).as_i64x4(),
1010 );
1011 transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1012 }
1013
1014 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1015 ///
1016 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi64&expand=5056)
1017 #[inline]
1018 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1019 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
1020 #[rustc_args_required_const(3)]
1021 pub unsafe fn _mm256_maskz_shldi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1022 assert!(imm8 >= 0 && imm8 <= 255);
1023 let shf: i64x4 = vpshldvq256(
1024 a.as_i64x4(),
1025 b.as_i64x4(),
1026 _mm256_set1_epi64x(imm8 as i64).as_i64x4(),
1027 );
1028 let zero = _mm256_setzero_si256().as_i64x4();
1029 transmute(simd_select_bitmask(k, shf, zero))
1030 }
1031
1032 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst.
1033 ///
1034 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi64&expand=5054)
1035 #[inline]
1036 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1037 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
1038 #[rustc_args_required_const(2)]
1039 pub unsafe fn _mm_shldi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1040 assert!(imm8 >= 0 && imm8 <= 255);
1041 transmute(vpshldvq128(
1042 a.as_i64x2(),
1043 b.as_i64x2(),
1044 _mm_set1_epi64x(imm8 as i64).as_i64x2(),
1045 ))
1046 }
1047
1048 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1049 ///
1050 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi64&expand=5052)
1051 #[inline]
1052 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1053 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
1054 #[rustc_args_required_const(4)]
1055 pub unsafe fn _mm_mask_shldi_epi64(
1056 src: __m128i,
1057 k: __mmask8,
1058 a: __m128i,
1059 b: __m128i,
1060 imm8: i32,
1061 ) -> __m128i {
1062 assert!(imm8 >= 0 && imm8 <= 255);
1063 let shf: i64x2 = vpshldvq128(
1064 a.as_i64x2(),
1065 b.as_i64x2(),
1066 _mm_set1_epi64x(imm8 as i64).as_i64x2(),
1067 );
1068 transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1069 }
1070
1071 /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1072 ///
1073 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi64&expand=5053)
1074 #[inline]
1075 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1076 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))]
1077 #[rustc_args_required_const(3)]
1078 pub unsafe fn _mm_maskz_shldi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1079 assert!(imm8 >= 0 && imm8 <= 255);
1080 let shf: i64x2 = vpshldvq128(
1081 a.as_i64x2(),
1082 b.as_i64x2(),
1083 _mm_set1_epi64x(imm8 as i64).as_i64x2(),
1084 );
1085 let zero = _mm_setzero_si128().as_i64x2();
1086 transmute(simd_select_bitmask(k, shf, zero))
1087 }
1088
1089 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1090 ///
1091 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi32&expand=5051)
1092 #[inline]
1093 #[target_feature(enable = "avx512vbmi2")]
1094 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1095 #[rustc_args_required_const(2)]
1096 pub unsafe fn _mm512_shldi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1097 assert!(imm8 >= 0 && imm8 <= 255);
1098 transmute(vpshldvd(
1099 a.as_i32x16(),
1100 b.as_i32x16(),
1101 _mm512_set1_epi32(imm8).as_i32x16(),
1102 ))
1103 }
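// Worked per-lane example (illustrative, not part of the test suite below): with a lane
// of a = 0x12345678, a lane of b = 0x9ABCDEF0 and imm8 = 8, the 64-bit intermediate is
// 0x12345678_9ABCDEF0 and its upper 32 bits after shifting left by 8 are 0x3456789A.
#[allow(dead_code)]
fn shldi32_lane_example() {
    let (a, b, s) = (0x1234_5678u32, 0x9ABC_DEF0u32, 8u32);
    let lane = (a << s) | (b >> (32 - s));
    assert_eq!(lane, 0x3456_789A);
}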
1104
1105 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1106 ///
1107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi32&expand=5049)
1108 #[inline]
1109 #[target_feature(enable = "avx512vbmi2")]
1110 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1111 #[rustc_args_required_const(4)]
1112 pub unsafe fn _mm512_mask_shldi_epi32(
1113 src: __m512i,
1114 k: __mmask16,
1115 a: __m512i,
1116 b: __m512i,
1117 imm8: i32,
1118 ) -> __m512i {
1119 assert!(imm8 >= 0 && imm8 <= 255);
1120 let shf: i32x16 = vpshldvd(
1121 a.as_i32x16(),
1122 b.as_i32x16(),
1123 _mm512_set1_epi32(imm8).as_i32x16(),
1124 );
1125 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1126 }
1127
1128 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1129 ///
1130 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi32&expand=5050)
1131 #[inline]
1132 #[target_feature(enable = "avx512vbmi2")]
1133 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1134 #[rustc_args_required_const(3)]
1135 pub unsafe fn _mm512_maskz_shldi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1136 assert!(imm8 >= 0 && imm8 <= 255);
1137 let shf: i32x16 = vpshldvd(
1138 a.as_i32x16(),
1139 b.as_i32x16(),
1140 _mm512_set1_epi32(imm8).as_i32x16(),
1141 );
1142 let zero = _mm512_setzero_si512().as_i32x16();
1143 transmute(simd_select_bitmask(k, shf, zero))
1144 }
1145
1146 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1147 ///
1148 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi32&expand=5048)
1149 #[inline]
1150 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1151 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1152 #[rustc_args_required_const(2)]
1153 pub unsafe fn _mm256_shldi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1154 assert!(imm8 >= 0 && imm8 <= 255);
1155 transmute(vpshldvd256(
1156 a.as_i32x8(),
1157 b.as_i32x8(),
1158 _mm256_set1_epi32(imm8).as_i32x8(),
1159 ))
1160 }
1161
1162 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1163 ///
1164 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi32&expand=5046)
1165 #[inline]
1166 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1167 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1168 #[rustc_args_required_const(4)]
1169 pub unsafe fn _mm256_mask_shldi_epi32(
1170 src: __m256i,
1171 k: __mmask8,
1172 a: __m256i,
1173 b: __m256i,
1174 imm8: i32,
1175 ) -> __m256i {
1176 assert!(imm8 >= 0 && imm8 <= 255);
1177 let shf: i32x8 = vpshldvd256(
1178 a.as_i32x8(),
1179 b.as_i32x8(),
1180 _mm256_set1_epi32(imm8).as_i32x8(),
1181 );
1182 transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1183 }
1184
1185 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1186 ///
1187 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi32&expand=5047)
1188 #[inline]
1189 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1190 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1191 #[rustc_args_required_const(3)]
1192 pub unsafe fn _mm256_maskz_shldi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1193 assert!(imm8 >= 0 && imm8 <= 255);
1194 let shf: i32x8 = vpshldvd256(
1195 a.as_i32x8(),
1196 b.as_i32x8(),
1197 _mm256_set1_epi32(imm8).as_i32x8(),
1198 );
1199 let zero = _mm256_setzero_si256().as_i32x8();
1200 transmute(simd_select_bitmask(k, shf, zero))
1201 }
1202
1203 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst.
1204 ///
1205 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi32&expand=5045)
1206 #[inline]
1207 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1208 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1209 #[rustc_args_required_const(2)]
1210 pub unsafe fn _mm_shldi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1211 assert!(imm8 >= 0 && imm8 <= 255);
1212 transmute(vpshldvd128(
1213 a.as_i32x4(),
1214 b.as_i32x4(),
1215 _mm_set1_epi32(imm8).as_i32x4(),
1216 ))
1217 }
1218
1219 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1220 ///
1221 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi32&expand=5043)
1222 #[inline]
1223 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1224 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1225 #[rustc_args_required_const(4)]
1226 pub unsafe fn _mm_mask_shldi_epi32(
1227 src: __m128i,
1228 k: __mmask8,
1229 a: __m128i,
1230 b: __m128i,
1231 imm8: i32,
1232 ) -> __m128i {
1233 assert!(imm8 >= 0 && imm8 <= 255);
1234 let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4());
1235 transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1236 }
1237
1238 /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1239 ///
1240 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi32&expand=5044)
1241 #[inline]
1242 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1243 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))]
1244 #[rustc_args_required_const(3)]
1245 pub unsafe fn _mm_maskz_shldi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1246 assert!(imm8 >= 0 && imm8 <= 255);
1247 let shf: i32x4 = vpshldvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4());
1248 let zero = _mm_setzero_si128().as_i32x4();
1249 transmute(simd_select_bitmask(k, shf, zero))
1250 }
1251
1252 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1253 ///
1254 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shldi_epi16&expand=5042)
1255 #[inline]
1256 #[target_feature(enable = "avx512vbmi2")]
1257 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1258 #[rustc_args_required_const(2)]
1259 pub unsafe fn _mm512_shldi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1260 assert!(imm8 >= 0 && imm8 <= 255);
1261 transmute(vpshldvw(
1262 a.as_i16x32(),
1263 b.as_i16x32(),
1264 _mm512_set1_epi16(imm8 as i16).as_i16x32(),
1265 ))
1266 }
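// The count for these 16-bit forms is reduced modulo the lane width, so an immediate of
// 20 selects the same bits as an immediate of 4. A scalar check of that equivalence
// (illustrative only; the closure models concat-shift-take-upper on one lane):
#[allow(dead_code)]
fn shldi16_count_wraps() {
    let (a, b) = (0xABCDu32, 0x1234u32);
    let lane = |s: u32| (((a << 16 | b) << (s % 16)) >> 16) & 0xFFFF;
    assert_eq!(lane(20), lane(4));
}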
1267
1268 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1269 ///
1270 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shldi_epi16&expand=5040)
1271 #[inline]
1272 #[target_feature(enable = "avx512vbmi2")]
1273 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1274 #[rustc_args_required_const(4)]
1275 pub unsafe fn _mm512_mask_shldi_epi16(
1276 src: __m512i,
1277 k: __mmask32,
1278 a: __m512i,
1279 b: __m512i,
1280 imm8: i32,
1281 ) -> __m512i {
1282 assert!(imm8 >= 0 && imm8 <= 255);
1283 let shf: i16x32 = vpshldvw(
1284 a.as_i16x32(),
1285 b.as_i16x32(),
1286 _mm512_set1_epi16(imm8 as i16).as_i16x32(),
1287 );
1288 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1289 }
1290
1291 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1292 ///
1293 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shldi_epi16&expand=5041)
1294 #[inline]
1295 #[target_feature(enable = "avx512vbmi2")]
1296 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1297 #[rustc_args_required_const(3)]
1298 pub unsafe fn _mm512_maskz_shldi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1299 assert!(imm8 >= 0 && imm8 <= 255);
1300 let shf: i16x32 = vpshldvw(
1301 a.as_i16x32(),
1302 b.as_i16x32(),
1303 _mm512_set1_epi16(imm8 as i16).as_i16x32(),
1304 );
1305 let zero = _mm512_setzero_si512().as_i16x32();
1306 transmute(simd_select_bitmask(k, shf, zero))
1307 }
1308
1309 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1310 ///
1311 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shldi_epi16&expand=5039)
1312 #[inline]
1313 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1314 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1315 #[rustc_args_required_const(2)]
1316 pub unsafe fn _mm256_shldi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1317 assert!(imm8 >= 0 && imm8 <= 255);
1318 transmute(vpshldvw256(
1319 a.as_i16x16(),
1320 b.as_i16x16(),
1321 _mm256_set1_epi16(imm8 as i16).as_i16x16(),
1322 ))
1323 }
1324
1325 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1326 ///
1327 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shldi_epi16&expand=5037)
1328 #[inline]
1329 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1330 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1331 #[rustc_args_required_const(4)]
1332 pub unsafe fn _mm256_mask_shldi_epi16(
1333 src: __m256i,
1334 k: __mmask16,
1335 a: __m256i,
1336 b: __m256i,
1337 imm8: i32,
1338 ) -> __m256i {
1339 assert!(imm8 >= 0 && imm8 <= 255);
1340 let shf: i16x16 = vpshldvw256(
1341 a.as_i16x16(),
1342 b.as_i16x16(),
1343 _mm256_set1_epi16(imm8 as i16).as_i16x16(),
1344 );
1345 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1346 }
1347
1348 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1349 ///
1350 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shldi_epi16&expand=5038)
1351 #[inline]
1352 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1353 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1354 #[rustc_args_required_const(3)]
1355 pub unsafe fn _mm256_maskz_shldi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1356 let shf: i16x16 = vpshldvw256(
1357 a.as_i16x16(),
1358 b.as_i16x16(),
1359 _mm256_set1_epi16(imm8 as i16).as_i16x16(),
1360 );
1361 let zero = _mm256_setzero_si256().as_i16x16();
1362 transmute(simd_select_bitmask(k, shf, zero))
1363 }
1364
1365 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst.
1366 ///
1367 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shldi_epi16&expand=5036)
1368 #[inline]
1369 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1370 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1371 #[rustc_args_required_const(2)]
1372 pub unsafe fn _mm_shldi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1373 transmute(vpshldvw128(
1374 a.as_i16x8(),
1375 b.as_i16x8(),
1376 _mm_set1_epi16(imm8 as i16).as_i16x8(),
1377 ))
1378 }
1379
1380 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1381 ///
1382 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shldi_epi16&expand=5034)
1383 #[inline]
1384 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1385 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1386 #[rustc_args_required_const(4)]
1387 pub unsafe fn _mm_mask_shldi_epi16(
1388 src: __m128i,
1389 k: __mmask8,
1390 a: __m128i,
1391 b: __m128i,
1392 imm8: i32,
1393 ) -> __m128i {
1394 let shf: i16x8 = vpshldvw128(
1395 a.as_i16x8(),
1396 b.as_i16x8(),
1397 _mm_set1_epi16(imm8 as i16).as_i16x8(),
1398 );
1399 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1400 }
1401
1402 /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1403 ///
1404 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shldi_epi16&expand=5035)
1405 #[inline]
1406 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1407 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))]
1408 #[rustc_args_required_const(3)]
1409 pub unsafe fn _mm_maskz_shldi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1410 let shf: i16x8 = vpshldvw128(
1411 a.as_i16x8(),
1412 b.as_i16x8(),
1413 _mm_set1_epi16(imm8 as i16).as_i16x8(),
1414 );
1415 let zero = _mm_setzero_si128().as_i16x8();
1416 transmute(simd_select_bitmask(k, shf, zero))
1417 }
1418
1419 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1420 ///
1421 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi64&expand=5114)
1422 #[inline]
1423 #[target_feature(enable = "avx512vbmi2")]
1424 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1425 #[rustc_args_required_const(2)]
1426 pub unsafe fn _mm512_shrdi_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1427 assert!(imm8 >= 0 && imm8 <= 255);
1428 transmute(vpshrdvq(
1429 a.as_i64x8(),
1430 b.as_i64x8(),
1431 _mm512_set1_epi64(imm8 as i64).as_i64x8(),
1432 ))
1433 }
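// For the right-shift immediate family the concatenation order flips relative to the
// shldi forms: the lane from b supplies the high half, the lane from a the low half,
// and the low half of the shifted intermediate is kept. Scalar sketch of one 64-bit
// lane (hypothetical helper name, count reduced modulo 64 as in the funnel shift):
#[allow(dead_code)]
fn shrdi64_lane_reference(a: u64, b: u64, imm8: u32) -> u64 {
    let s = imm8 % 64;
    if s == 0 {
        a
    } else {
        (a >> s) | (b << (64 - s))
    }
}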
1434
1435 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1436 ///
1437 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi64&expand=5112)
1438 #[inline]
1439 #[target_feature(enable = "avx512vbmi2")]
1440 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1441 #[rustc_args_required_const(4)]
1442 pub unsafe fn _mm512_mask_shrdi_epi64(
1443 src: __m512i,
1444 k: __mmask8,
1445 a: __m512i,
1446 b: __m512i,
1447 imm8: i32,
1448 ) -> __m512i {
1449 assert!(imm8 >= 0 && imm8 <= 255);
1450 let shf: i64x8 = vpshrdvq(
1451 a.as_i64x8(),
1452 b.as_i64x8(),
1453 _mm512_set1_epi64(imm8 as i64).as_i64x8(),
1454 );
1455 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
1456 }
1457
1458 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1459 ///
1460 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi64&expand=5113)
1461 #[inline]
1462 #[target_feature(enable = "avx512vbmi2")]
1463 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 255))] //should be vpshrdq
1464 #[rustc_args_required_const(3)]
1465 pub unsafe fn _mm512_maskz_shrdi_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1466 assert!(imm8 >= 0 && imm8 <= 255);
1467 let shf: i64x8 = vpshrdvq(
1468 a.as_i64x8(),
1469 b.as_i64x8(),
1470 _mm512_set1_epi64(imm8 as i64).as_i64x8(),
1471 );
1472 let zero = _mm512_setzero_si512().as_i64x8();
1473 transmute(simd_select_bitmask(k, shf, zero))
1474 }
1475
1476 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1477 ///
1478 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi64&expand=5111)
1479 #[inline]
1480 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1481 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1482 #[rustc_args_required_const(2)]
1483 pub unsafe fn _mm256_shrdi_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1484 assert!(imm8 >= 0 && imm8 <= 255);
1485 transmute(vpshrdvq256(
1486 a.as_i64x4(),
1487 b.as_i64x4(),
1488 _mm256_set1_epi64x(imm8 as i64).as_i64x4(),
1489 ))
1490 }
1491
1492 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1493 ///
1494 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi64&expand=5109)
1495 #[inline]
1496 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1497 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1498 #[rustc_args_required_const(4)]
1499 pub unsafe fn _mm256_mask_shrdi_epi64(
1500 src: __m256i,
1501 k: __mmask8,
1502 a: __m256i,
1503 b: __m256i,
1504 imm8: i32,
1505 ) -> __m256i {
1506 assert!(imm8 >= 0 && imm8 <= 255);
1507 let shf: i64x4 = vpshrdvq256(
1508 a.as_i64x4(),
1509 b.as_i64x4(),
1510 _mm256_set1_epi64x(imm8 as i64).as_i64x4(),
1511 );
1512 transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
1513 }
1514
1515 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1516 ///
1517 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi64&expand=5110)
1518 #[inline]
1519 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1520 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1521 #[rustc_args_required_const(3)]
1522 pub unsafe fn _mm256_maskz_shrdi_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1523 assert!(imm8 >= 0 && imm8 <= 255);
1524 let shf: i64x4 = vpshrdvq256(
1525 a.as_i64x4(),
1526 b.as_i64x4(),
1527 _mm256_set1_epi64x(imm8 as i64).as_i64x4(),
1528 );
1529 let zero = _mm256_setzero_si256().as_i64x4();
1530 transmute(simd_select_bitmask(k, shf, zero))
1531 }
1532
1533 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst.
1534 ///
1535 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi64&expand=5108)
1536 #[inline]
1537 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1538 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1539 #[rustc_args_required_const(2)]
1540 pub unsafe fn _mm_shrdi_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1541 assert!(imm8 >= 0 && imm8 <= 255);
1542 transmute(vpshrdvq128(
1543 a.as_i64x2(),
1544 b.as_i64x2(),
1545 _mm_set1_epi64x(imm8 as i64).as_i64x2(),
1546 ))
1547 }
1548
1549 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1550 ///
1551 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi64&expand=5106)
1552 #[inline]
1553 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1554 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1555 #[rustc_args_required_const(4)]
1556 pub unsafe fn _mm_mask_shrdi_epi64(
1557 src: __m128i,
1558 k: __mmask8,
1559 a: __m128i,
1560 b: __m128i,
1561 imm8: i32,
1562 ) -> __m128i {
1563 assert!(imm8 >= 0 && imm8 <= 255);
1564 let shf: i64x2 = vpshrdvq128(
1565 a.as_i64x2(),
1566 b.as_i64x2(),
1567 _mm_set1_epi64x(imm8 as i64).as_i64x2(),
1568 );
1569 transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
1570 }
1571
1572 /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1573 ///
1574 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi64&expand=5107)
1575 #[inline]
1576 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1577 #[cfg_attr(test, assert_instr(vpshldq, imm8 = 5))] //should be vpshrdq
1578 #[rustc_args_required_const(3)]
1579 pub unsafe fn _mm_maskz_shrdi_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1580 assert!(imm8 >= 0 && imm8 <= 255);
1581 let shf: i64x2 = vpshrdvq128(
1582 a.as_i64x2(),
1583 b.as_i64x2(),
1584 _mm_set1_epi64x(imm8 as i64).as_i64x2(),
1585 );
1586 let zero = _mm_setzero_si128().as_i64x2();
1587 transmute(simd_select_bitmask(k, shf, zero))
1588 }
1589
1590 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1591 ///
1592 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi32&expand=5105)
1593 #[inline]
1594 #[target_feature(enable = "avx512vbmi2")]
1595 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1596 #[rustc_args_required_const(2)]
1597 pub unsafe fn _mm512_shrdi_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1598 assert!(imm8 >= 0 && imm8 <= 255);
1599 transmute(vpshrdvd(
1600 a.as_i32x16(),
1601 b.as_i32x16(),
1602 _mm512_set1_epi32(imm8).as_i32x16(),
1603 ))
1604 }
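// Worked per-lane example (illustrative only): with a lane of a = 0x12345678, a lane of
// b = 0x9ABCDEF0 and imm8 = 8, the 64-bit intermediate is 0x9ABCDEF0_12345678 and the
// low 32 bits after shifting right by 8 are 0xF0123456.
#[allow(dead_code)]
fn shrdi32_lane_example() {
    let (a, b, s) = (0x1234_5678u32, 0x9ABC_DEF0u32, 8u32);
    let lane = (a >> s) | (b << (32 - s));
    assert_eq!(lane, 0xF012_3456);
}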
1605
1606 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1607 ///
1608 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi32&expand=5103)
1609 #[inline]
1610 #[target_feature(enable = "avx512vbmi2")]
1611 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1612 #[rustc_args_required_const(4)]
1613 pub unsafe fn _mm512_mask_shrdi_epi32(
1614 src: __m512i,
1615 k: __mmask16,
1616 a: __m512i,
1617 b: __m512i,
1618 imm8: i32,
1619 ) -> __m512i {
1620 assert!(imm8 >= 0 && imm8 <= 255);
1621 let shf: i32x16 = vpshrdvd(
1622 a.as_i32x16(),
1623 b.as_i32x16(),
1624 _mm512_set1_epi32(imm8).as_i32x16(),
1625 );
1626 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
1627 }
1628
1629 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1630 ///
1631 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi32&expand=5104)
1632 #[inline]
1633 #[target_feature(enable = "avx512vbmi2")]
1634 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1635 #[rustc_args_required_const(3)]
1636 pub unsafe fn _mm512_maskz_shrdi_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1637 assert!(imm8 >= 0 && imm8 <= 255);
1638 let shf: i32x16 = vpshrdvd(
1639 a.as_i32x16(),
1640 b.as_i32x16(),
1641 _mm512_set1_epi32(imm8).as_i32x16(),
1642 );
1643 let zero = _mm512_setzero_si512().as_i32x16();
1644 transmute(simd_select_bitmask(k, shf, zero))
1645 }
1646
1647 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1648 ///
1649 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi32&expand=5102)
1650 #[inline]
1651 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1652 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1653 #[rustc_args_required_const(2)]
1654 pub unsafe fn _mm256_shrdi_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1655 assert!(imm8 >= 0 && imm8 <= 255);
1656 transmute(vpshrdvd256(
1657 a.as_i32x8(),
1658 b.as_i32x8(),
1659 _mm256_set1_epi32(imm8).as_i32x8(),
1660 ))
1661 }
1662
1663 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1664 ///
1665 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi32&expand=5100)
1666 #[inline]
1667 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1668 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1669 #[rustc_args_required_const(4)]
1670 pub unsafe fn _mm256_mask_shrdi_epi32(
1671 src: __m256i,
1672 k: __mmask8,
1673 a: __m256i,
1674 b: __m256i,
1675 imm8: i32,
1676 ) -> __m256i {
1677 assert!(imm8 >= 0 && imm8 <= 255);
1678 let shf: i32x8 = vpshrdvd256(
1679 a.as_i32x8(),
1680 b.as_i32x8(),
1681 _mm256_set1_epi32(imm8).as_i32x8(),
1682 );
1683 transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
1684 }
1685
1686 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1687 ///
1688 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi32&expand=5101)
1689 #[inline]
1690 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1691 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1692 #[rustc_args_required_const(3)]
1693 pub unsafe fn _mm256_maskz_shrdi_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1694 assert!(imm8 >= 0 && imm8 <= 255);
1695 let shf: i32x8 = vpshrdvd256(
1696 a.as_i32x8(),
1697 b.as_i32x8(),
1698 _mm256_set1_epi32(imm8).as_i32x8(),
1699 );
1700 let zero = _mm256_setzero_si256().as_i32x8();
1701 transmute(simd_select_bitmask(k, shf, zero))
1702 }
1703
1704 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst.
1705 ///
1706 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi32&expand=5099)
1707 #[inline]
1708 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1709 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1710 #[rustc_args_required_const(2)]
1711 pub unsafe fn _mm_shrdi_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1712 assert!(imm8 >= 0 && imm8 <= 255);
1713 transmute(vpshrdvd128(
1714 a.as_i32x4(),
1715 b.as_i32x4(),
1716 _mm_set1_epi32(imm8).as_i32x4(),
1717 ))
1718 }
1719
1720 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1721 ///
1722 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi32&expand=5097)
1723 #[inline]
1724 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1725 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1726 #[rustc_args_required_const(4)]
1727 pub unsafe fn _mm_mask_shrdi_epi32(
1728 src: __m128i,
1729 k: __mmask8,
1730 a: __m128i,
1731 b: __m128i,
1732 imm8: i32,
1733 ) -> __m128i {
1734 assert!(imm8 >= 0 && imm8 <= 255);
1735 let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4());
1736 transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
1737 }
1738
1739 /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1740 ///
1741 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi32&expand=5098)
1742 #[inline]
1743 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1744 #[cfg_attr(test, assert_instr(vpshldd, imm8 = 5))] //should be vpshrdd
1745 #[rustc_args_required_const(3)]
1746 pub unsafe fn _mm_maskz_shrdi_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1747 assert!(imm8 >= 0 && imm8 <= 255);
1748 let shf: i32x4 = vpshrdvd128(a.as_i32x4(), b.as_i32x4(), _mm_set1_epi32(imm8).as_i32x4());
1749 let zero = _mm_setzero_si128().as_i32x4();
1750 transmute(simd_select_bitmask(k, shf, zero))
1751 }
1752
1753 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
1754 ///
1755 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shrdi_epi16&expand=5096)
1756 #[inline]
1757 #[target_feature(enable = "avx512vbmi2")]
1758 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1759 #[rustc_args_required_const(2)]
1760 pub unsafe fn _mm512_shrdi_epi16(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1761 assert!(imm8 >= 0 && imm8 <= 255);
1762 transmute(vpshrdvw(
1763 a.as_i16x32(),
1764 b.as_i16x32(),
1765 _mm512_set1_epi16(imm8 as i16).as_i16x32(),
1766 ))
1767 }
1768
1769 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1770 ///
1771 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shrdi_epi16&expand=5094)
1772 #[inline]
1773 #[target_feature(enable = "avx512vbmi2")]
1774 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1775 #[rustc_args_required_const(4)]
1776 pub unsafe fn _mm512_mask_shrdi_epi16(
1777 src: __m512i,
1778 k: __mmask32,
1779 a: __m512i,
1780 b: __m512i,
1781 imm8: i32,
1782 ) -> __m512i {
1783 assert!(imm8 >= 0 && imm8 <= 255);
1784 let shf: i16x32 = vpshrdvw(
1785 a.as_i16x32(),
1786 b.as_i16x32(),
1787 _mm512_set1_epi16(imm8 as i16).as_i16x32(),
1788 );
1789 transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
1790 }
1791
1792 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1793 ///
1794 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shrdi_epi16&expand=5095)
1795 #[inline]
1796 #[target_feature(enable = "avx512vbmi2")]
1797 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1798 #[rustc_args_required_const(3)]
1799 pub unsafe fn _mm512_maskz_shrdi_epi16(k: __mmask32, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
1800 assert!(imm8 >= 0 && imm8 <= 255);
1801 let shf: i16x32 = vpshrdvw(
1802 a.as_i16x32(),
1803 b.as_i16x32(),
1804 _mm512_set1_epi16(imm8 as i16).as_i16x32(),
1805 );
1806 let zero = _mm512_setzero_si512().as_i16x32();
1807 transmute(simd_select_bitmask(k, shf, zero))
1808 }
1809
1810 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
1811 ///
1812 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shrdi_epi16&expand=5093)
1813 #[inline]
1814 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1815 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1816 #[rustc_args_required_const(2)]
1817 pub unsafe fn _mm256_shrdi_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1818 assert!(imm8 >= 0 && imm8 <= 255);
1819 transmute(vpshrdvw256(
1820 a.as_i16x16(),
1821 b.as_i16x16(),
1822 _mm256_set1_epi16(imm8 as i16).as_i16x16(),
1823 ))
1824 }
1825
1826 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1827 ///
1828 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shrdi_epi16&expand=5091)
1829 #[inline]
1830 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1831 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1832 #[rustc_args_required_const(4)]
1833 pub unsafe fn _mm256_mask_shrdi_epi16(
1834 src: __m256i,
1835 k: __mmask16,
1836 a: __m256i,
1837 b: __m256i,
1838 imm8: i32,
1839 ) -> __m256i {
1840 assert!(imm8 >= 0 && imm8 <= 255);
1841 let shf: i16x16 = vpshrdvw256(
1842 a.as_i16x16(),
1843 b.as_i16x16(),
1844 _mm256_set1_epi16(imm8 as i16).as_i16x16(),
1845 );
1846 transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
1847 }
1848
1849 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1850 ///
1851 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shrdi_epi16&expand=5092)
1852 #[inline]
1853 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1854 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1855 #[rustc_args_required_const(3)]
1856 pub unsafe fn _mm256_maskz_shrdi_epi16(k: __mmask16, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
1857 let shf: i16x16 = vpshrdvw256(
1858 a.as_i16x16(),
1859 b.as_i16x16(),
1860 _mm256_set1_epi16(imm8 as i16).as_i16x16(),
1861 );
1862 let zero = _mm256_setzero_si256().as_i16x16();
1863 transmute(simd_select_bitmask(k, shf, zero))
1864 }
1865
1866 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst.
1867 ///
1868 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shrdi_epi16&expand=5090)
1869 #[inline]
1870 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1871 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1872 #[rustc_args_required_const(2)]
1873 pub unsafe fn _mm_shrdi_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1874 transmute(vpshrdvw128(
1875 a.as_i16x8(),
1876 b.as_i16x8(),
1877 _mm_set1_epi16(imm8 as i16).as_i16x8(),
1878 ))
1879 }
1880
1881 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1882 ///
1883 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shrdi_epi16&expand=5088)
1884 #[inline]
1885 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1886 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1887 #[rustc_args_required_const(4)]
1888 pub unsafe fn _mm_mask_shrdi_epi16(
1889 src: __m128i,
1890 k: __mmask8,
1891 a: __m128i,
1892 b: __m128i,
1893 imm8: i32,
1894 ) -> __m128i {
1895 let shf: i16x8 = vpshrdvw128(
1896 a.as_i16x8(),
1897 b.as_i16x8(),
1898 _mm_set1_epi16(imm8 as i16).as_i16x8(),
1899 );
1900 transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
1901 }
1902
1903 /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1904 ///
1905 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shrdi_epi16&expand=5089)
1906 #[inline]
1907 #[target_feature(enable = "avx512vbmi2,avx512vl")]
1908 #[cfg_attr(test, assert_instr(vpshldw, imm8 = 5))] //should be vpshrdw
1909 #[rustc_args_required_const(3)]
1910 pub unsafe fn _mm_maskz_shrdi_epi16(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
1911 let shf: i16x8 = vpshrdvw128(
1912 a.as_i16x8(),
1913 b.as_i16x8(),
1914 _mm_set1_epi16(imm8 as i16).as_i16x8(),
1915 );
1916 let zero = _mm_setzero_si128().as_i16x8();
1917 transmute(simd_select_bitmask(k, shf, zero))
1918 }
1919
1920 #[allow(improper_ctypes)]
1921 extern "C" {
1922 #[link_name = "llvm.x86.avx512.mask.compress.w.512"]
1923 fn vpcompressw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
1924 #[link_name = "llvm.x86.avx512.mask.compress.w.256"]
1925 fn vpcompressw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
1926 #[link_name = "llvm.x86.avx512.mask.compress.w.128"]
1927 fn vpcompressw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
1928
1929 #[link_name = "llvm.x86.avx512.mask.compress.b.512"]
1930 fn vpcompressb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
1931 #[link_name = "llvm.x86.avx512.mask.compress.b.256"]
1932 fn vpcompressb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
1933 #[link_name = "llvm.x86.avx512.mask.compress.b.128"]
1934 fn vpcompressb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
1935
1936 #[link_name = "llvm.x86.avx512.mask.expand.w.512"]
1937 fn vpexpandw(a: i16x32, src: i16x32, mask: u32) -> i16x32;
1938 #[link_name = "llvm.x86.avx512.mask.expand.w.256"]
1939 fn vpexpandw256(a: i16x16, src: i16x16, mask: u16) -> i16x16;
1940 #[link_name = "llvm.x86.avx512.mask.expand.w.128"]
1941 fn vpexpandw128(a: i16x8, src: i16x8, mask: u8) -> i16x8;
1942
1943 #[link_name = "llvm.x86.avx512.mask.expand.b.512"]
1944 fn vpexpandb(a: i8x64, src: i8x64, mask: u64) -> i8x64;
1945 #[link_name = "llvm.x86.avx512.mask.expand.b.256"]
1946 fn vpexpandb256(a: i8x32, src: i8x32, mask: u32) -> i8x32;
1947 #[link_name = "llvm.x86.avx512.mask.expand.b.128"]
1948 fn vpexpandb128(a: i8x16, src: i8x16, mask: u16) -> i8x16;
1949
1950 #[link_name = "llvm.fshl.v8i64"]
1951 fn vpshldvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
1952 #[link_name = "llvm.fshl.v4i64"]
1953 fn vpshldvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
1954 #[link_name = "llvm.fshl.v2i64"]
1955 fn vpshldvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
1956 #[link_name = "llvm.fshl.v16i32"]
1957 fn vpshldvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
1958 #[link_name = "llvm.fshl.v8i32"]
1959 fn vpshldvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
1960 #[link_name = "llvm.fshl.v4i32"]
1961 fn vpshldvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
1962 #[link_name = "llvm.fshl.v32i16"]
1963 fn vpshldvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
1964 #[link_name = "llvm.fshl.v16i16"]
1965 fn vpshldvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
1966 #[link_name = "llvm.fshl.v8i16"]
1967 fn vpshldvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
1968
1969 #[link_name = "llvm.fshr.v8i64"]
1970 fn vpshrdvq(a: i64x8, b: i64x8, c: i64x8) -> i64x8;
1971 #[link_name = "llvm.fshr.v4i64"]
1972 fn vpshrdvq256(a: i64x4, b: i64x4, c: i64x4) -> i64x4;
1973 #[link_name = "llvm.fshr.v2i64"]
1974 fn vpshrdvq128(a: i64x2, b: i64x2, c: i64x2) -> i64x2;
1975 #[link_name = "llvm.fshr.v16i32"]
1976 fn vpshrdvd(a: i32x16, b: i32x16, c: i32x16) -> i32x16;
1977 #[link_name = "llvm.fshr.v8i32"]
1978 fn vpshrdvd256(a: i32x8, b: i32x8, c: i32x8) -> i32x8;
1979 #[link_name = "llvm.fshr.v4i32"]
1980 fn vpshrdvd128(a: i32x4, b: i32x4, c: i32x4) -> i32x4;
1981 #[link_name = "llvm.fshr.v32i16"]
1982 fn vpshrdvw(a: i16x32, b: i16x32, c: i16x32) -> i16x32;
1983 #[link_name = "llvm.fshr.v16i16"]
1984 fn vpshrdvw256(a: i16x16, b: i16x16, c: i16x16) -> i16x16;
1985 #[link_name = "llvm.fshr.v8i16"]
1986 fn vpshrdvw128(a: i16x8, b: i16x8, c: i16x8) -> i16x8;
1987 }
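// The bindings above use LLVM's generic funnel-shift intrinsics (llvm.fshl.*, llvm.fshr.*)
// rather than x86-specific builtins; the "//should be ..." notes on the immediate
// right-shift wrappers record that codegen currently selects the left-shift instruction
// forms for them. One identity worth keeping in mind, sketched on a scalar as an aside:
// a funnel shift whose two data operands are equal is a rotate.
#[allow(dead_code)]
fn funnel_shift_of_equal_operands_is_a_rotate() {
    let (a, s) = (0x0123_4567_89AB_CDEFu64, 12u32);
    let funnel = (a << s) | (a >> (64 - s)); // fshl(a, a, s) with 0 < s < 64
    assert_eq!(funnel, a.rotate_left(s));
}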
1988
1989 #[cfg(test)]
1990 mod tests {
1991
1992 use stdarch_test::simd_test;
1993
1994 use crate::core_arch::x86::*;
1995
1996 #[simd_test(enable = "avx512vbmi2")]
1997 unsafe fn test_mm512_mask_compress_epi16() {
1998 let src = _mm512_set1_epi16(200);
1999 #[rustfmt::skip]
2000 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2001 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2002 let r = _mm512_mask_compress_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2003 #[rustfmt::skip]
2004 let e = _mm512_set_epi16(
2005 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200,
2006 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2007 );
2008 assert_eq_m512i(r, e);
2009 }
2010
2011 #[simd_test(enable = "avx512vbmi2")]
2012 unsafe fn test_mm512_maskz_compress_epi16() {
2013 #[rustfmt::skip]
2014 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2015 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2016 let r = _mm512_maskz_compress_epi16(0b01010101_01010101_01010101_01010101, a);
2017 #[rustfmt::skip]
2018 let e = _mm512_set_epi16(
2019 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2020 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2021 );
2022 assert_eq_m512i(r, e);
2023 }
2024
2025 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2026 unsafe fn test_mm256_mask_compress_epi16() {
2027 let src = _mm256_set1_epi16(200);
2028 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2029 let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
2030 let e = _mm256_set_epi16(
2031 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
2032 );
2033 assert_eq_m256i(r, e);
2034 }
2035
2036 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2037 unsafe fn test_mm256_maskz_compress_epi16() {
2038 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2039 let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
2040 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2041 assert_eq_m256i(r, e);
2042 }
2043
2044 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2045 unsafe fn test_mm_mask_compress_epi16() {
2046 let src = _mm_set1_epi16(200);
2047 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2048 let r = _mm_mask_compress_epi16(src, 0b01010101, a);
2049 let e = _mm_set_epi16(200, 200, 200, 200, 1, 3, 5, 7);
2050 assert_eq_m128i(r, e);
2051 }
2052
2053 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2054 unsafe fn test_mm_maskz_compress_epi16() {
2055 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2056 let r = _mm_maskz_compress_epi16(0b01010101, a);
2057 let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
2058 assert_eq_m128i(r, e);
2059 }
2060
2061 #[simd_test(enable = "avx512vbmi2")]
2062 unsafe fn test_mm512_mask_compress_epi8() {
2063 let src = _mm512_set1_epi8(100);
2064 #[rustfmt::skip]
2065 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2066 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2067 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2068 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2069 let r = _mm512_mask_compress_epi8(
2070 src,
2071 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2072 a,
2073 );
2074 #[rustfmt::skip]
2075 let e = _mm512_set_epi8(
2076 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2077 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2078 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2079 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2080 );
2081 assert_eq_m512i(r, e);
2082 }
2083
2084 #[simd_test(enable = "avx512vbmi2")]
2085 unsafe fn test_mm512_maskz_compress_epi8() {
2086 #[rustfmt::skip]
2087 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2088 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2089 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2090 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2091 let r = _mm512_maskz_compress_epi8(
2092 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2093 a,
2094 );
2095 #[rustfmt::skip]
2096 let e = _mm512_set_epi8(
2097 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2098 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2099 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2100 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63,
2101 );
2102 assert_eq_m512i(r, e);
2103 }
2104
2105 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2106 unsafe fn test_mm256_mask_compress_epi8() {
2107 let src = _mm256_set1_epi8(100);
2108 #[rustfmt::skip]
2109 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2110 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2111 let r = _mm256_mask_compress_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2112 #[rustfmt::skip]
2113 let e = _mm256_set_epi8(
2114 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
2115 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2116 );
2117 assert_eq_m256i(r, e);
2118 }
2119
2120 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2121 unsafe fn test_mm256_maskz_compress_epi8() {
2122 #[rustfmt::skip]
2123 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2124 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2125 let r = _mm256_maskz_compress_epi8(0b01010101_01010101_01010101_01010101, a);
2126 #[rustfmt::skip]
2127 let e = _mm256_set_epi8(
2128 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2129 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31,
2130 );
2131 assert_eq_m256i(r, e);
2132 }
2133
2134 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2135 unsafe fn test_mm_mask_compress_epi8() {
2136 let src = _mm_set1_epi8(100);
2137 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2138 let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
2139 let e = _mm_set_epi8(
2140 100, 100, 100, 100, 100, 100, 100, 100, 1, 3, 5, 7, 9, 11, 13, 15,
2141 );
2142 assert_eq_m128i(r, e);
2143 }
2144
2145 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2146 unsafe fn test_mm_maskz_compress_epi8() {
2147 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2148 let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
2149 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
2150 assert_eq_m128i(r, e);
2151 }
2152
2153 #[simd_test(enable = "avx512vbmi2")]
2154 unsafe fn test_mm512_mask_expand_epi16() {
2155 let src = _mm512_set1_epi16(200);
2156 #[rustfmt::skip]
2157 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2158 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2159 let r = _mm512_mask_expand_epi16(src, 0b01010101_01010101_01010101_01010101, a);
2160 #[rustfmt::skip]
2161 let e = _mm512_set_epi16(
2162 200, 16, 200, 17, 200, 18, 200, 19, 200, 20, 200, 21, 200, 22, 200, 23,
2163 200, 24, 200, 25, 200, 26, 200, 27, 200, 28, 200, 29, 200, 30, 200, 31,
2164 );
2165 assert_eq_m512i(r, e);
2166 }
2167
2168 #[simd_test(enable = "avx512vbmi2")]
2169 unsafe fn test_mm512_maskz_expand_epi16() {
2170 #[rustfmt::skip]
2171 let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2172 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2173 let r = _mm512_maskz_expand_epi16(0b01010101_01010101_01010101_01010101, a);
2174 #[rustfmt::skip]
2175 let e = _mm512_set_epi16(0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2176 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31);
2177 assert_eq_m512i(r, e);
2178 }
2179
2180 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2181 unsafe fn test_mm256_mask_expand_epi16() {
2182 let src = _mm256_set1_epi16(200);
2183 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2184 let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
2185 let e = _mm256_set_epi16(
2186 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
2187 );
2188 assert_eq_m256i(r, e);
2189 }
2190
2191 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2192 unsafe fn test_mm256_maskz_expand_epi16() {
2193 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2194 let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
2195 let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2196 assert_eq_m256i(r, e);
2197 }
2198
2199 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2200 unsafe fn test_mm_mask_expand_epi16() {
2201 let src = _mm_set1_epi16(200);
2202 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2203 let r = _mm_mask_expand_epi16(src, 0b01010101, a);
2204 let e = _mm_set_epi16(200, 4, 200, 5, 200, 6, 200, 7);
2205 assert_eq_m128i(r, e);
2206 }
2207
2208 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2209 unsafe fn test_mm_maskz_expand_epi16() {
2210 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2211 let r = _mm_maskz_expand_epi16(0b01010101, a);
2212 let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
2213 assert_eq_m128i(r, e);
2214 }
2215
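// Byte-granularity expand follows the same pattern as the 16-bit tests above,
// just with twice as many lanes per vector.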
2216 #[simd_test(enable = "avx512vbmi2")]
2217 unsafe fn test_mm512_mask_expand_epi8() {
2218 let src = _mm512_set1_epi8(100);
2219 #[rustfmt::skip]
2220 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2221 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2222 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2223 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2224 let r = _mm512_mask_expand_epi8(
2225 src,
2226 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2227 a,
2228 );
2229 #[rustfmt::skip]
2230 let e = _mm512_set_epi8(
2231 100, 32, 100, 33, 100, 34, 100, 35, 100, 36, 100, 37, 100, 38, 100, 39,
2232 100, 40, 100, 41, 100, 42, 100, 43, 100, 44, 100, 45, 100, 46, 100, 47,
2233 100, 48, 100, 49, 100, 50, 100, 51, 100, 52, 100, 53, 100, 54, 100, 55,
2234 100, 56, 100, 57, 100, 58, 100, 59, 100, 60, 100, 61, 100, 62, 100, 63,
2235 );
2236 assert_eq_m512i(r, e);
2237 }
2238
2239 #[simd_test(enable = "avx512vbmi2")]
2240 unsafe fn test_mm512_maskz_expand_epi8() {
2241 #[rustfmt::skip]
2242 let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2243 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2244 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
2245 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63);
2246 let r = _mm512_maskz_expand_epi8(
2247 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101,
2248 a,
2249 );
2250 #[rustfmt::skip]
2251 let e = _mm512_set_epi8(
2252 0, 32, 0, 33, 0, 34, 0, 35, 0, 36, 0, 37, 0, 38, 0, 39,
2253 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
2254 0, 48, 0, 49, 0, 50, 0, 51, 0, 52, 0, 53, 0, 54, 0, 55,
2255 0, 56, 0, 57, 0, 58, 0, 59, 0, 60, 0, 61, 0, 62, 0, 63,
2256 );
2257 assert_eq_m512i(r, e);
2258 }
2259
2260 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2261 unsafe fn test_mm256_mask_expand_epi8() {
2262 let src = _mm256_set1_epi8(100);
2263 #[rustfmt::skip]
2264 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2265 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2266 let r = _mm256_mask_expand_epi8(src, 0b01010101_01010101_01010101_01010101, a);
2267 #[rustfmt::skip]
2268 let e = _mm256_set_epi8(
2269 100, 16, 100, 17, 100, 18, 100, 19, 100, 20, 100, 21, 100, 22, 100, 23,
2270 100, 24, 100, 25, 100, 26, 100, 27, 100, 28, 100, 29, 100, 30, 100, 31,
2271 );
2272 assert_eq_m256i(r, e);
2273 }
2274
2275 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2276 unsafe fn test_mm256_maskz_expand_epi8() {
2277 #[rustfmt::skip]
2278 let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
2279 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
2280 let r = _mm256_maskz_expand_epi8(0b01010101_01010101_01010101_01010101, a);
2281 #[rustfmt::skip]
2282 let e = _mm256_set_epi8(
2283 0, 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23,
2284 0, 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31,
2285 );
2286 assert_eq_m256i(r, e);
2287 }
2288
2289 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2290 unsafe fn test_mm_mask_expand_epi8() {
2291 let src = _mm_set1_epi8(100);
2292 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2293 let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
2294 let e = _mm_set_epi8(
2295 100, 8, 100, 9, 100, 10, 100, 11, 100, 12, 100, 13, 100, 14, 100, 15,
2296 );
2297 assert_eq_m128i(r, e);
2298 }
2299
2300 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2301 unsafe fn test_mm_maskz_expand_epi8() {
2302 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2303 let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
2304 let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
2305 assert_eq_m128i(r, e);
2306 }
2307
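// shldv tests: each lane of `a` (upper half) is concatenated with the matching lane of
// `b` (lower half) into a double-width value, shifted left by the per-lane count in `c`,
// and the upper half is kept. With a = 1, b = MSB-only and a shift of 2, the upper half
// of ((1 << N) | (1 << (N - 1))) << 2 is 0b110 = 6, the value expected throughout.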
2308 #[simd_test(enable = "avx512vbmi2")]
2309 unsafe fn test_mm512_shldv_epi64() {
2310 let a = _mm512_set1_epi64(1);
2311 let b = _mm512_set1_epi64(1 << 63);
2312 let c = _mm512_set1_epi64(2);
2313 let r = _mm512_shldv_epi64(a, b, c);
2314 let e = _mm512_set1_epi64(6);
2315 assert_eq_m512i(r, e);
2316 }
2317
2318 #[simd_test(enable = "avx512vbmi2")]
2319 unsafe fn test_mm512_mask_shldv_epi64() {
2320 let a = _mm512_set1_epi64(1);
2321 let b = _mm512_set1_epi64(1 << 63);
2322 let c = _mm512_set1_epi64(2);
2323 let r = _mm512_mask_shldv_epi64(a, 0, b, c);
2324 assert_eq_m512i(r, a);
2325 let r = _mm512_mask_shldv_epi64(a, 0b11111111, b, c);
2326 let e = _mm512_set1_epi64(6);
2327 assert_eq_m512i(r, e);
2328 }
2329
2330 #[simd_test(enable = "avx512vbmi2")]
2331 unsafe fn test_mm512_maskz_shldv_epi64() {
2332 let a = _mm512_set1_epi64(1);
2333 let b = _mm512_set1_epi64(1 << 63);
2334 let c = _mm512_set1_epi64(2);
2335 let r = _mm512_maskz_shldv_epi64(0, a, b, c);
2336 assert_eq_m512i(r, _mm512_setzero_si512());
2337 let r = _mm512_maskz_shldv_epi64(0b11111111, a, b, c);
2338 let e = _mm512_set1_epi64(6);
2339 assert_eq_m512i(r, e);
2340 }
2341
2342 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2343 unsafe fn test_mm256_shldv_epi64() {
2344 let a = _mm256_set1_epi64x(1);
2345 let b = _mm256_set1_epi64x(1 << 63);
2346 let c = _mm256_set1_epi64x(2);
2347 let r = _mm256_shldv_epi64(a, b, c);
2348 let e = _mm256_set1_epi64x(6);
2349 assert_eq_m256i(r, e);
2350 }
2351
2352 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2353 unsafe fn test_mm256_mask_shldv_epi64() {
2354 let a = _mm256_set1_epi64x(1);
2355 let b = _mm256_set1_epi64x(1 << 63);
2356 let c = _mm256_set1_epi64x(2);
2357 let r = _mm256_mask_shldv_epi64(a, 0, b, c);
2358 assert_eq_m256i(r, a);
2359 let r = _mm256_mask_shldv_epi64(a, 0b00001111, b, c);
2360 let e = _mm256_set1_epi64x(6);
2361 assert_eq_m256i(r, e);
2362 }
2363
2364 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2365 unsafe fn test_mm256_maskz_shldv_epi64() {
2366 let a = _mm256_set1_epi64x(1);
2367 let b = _mm256_set1_epi64x(1 << 63);
2368 let c = _mm256_set1_epi64x(2);
2369 let r = _mm256_maskz_shldv_epi64(0, a, b, c);
2370 assert_eq_m256i(r, _mm256_setzero_si256());
2371 let r = _mm256_maskz_shldv_epi64(0b00001111, a, b, c);
2372 let e = _mm256_set1_epi64x(6);
2373 assert_eq_m256i(r, e);
2374 }
2375
2376 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2377 unsafe fn test_mm_shldv_epi64() {
2378 let a = _mm_set1_epi64x(1);
2379 let b = _mm_set1_epi64x(1 << 63);
2380 let c = _mm_set1_epi64x(2);
2381 let r = _mm_shldv_epi64(a, b, c);
2382 let e = _mm_set1_epi64x(6);
2383 assert_eq_m128i(r, e);
2384 }
2385
2386 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2387 unsafe fn test_mm_mask_shldv_epi64() {
2388 let a = _mm_set1_epi64x(1);
2389 let b = _mm_set1_epi64x(1 << 63);
2390 let c = _mm_set1_epi64x(2);
2391 let r = _mm_mask_shldv_epi64(a, 0, b, c);
2392 assert_eq_m128i(r, a);
2393 let r = _mm_mask_shldv_epi64(a, 0b00000011, b, c);
2394 let e = _mm_set1_epi64x(6);
2395 assert_eq_m128i(r, e);
2396 }
2397
2398 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2399 unsafe fn test_mm_maskz_shldv_epi64() {
2400 let a = _mm_set1_epi64x(1);
2401 let b = _mm_set1_epi64x(1 << 63);
2402 let c = _mm_set1_epi64x(2);
2403 let r = _mm_maskz_shldv_epi64(0, a, b, c);
2404 assert_eq_m128i(r, _mm_setzero_si128());
2405 let r = _mm_maskz_shldv_epi64(0b00000011, a, b, c);
2406 let e = _mm_set1_epi64x(6);
2407 assert_eq_m128i(r, e);
2408 }
2409
2410 #[simd_test(enable = "avx512vbmi2")]
2411 unsafe fn test_mm512_shldv_epi32() {
2412 let a = _mm512_set1_epi32(1);
2413 let b = _mm512_set1_epi32(1 << 31);
2414 let c = _mm512_set1_epi32(2);
2415 let r = _mm512_shldv_epi32(a, b, c);
2416 let e = _mm512_set1_epi32(6);
2417 assert_eq_m512i(r, e);
2418 }
2419
2420 #[simd_test(enable = "avx512vbmi2")]
2421 unsafe fn test_mm512_mask_shldv_epi32() {
2422 let a = _mm512_set1_epi32(1);
2423 let b = _mm512_set1_epi32(1 << 31);
2424 let c = _mm512_set1_epi32(2);
2425 let r = _mm512_mask_shldv_epi32(a, 0, b, c);
2426 assert_eq_m512i(r, a);
2427 let r = _mm512_mask_shldv_epi32(a, 0b11111111_11111111, b, c);
2428 let e = _mm512_set1_epi32(6);
2429 assert_eq_m512i(r, e);
2430 }
2431
2432 #[simd_test(enable = "avx512vbmi2")]
2433 unsafe fn test_mm512_maskz_shldv_epi32() {
2434 let a = _mm512_set1_epi32(1);
2435 let b = _mm512_set1_epi32(1 << 31);
2436 let c = _mm512_set1_epi32(2);
2437 let r = _mm512_maskz_shldv_epi32(0, a, b, c);
2438 assert_eq_m512i(r, _mm512_setzero_si512());
2439 let r = _mm512_maskz_shldv_epi32(0b11111111_11111111, a, b, c);
2440 let e = _mm512_set1_epi32(6);
2441 assert_eq_m512i(r, e);
2442 }
2443
2444 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2445 unsafe fn test_mm256_shldv_epi32() {
2446 let a = _mm256_set1_epi32(1);
2447 let b = _mm256_set1_epi32(1 << 31);
2448 let c = _mm256_set1_epi32(2);
2449 let r = _mm256_shldv_epi32(a, b, c);
2450 let e = _mm256_set1_epi32(6);
2451 assert_eq_m256i(r, e);
2452 }
2453
2454 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2455 unsafe fn test_mm256_mask_shldv_epi32() {
2456 let a = _mm256_set1_epi32(1);
2457 let b = _mm256_set1_epi32(1 << 31);
2458 let c = _mm256_set1_epi32(2);
2459 let r = _mm256_mask_shldv_epi32(a, 0, b, c);
2460 assert_eq_m256i(r, a);
2461 let r = _mm256_mask_shldv_epi32(a, 0b11111111, b, c);
2462 let e = _mm256_set1_epi32(6);
2463 assert_eq_m256i(r, e);
2464 }
2465
2466 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2467 unsafe fn test_mm256_maskz_shldv_epi32() {
2468 let a = _mm256_set1_epi32(1);
2469 let b = _mm256_set1_epi32(1 << 31);
2470 let c = _mm256_set1_epi32(2);
2471 let r = _mm256_maskz_shldv_epi32(0, a, b, c);
2472 assert_eq_m256i(r, _mm256_setzero_si256());
2473 let r = _mm256_maskz_shldv_epi32(0b11111111, a, b, c);
2474 let e = _mm256_set1_epi32(6);
2475 assert_eq_m256i(r, e);
2476 }
2477
2478 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2479 unsafe fn test_mm_shldv_epi32() {
2480 let a = _mm_set1_epi32(1);
2481 let b = _mm_set1_epi32(1 << 31);
2482 let c = _mm_set1_epi32(2);
2483 let r = _mm_shldv_epi32(a, b, c);
2484 let e = _mm_set1_epi32(6);
2485 assert_eq_m128i(r, e);
2486 }
2487
2488 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2489 unsafe fn test_mm_mask_shldv_epi32() {
2490 let a = _mm_set1_epi32(1);
2491 let b = _mm_set1_epi32(1 << 31);
2492 let c = _mm_set1_epi32(2);
2493 let r = _mm_mask_shldv_epi32(a, 0, b, c);
2494 assert_eq_m128i(r, a);
2495 let r = _mm_mask_shldv_epi32(a, 0b00001111, b, c);
2496 let e = _mm_set1_epi32(6);
2497 assert_eq_m128i(r, e);
2498 }
2499
2500 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2501 unsafe fn test_mm_maskz_shldv_epi32() {
2502 let a = _mm_set1_epi32(1);
2503 let b = _mm_set1_epi32(1 << 31);
2504 let c = _mm_set1_epi32(2);
2505 let r = _mm_maskz_shldv_epi32(0, a, b, c);
2506 assert_eq_m128i(r, _mm_setzero_si128());
2507 let r = _mm_maskz_shldv_epi32(0b00001111, a, b, c);
2508 let e = _mm_set1_epi32(6);
2509 assert_eq_m128i(r, e);
2510 }
2511
2512 #[simd_test(enable = "avx512vbmi2")]
2513 unsafe fn test_mm512_shldv_epi16() {
2514 let a = _mm512_set1_epi16(1);
2515 let b = _mm512_set1_epi16(1 << 15);
2516 let c = _mm512_set1_epi16(2);
2517 let r = _mm512_shldv_epi16(a, b, c);
2518 let e = _mm512_set1_epi16(6);
2519 assert_eq_m512i(r, e);
2520 }
2521
2522 #[simd_test(enable = "avx512vbmi2")]
2523 unsafe fn test_mm512_mask_shldv_epi16() {
2524 let a = _mm512_set1_epi16(1);
2525 let b = _mm512_set1_epi16(1 << 15);
2526 let c = _mm512_set1_epi16(2);
2527 let r = _mm512_mask_shldv_epi16(a, 0, b, c);
2528 assert_eq_m512i(r, a);
2529 let r = _mm512_mask_shldv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2530 let e = _mm512_set1_epi16(6);
2531 assert_eq_m512i(r, e);
2532 }
2533
2534 #[simd_test(enable = "avx512vbmi2")]
2535 unsafe fn test_mm512_maskz_shldv_epi16() {
2536 let a = _mm512_set1_epi16(1);
2537 let b = _mm512_set1_epi16(1 << 15);
2538 let c = _mm512_set1_epi16(2);
2539 let r = _mm512_maskz_shldv_epi16(0, a, b, c);
2540 assert_eq_m512i(r, _mm512_setzero_si512());
2541 let r = _mm512_maskz_shldv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
2542 let e = _mm512_set1_epi16(6);
2543 assert_eq_m512i(r, e);
2544 }
2545
2546 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2547 unsafe fn test_mm256_shldv_epi16() {
2548 let a = _mm256_set1_epi16(1);
2549 let b = _mm256_set1_epi16(1 << 15);
2550 let c = _mm256_set1_epi16(2);
2551 let r = _mm256_shldv_epi16(a, b, c);
2552 let e = _mm256_set1_epi16(6);
2553 assert_eq_m256i(r, e);
2554 }
2555
2556 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2557 unsafe fn test_mm256_mask_shldv_epi16() {
2558 let a = _mm256_set1_epi16(1);
2559 let b = _mm256_set1_epi16(1 << 15);
2560 let c = _mm256_set1_epi16(2);
2561 let r = _mm256_mask_shldv_epi16(a, 0, b, c);
2562 assert_eq_m256i(r, a);
2563 let r = _mm256_mask_shldv_epi16(a, 0b11111111_11111111, b, c);
2564 let e = _mm256_set1_epi16(6);
2565 assert_eq_m256i(r, e);
2566 }
2567
2568 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2569 unsafe fn test_mm256_maskz_shldv_epi16() {
2570 let a = _mm256_set1_epi16(1);
2571 let b = _mm256_set1_epi16(1 << 15);
2572 let c = _mm256_set1_epi16(2);
2573 let r = _mm256_maskz_shldv_epi16(0, a, b, c);
2574 assert_eq_m256i(r, _mm256_setzero_si256());
2575 let r = _mm256_maskz_shldv_epi16(0b11111111_11111111, a, b, c);
2576 let e = _mm256_set1_epi16(6);
2577 assert_eq_m256i(r, e);
2578 }
2579
2580 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2581 unsafe fn test_mm_shldv_epi16() {
2582 let a = _mm_set1_epi16(1);
2583 let b = _mm_set1_epi16(1 << 15);
2584 let c = _mm_set1_epi16(2);
2585 let r = _mm_shldv_epi16(a, b, c);
2586 let e = _mm_set1_epi16(6);
2587 assert_eq_m128i(r, e);
2588 }
2589
2590 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2591 unsafe fn test_mm_mask_shldv_epi16() {
2592 let a = _mm_set1_epi16(1);
2593 let b = _mm_set1_epi16(1 << 15);
2594 let c = _mm_set1_epi16(2);
2595 let r = _mm_mask_shldv_epi16(a, 0, b, c);
2596 assert_eq_m128i(r, a);
2597 let r = _mm_mask_shldv_epi16(a, 0b11111111, b, c);
2598 let e = _mm_set1_epi16(6);
2599 assert_eq_m128i(r, e);
2600 }
2601
2602 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2603 unsafe fn test_mm_maskz_shldv_epi16() {
2604 let a = _mm_set1_epi16(1);
2605 let b = _mm_set1_epi16(1 << 15);
2606 let c = _mm_set1_epi16(2);
2607 let r = _mm_maskz_shldv_epi16(0, a, b, c);
2608 assert_eq_m128i(r, _mm_setzero_si128());
2609 let r = _mm_maskz_shldv_epi16(0b11111111, a, b, c);
2610 let e = _mm_set1_epi16(6);
2611 assert_eq_m128i(r, e);
2612 }
2613
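// shrdv tests: the double-width concatenation of the two source lanes is shifted right
// by the per-lane count in `c` and the lower half is kept. With 8 in the upper half,
// 2 in the lower half and a shift of 1, the low half of ((8 << N) | 2) >> 1 is 1, the
// value expected below. (Intel's guide describes vpshrdv as concatenating b:a; the
// expected values here follow the operand order used by this crate version, with `a`
// supplying the upper half.)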
2614 #[simd_test(enable = "avx512vbmi2")]
2615 unsafe fn test_mm512_shrdv_epi64() {
2616 let a = _mm512_set1_epi64(8);
2617 let b = _mm512_set1_epi64(2);
2618 let c = _mm512_set1_epi64(1);
2619 let r = _mm512_shrdv_epi64(a, b, c);
2620 let e = _mm512_set1_epi64(1);
2621 assert_eq_m512i(r, e);
2622 }
2623
2624 #[simd_test(enable = "avx512vbmi2")]
2625 unsafe fn test_mm512_mask_shrdv_epi64() {
2626 let a = _mm512_set1_epi64(8);
2627 let b = _mm512_set1_epi64(2);
2628 let c = _mm512_set1_epi64(1);
2629 let r = _mm512_mask_shrdv_epi64(a, 0, b, c);
2630 assert_eq_m512i(r, a);
2631 let r = _mm512_mask_shrdv_epi64(a, 0b11111111, b, c);
2632 let e = _mm512_set1_epi64(1);
2633 assert_eq_m512i(r, e);
2634 }
2635
2636 #[simd_test(enable = "avx512vbmi2")]
2637 unsafe fn test_mm512_maskz_shrdv_epi64() {
2638 let a = _mm512_set1_epi64(8);
2639 let b = _mm512_set1_epi64(2);
2640 let c = _mm512_set1_epi64(1);
2641 let r = _mm512_maskz_shrdv_epi64(0, a, b, c);
2642 assert_eq_m512i(r, _mm512_setzero_si512());
2643 let r = _mm512_maskz_shrdv_epi64(0b11111111, a, b, c);
2644 let e = _mm512_set1_epi64(1);
2645 assert_eq_m512i(r, e);
2646 }
2647
2648 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2649 unsafe fn test_mm256_shrdv_epi64() {
2650 let a = _mm256_set1_epi64x(8);
2651 let b = _mm256_set1_epi64x(2);
2652 let c = _mm256_set1_epi64x(1);
2653 let r = _mm256_shrdv_epi64(a, b, c);
2654 let e = _mm256_set1_epi64x(1);
2655 assert_eq_m256i(r, e);
2656 }
2657
2658 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2659 unsafe fn test_mm256_mask_shrdv_epi64() {
2660 let a = _mm256_set1_epi64x(8);
2661 let b = _mm256_set1_epi64x(2);
2662 let c = _mm256_set1_epi64x(1);
2663 let r = _mm256_mask_shrdv_epi64(a, 0, b, c);
2664 assert_eq_m256i(r, a);
2665 let r = _mm256_mask_shrdv_epi64(a, 0b00001111, b, c);
2666 let e = _mm256_set1_epi64x(1);
2667 assert_eq_m256i(r, e);
2668 }
2669
2670 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2671 unsafe fn test_mm256_maskz_shrdv_epi64() {
2672 let a = _mm256_set1_epi64x(8);
2673 let b = _mm256_set1_epi64x(2);
2674 let c = _mm256_set1_epi64x(1);
2675 let r = _mm256_maskz_shrdv_epi64(0, a, b, c);
2676 assert_eq_m256i(r, _mm256_setzero_si256());
2677 let r = _mm256_maskz_shrdv_epi64(0b00001111, a, b, c);
2678 let e = _mm256_set1_epi64x(1);
2679 assert_eq_m256i(r, e);
2680 }
2681
2682 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2683 unsafe fn test_mm_shrdv_epi64() {
2684 let a = _mm_set1_epi64x(8);
2685 let b = _mm_set1_epi64x(2);
2686 let c = _mm_set1_epi64x(1);
2687 let r = _mm_shrdv_epi64(a, b, c);
2688 let e = _mm_set1_epi64x(1);
2689 assert_eq_m128i(r, e);
2690 }
2691
2692 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2693 unsafe fn test_mm_mask_shrdv_epi64() {
2694 let a = _mm_set1_epi64x(8);
2695 let b = _mm_set1_epi64x(2);
2696 let c = _mm_set1_epi64x(1);
2697 let r = _mm_mask_shrdv_epi64(a, 0, b, c);
2698 assert_eq_m128i(r, a);
2699 let r = _mm_mask_shrdv_epi64(a, 0b00000011, b, c);
2700 let e = _mm_set1_epi64x(1);
2701 assert_eq_m128i(r, e);
2702 }
2703
2704 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2705 unsafe fn test_mm_maskz_shrdv_epi64() {
2706 let a = _mm_set1_epi64x(8);
2707 let b = _mm_set1_epi64x(2);
2708 let c = _mm_set1_epi64x(1);
2709 let r = _mm_maskz_shrdv_epi64(0, a, b, c);
2710 assert_eq_m128i(r, _mm_setzero_si128());
2711 let r = _mm_maskz_shrdv_epi64(0b00000011, a, b, c);
2712 let e = _mm_set1_epi64x(1);
2713 assert_eq_m128i(r, e);
2714 }
2715
2716 #[simd_test(enable = "avx512vbmi2")]
2717 unsafe fn test_mm512_shrdv_epi32() {
2718 let a = _mm512_set1_epi32(8);
2719 let b = _mm512_set1_epi32(2);
2720 let c = _mm512_set1_epi32(1);
2721 let r = _mm512_shrdv_epi32(a, b, c);
2722 let e = _mm512_set1_epi32(1);
2723 assert_eq_m512i(r, e);
2724 }
2725
2726 #[simd_test(enable = "avx512vbmi2")]
2727 unsafe fn test_mm512_mask_shrdv_epi32() {
2728 let a = _mm512_set1_epi32(8);
2729 let b = _mm512_set1_epi32(2);
2730 let c = _mm512_set1_epi32(1);
2731 let r = _mm512_mask_shrdv_epi32(a, 0, b, c);
2732 assert_eq_m512i(r, a);
2733 let r = _mm512_mask_shrdv_epi32(a, 0b11111111_11111111, b, c);
2734 let e = _mm512_set1_epi32(1);
2735 assert_eq_m512i(r, e);
2736 }
2737
2738 #[simd_test(enable = "avx512vbmi2")]
2739 unsafe fn test_mm512_maskz_shrdv_epi32() {
2740 let a = _mm512_set1_epi32(8);
2741 let b = _mm512_set1_epi32(2);
2742 let c = _mm512_set1_epi32(1);
2743 let r = _mm512_maskz_shrdv_epi32(0, a, b, c);
2744 assert_eq_m512i(r, _mm512_setzero_si512());
2745 let r = _mm512_maskz_shrdv_epi32(0b11111111_11111111, a, b, c);
2746 let e = _mm512_set1_epi32(1);
2747 assert_eq_m512i(r, e);
2748 }
2749
2750 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2751 unsafe fn test_mm256_shrdv_epi32() {
2752 let a = _mm256_set1_epi32(8);
2753 let b = _mm256_set1_epi32(2);
2754 let c = _mm256_set1_epi32(1);
2755 let r = _mm256_shrdv_epi32(a, b, c);
2756 let e = _mm256_set1_epi32(1);
2757 assert_eq_m256i(r, e);
2758 }
2759
2760 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2761 unsafe fn test_mm256_mask_shrdv_epi32() {
2762 let a = _mm256_set1_epi32(8);
2763 let b = _mm256_set1_epi32(2);
2764 let c = _mm256_set1_epi32(1);
2765 let r = _mm256_mask_shrdv_epi32(a, 0, b, c);
2766 assert_eq_m256i(r, a);
2767 let r = _mm256_mask_shrdv_epi32(a, 0b11111111, b, c);
2768 let e = _mm256_set1_epi32(1);
2769 assert_eq_m256i(r, e);
2770 }
2771
2772 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2773 unsafe fn test_mm256_maskz_shrdv_epi32() {
2774 let a = _mm256_set1_epi32(8);
2775 let b = _mm256_set1_epi32(2);
2776 let c = _mm256_set1_epi32(1);
2777 let r = _mm256_maskz_shrdv_epi32(0, a, b, c);
2778 assert_eq_m256i(r, _mm256_setzero_si256());
2779 let r = _mm256_maskz_shrdv_epi32(0b11111111, a, b, c);
2780 let e = _mm256_set1_epi32(1);
2781 assert_eq_m256i(r, e);
2782 }
2783
2784 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2785 unsafe fn test_mm_shrdv_epi32() {
2786 let a = _mm_set1_epi32(8);
2787 let b = _mm_set1_epi32(2);
2788 let c = _mm_set1_epi32(1);
2789 let r = _mm_shrdv_epi32(a, b, c);
2790 let e = _mm_set1_epi32(1);
2791 assert_eq_m128i(r, e);
2792 }
2793
2794 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2795 unsafe fn test_mm_mask_shrdv_epi32() {
2796 let a = _mm_set1_epi32(8);
2797 let b = _mm_set1_epi32(2);
2798 let c = _mm_set1_epi32(1);
2799 let r = _mm_mask_shrdv_epi32(a, 0, b, c);
2800 assert_eq_m128i(r, a);
2801 let r = _mm_mask_shrdv_epi32(a, 0b00001111, b, c);
2802 let e = _mm_set1_epi32(1);
2803 assert_eq_m128i(r, e);
2804 }
2805
2806 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2807 unsafe fn test_mm_maskz_shrdv_epi32() {
2808 let a = _mm_set1_epi32(8);
2809 let b = _mm_set1_epi32(2);
2810 let c = _mm_set1_epi32(1);
2811 let r = _mm_maskz_shrdv_epi32(0, a, b, c);
2812 assert_eq_m128i(r, _mm_setzero_si128());
2813 let r = _mm_maskz_shrdv_epi32(0b00001111, a, b, c);
2814 let e = _mm_set1_epi32(1);
2815 assert_eq_m128i(r, e);
2816 }
2817
2818 #[simd_test(enable = "avx512vbmi2")]
2819 unsafe fn test_mm512_shrdv_epi16() {
2820 let a = _mm512_set1_epi16(8);
2821 let b = _mm512_set1_epi16(2);
2822 let c = _mm512_set1_epi16(1);
2823 let r = _mm512_shrdv_epi16(a, b, c);
2824 let e = _mm512_set1_epi16(1);
2825 assert_eq_m512i(r, e);
2826 }
2827
2828 #[simd_test(enable = "avx512vbmi2")]
2829 unsafe fn test_mm512_mask_shrdv_epi16() {
2830 let a = _mm512_set1_epi16(8);
2831 let b = _mm512_set1_epi16(2);
2832 let c = _mm512_set1_epi16(1);
2833 let r = _mm512_mask_shrdv_epi16(a, 0, b, c);
2834 assert_eq_m512i(r, a);
2835 let r = _mm512_mask_shrdv_epi16(a, 0b11111111_11111111_11111111_11111111, b, c);
2836 let e = _mm512_set1_epi16(1);
2837 assert_eq_m512i(r, e);
2838 }
2839
2840 #[simd_test(enable = "avx512vbmi2")]
2841 unsafe fn test_mm512_maskz_shrdv_epi16() {
2842 let a = _mm512_set1_epi16(8);
2843 let b = _mm512_set1_epi16(2);
2844 let c = _mm512_set1_epi16(1);
2845 let r = _mm512_maskz_shrdv_epi16(0, a, b, c);
2846 assert_eq_m512i(r, _mm512_setzero_si512());
2847 let r = _mm512_maskz_shrdv_epi16(0b11111111_11111111_11111111_11111111, a, b, c);
2848 let e = _mm512_set1_epi16(1);
2849 assert_eq_m512i(r, e);
2850 }
2851
2852 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2853 unsafe fn test_mm256_shrdv_epi16() {
2854 let a = _mm256_set1_epi16(8);
2855 let b = _mm256_set1_epi16(2);
2856 let c = _mm256_set1_epi16(1);
2857 let r = _mm256_shrdv_epi16(a, b, c);
2858 let e = _mm256_set1_epi16(1);
2859 assert_eq_m256i(r, e);
2860 }
2861
2862 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2863 unsafe fn test_mm256_mask_shrdv_epi16() {
2864 let a = _mm256_set1_epi16(8);
2865 let b = _mm256_set1_epi16(2);
2866 let c = _mm256_set1_epi16(1);
2867 let r = _mm256_mask_shrdv_epi16(a, 0, b, c);
2868 assert_eq_m256i(r, a);
2869 let r = _mm256_mask_shrdv_epi16(a, 0b11111111_11111111, b, c);
2870 let e = _mm256_set1_epi16(1);
2871 assert_eq_m256i(r, e);
2872 }
2873
2874 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2875 unsafe fn test_mm256_maskz_shrdv_epi16() {
2876 let a = _mm256_set1_epi16(8);
2877 let b = _mm256_set1_epi16(2);
2878 let c = _mm256_set1_epi16(1);
2879 let r = _mm256_maskz_shrdv_epi16(0, a, b, c);
2880 assert_eq_m256i(r, _mm256_setzero_si256());
2881 let r = _mm256_maskz_shrdv_epi16(0b11111111_11111111, a, b, c);
2882 let e = _mm256_set1_epi16(1);
2883 assert_eq_m256i(r, e);
2884 }
2885
2886 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2887 unsafe fn test_mm_shrdv_epi16() {
2888 let a = _mm_set1_epi16(8);
2889 let b = _mm_set1_epi16(2);
2890 let c = _mm_set1_epi16(1);
2891 let r = _mm_shrdv_epi16(a, b, c);
2892 let e = _mm_set1_epi16(1);
2893 assert_eq_m128i(r, e);
2894 }
2895
2896 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2897 unsafe fn test_mm_mask_shrdv_epi16() {
2898 let a = _mm_set1_epi16(8);
2899 let b = _mm_set1_epi16(2);
2900 let c = _mm_set1_epi16(1);
2901 let r = _mm_mask_shrdv_epi16(a, 0, b, c);
2902 assert_eq_m128i(r, a);
2903 let r = _mm_mask_shrdv_epi16(a, 0b11111111, b, c);
2904 let e = _mm_set1_epi16(1);
2905 assert_eq_m128i(r, e);
2906 }
2907
2908 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2909 unsafe fn test_mm_maskz_shrdv_epi16() {
2910 let a = _mm_set1_epi16(8);
2911 let b = _mm_set1_epi16(2);
2912 let c = _mm_set1_epi16(1);
2913 let r = _mm_maskz_shrdv_epi16(0, a, b, c);
2914 assert_eq_m128i(r, _mm_setzero_si128());
2915 let r = _mm_maskz_shrdv_epi16(0b11111111, a, b, c);
2916 let e = _mm_set1_epi16(1);
2917 assert_eq_m128i(r, e);
2918 }
2919
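// shldi tests: the immediate-count form of shldv. The shift amount is the constant 2
// rather than a per-lane vector, so the expected results mirror the shldv tests above.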
2920 #[simd_test(enable = "avx512vbmi2")]
2921 unsafe fn test_mm512_shldi_epi64() {
2922 let a = _mm512_set1_epi64(1);
2923 let b = _mm512_set1_epi64(1 << 63);
2924 let r = _mm512_shldi_epi64(a, b, 2);
2925 let e = _mm512_set1_epi64(6);
2926 assert_eq_m512i(r, e);
2927 }
2928
2929 #[simd_test(enable = "avx512vbmi2")]
2930 unsafe fn test_mm512_mask_shldi_epi64() {
2931 let a = _mm512_set1_epi64(1);
2932 let b = _mm512_set1_epi64(1 << 63);
2933 let r = _mm512_mask_shldi_epi64(a, 0, a, b, 2);
2934 assert_eq_m512i(r, a);
2935 let r = _mm512_mask_shldi_epi64(a, 0b11111111, a, b, 2);
2936 let e = _mm512_set1_epi64(6);
2937 assert_eq_m512i(r, e);
2938 }
2939
2940 #[simd_test(enable = "avx512vbmi2")]
2941 unsafe fn test_mm512_maskz_shldi_epi64() {
2942 let a = _mm512_set1_epi64(1);
2943 let b = _mm512_set1_epi64(1 << 63);
2944 let r = _mm512_maskz_shldi_epi64(0, a, b, 2);
2945 assert_eq_m512i(r, _mm512_setzero_si512());
2946 let r = _mm512_maskz_shldi_epi64(0b11111111, a, b, 2);
2947 let e = _mm512_set1_epi64(6);
2948 assert_eq_m512i(r, e);
2949 }
2950
2951 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2952 unsafe fn test_mm256_shldi_epi64() {
2953 let a = _mm256_set1_epi64x(1);
2954 let b = _mm256_set1_epi64x(1 << 63);
2955 let r = _mm256_shldi_epi64(a, b, 2);
2956 let e = _mm256_set1_epi64x(6);
2957 assert_eq_m256i(r, e);
2958 }
2959
2960 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2961 unsafe fn test_mm256_mask_shldi_epi64() {
2962 let a = _mm256_set1_epi64x(1);
2963 let b = _mm256_set1_epi64x(1 << 63);
2964 let r = _mm256_mask_shldi_epi64(a, 0, a, b, 2);
2965 assert_eq_m256i(r, a);
2966 let r = _mm256_mask_shldi_epi64(a, 0b00001111, a, b, 2);
2967 let e = _mm256_set1_epi64x(6);
2968 assert_eq_m256i(r, e);
2969 }
2970
2971 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2972 unsafe fn test_mm256_maskz_shldi_epi64() {
2973 let a = _mm256_set1_epi64x(1);
2974 let b = _mm256_set1_epi64x(1 << 63);
2975 let r = _mm256_maskz_shldi_epi64(0, a, b, 2);
2976 assert_eq_m256i(r, _mm256_setzero_si256());
2977 let r = _mm256_maskz_shldi_epi64(0b00001111, a, b, 2);
2978 let e = _mm256_set1_epi64x(6);
2979 assert_eq_m256i(r, e);
2980 }
2981
2982 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2983 unsafe fn test_mm_shldi_epi64() {
2984 let a = _mm_set1_epi64x(1);
2985 let b = _mm_set1_epi64x(1 << 63);
2986 let r = _mm_shldi_epi64(a, b, 2);
2987 let e = _mm_set1_epi64x(6);
2988 assert_eq_m128i(r, e);
2989 }
2990
2991 #[simd_test(enable = "avx512vbmi2,avx512vl")]
2992 unsafe fn test_mm_mask_shldi_epi64() {
2993 let a = _mm_set1_epi64x(1);
2994 let b = _mm_set1_epi64x(1 << 63);
2995 let r = _mm_mask_shldi_epi64(a, 0, a, b, 2);
2996 assert_eq_m128i(r, a);
2997 let r = _mm_mask_shldi_epi64(a, 0b00000011, a, b, 2);
2998 let e = _mm_set1_epi64x(6);
2999 assert_eq_m128i(r, e);
3000 }
3001
3002 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3003 unsafe fn test_mm_maskz_shldi_epi64() {
3004 let a = _mm_set1_epi64x(1);
3005 let b = _mm_set1_epi64x(1 << 63);
3006 let r = _mm_maskz_shldi_epi64(0, a, b, 2);
3007 assert_eq_m128i(r, _mm_setzero_si128());
3008 let r = _mm_maskz_shldi_epi64(0b00000011, a, b, 2);
3009 let e = _mm_set1_epi64x(6);
3010 assert_eq_m128i(r, e);
3011 }
3012
3013 #[simd_test(enable = "avx512vbmi2")]
3014 unsafe fn test_mm512_shldi_epi32() {
3015 let a = _mm512_set1_epi32(1);
3016 let b = _mm512_set1_epi32(1 << 31);
3017 let r = _mm512_shldi_epi32(a, b, 2);
3018 let e = _mm512_set1_epi32(6);
3019 assert_eq_m512i(r, e);
3020 }
3021
3022 #[simd_test(enable = "avx512vbmi2")]
3023 unsafe fn test_mm512_mask_shldi_epi32() {
3024 let a = _mm512_set1_epi32(1);
3025 let b = _mm512_set1_epi32(1 << 31);
3026 let r = _mm512_mask_shldi_epi32(a, 0, a, b, 2);
3027 assert_eq_m512i(r, a);
3028 let r = _mm512_mask_shldi_epi32(a, 0b11111111_11111111, a, b, 2);
3029 let e = _mm512_set1_epi32(6);
3030 assert_eq_m512i(r, e);
3031 }
3032
3033 #[simd_test(enable = "avx512vbmi2")]
3034 unsafe fn test_mm512_maskz_shldi_epi32() {
3035 let a = _mm512_set1_epi32(1);
3036 let b = _mm512_set1_epi32(1 << 31);
3037 let r = _mm512_maskz_shldi_epi32(0, a, b, 2);
3038 assert_eq_m512i(r, _mm512_setzero_si512());
3039 let r = _mm512_maskz_shldi_epi32(0b11111111_11111111, a, b, 2);
3040 let e = _mm512_set1_epi32(6);
3041 assert_eq_m512i(r, e);
3042 }
3043
3044 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3045 unsafe fn test_mm256_shldi_epi32() {
3046 let a = _mm256_set1_epi32(1);
3047 let b = _mm256_set1_epi32(1 << 31);
3048 let r = _mm256_shldi_epi32(a, b, 2);
3049 let e = _mm256_set1_epi32(6);
3050 assert_eq_m256i(r, e);
3051 }
3052
3053 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3054 unsafe fn test_mm256_mask_shldi_epi32() {
3055 let a = _mm256_set1_epi32(1);
3056 let b = _mm256_set1_epi32(1 << 31);
3057 let r = _mm256_mask_shldi_epi32(a, 0, a, b, 2);
3058 assert_eq_m256i(r, a);
3059 let r = _mm256_mask_shldi_epi32(a, 0b11111111, a, b, 2);
3060 let e = _mm256_set1_epi32(6);
3061 assert_eq_m256i(r, e);
3062 }
3063
3064 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3065 unsafe fn test_mm256_maskz_shldi_epi32() {
3066 let a = _mm256_set1_epi32(1);
3067 let b = _mm256_set1_epi32(1 << 31);
3068 let r = _mm256_maskz_shldi_epi32(0, a, b, 2);
3069 assert_eq_m256i(r, _mm256_setzero_si256());
3070 let r = _mm256_maskz_shldi_epi32(0b11111111, a, b, 2);
3071 let e = _mm256_set1_epi32(6);
3072 assert_eq_m256i(r, e);
3073 }
3074
3075 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3076 unsafe fn test_mm_shldi_epi32() {
3077 let a = _mm_set1_epi32(1);
3078 let b = _mm_set1_epi32(1 << 31);
3079 let r = _mm_shldi_epi32(a, b, 2);
3080 let e = _mm_set1_epi32(6);
3081 assert_eq_m128i(r, e);
3082 }
3083
3084 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3085 unsafe fn test_mm_mask_shldi_epi32() {
3086 let a = _mm_set1_epi32(1);
3087 let b = _mm_set1_epi32(1 << 31);
3088 let r = _mm_mask_shldi_epi32(a, 0, a, b, 2);
3089 assert_eq_m128i(r, a);
3090 let r = _mm_mask_shldi_epi32(a, 0b00001111, a, b, 2);
3091 let e = _mm_set1_epi32(6);
3092 assert_eq_m128i(r, e);
3093 }
3094
3095 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3096 unsafe fn test_mm_maskz_shldi_epi32() {
3097 let a = _mm_set1_epi32(1);
3098 let b = _mm_set1_epi32(1 << 31);
3099 let r = _mm_maskz_shldi_epi32(0, a, b, 2);
3100 assert_eq_m128i(r, _mm_setzero_si128());
3101 let r = _mm_maskz_shldi_epi32(0b00001111, a, b, 2);
3102 let e = _mm_set1_epi32(6);
3103 assert_eq_m128i(r, e);
3104 }
3105
3106 #[simd_test(enable = "avx512vbmi2")]
3107 unsafe fn test_mm512_shldi_epi16() {
3108 let a = _mm512_set1_epi16(1);
3109 let b = _mm512_set1_epi16(1 << 15);
3110 let r = _mm512_shldi_epi16(a, b, 2);
3111 let e = _mm512_set1_epi16(6);
3112 assert_eq_m512i(r, e);
3113 }
3114
3115 #[simd_test(enable = "avx512vbmi2")]
3116 unsafe fn test_mm512_mask_shldi_epi16() {
3117 let a = _mm512_set1_epi16(1);
3118 let b = _mm512_set1_epi16(1 << 15);
3119 let r = _mm512_mask_shldi_epi16(a, 0, a, b, 2);
3120 assert_eq_m512i(r, a);
3121 let r = _mm512_mask_shldi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b, 2);
3122 let e = _mm512_set1_epi16(6);
3123 assert_eq_m512i(r, e);
3124 }
3125
3126 #[simd_test(enable = "avx512vbmi2")]
3127 unsafe fn test_mm512_maskz_shldi_epi16() {
3128 let a = _mm512_set1_epi16(1);
3129 let b = _mm512_set1_epi16(1 << 15);
3130 let r = _mm512_maskz_shldi_epi16(0, a, b, 2);
3131 assert_eq_m512i(r, _mm512_setzero_si512());
3132 let r = _mm512_maskz_shldi_epi16(0b11111111_11111111_11111111_11111111, a, b, 2);
3133 let e = _mm512_set1_epi16(6);
3134 assert_eq_m512i(r, e);
3135 }
3136
3137 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3138 unsafe fn test_mm256_shldi_epi16() {
3139 let a = _mm256_set1_epi16(1);
3140 let b = _mm256_set1_epi16(1 << 15);
3141 let r = _mm256_shldi_epi16(a, b, 2);
3142 let e = _mm256_set1_epi16(6);
3143 assert_eq_m256i(r, e);
3144 }
3145
3146 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3147 unsafe fn test_mm256_mask_shldi_epi16() {
3148 let a = _mm256_set1_epi16(1);
3149 let b = _mm256_set1_epi16(1 << 15);
3150 let r = _mm256_mask_shldi_epi16(a, 0, a, b, 2);
3151 assert_eq_m256i(r, a);
3152 let r = _mm256_mask_shldi_epi16(a, 0b11111111_11111111, a, b, 2);
3153 let e = _mm256_set1_epi16(6);
3154 assert_eq_m256i(r, e);
3155 }
3156
3157 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3158 unsafe fn test_mm256_maskz_shldi_epi16() {
3159 let a = _mm256_set1_epi16(1);
3160 let b = _mm256_set1_epi16(1 << 15);
3161 let r = _mm256_maskz_shldi_epi16(0, a, b, 2);
3162 assert_eq_m256i(r, _mm256_setzero_si256());
3163 let r = _mm256_maskz_shldi_epi16(0b11111111_11111111, a, b, 2);
3164 let e = _mm256_set1_epi16(6);
3165 assert_eq_m256i(r, e);
3166 }
3167
3168 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3169 unsafe fn test_mm_shldi_epi16() {
3170 let a = _mm_set1_epi16(1);
3171 let b = _mm_set1_epi16(1 << 15);
3172 let r = _mm_shldi_epi16(a, b, 2);
3173 let e = _mm_set1_epi16(6);
3174 assert_eq_m128i(r, e);
3175 }
3176
3177 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3178 unsafe fn test_mm_mask_shldi_epi16() {
3179 let a = _mm_set1_epi16(1);
3180 let b = _mm_set1_epi16(1 << 15);
3181 let r = _mm_mask_shldi_epi16(a, 0, a, b, 2);
3182 assert_eq_m128i(r, a);
3183 let r = _mm_mask_shldi_epi16(a, 0b11111111, a, b, 2);
3184 let e = _mm_set1_epi16(6);
3185 assert_eq_m128i(r, e);
3186 }
3187
3188 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3189 unsafe fn test_mm_maskz_shldi_epi16() {
3190 let a = _mm_set1_epi16(1);
3191 let b = _mm_set1_epi16(1 << 15);
3192 let r = _mm_maskz_shldi_epi16(0, a, b, 2);
3193 assert_eq_m128i(r, _mm_setzero_si128());
3194 let r = _mm_maskz_shldi_epi16(0b11111111, a, b, 2);
3195 let e = _mm_set1_epi16(6);
3196 assert_eq_m128i(r, e);
3197 }
3198
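// shrdi tests: the immediate-count form of shrdv, with a constant shift of 1; the
// expected values mirror the shrdv tests above.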
3199 #[simd_test(enable = "avx512vbmi2")]
3200 unsafe fn test_mm512_shrdi_epi64() {
3201 let a = _mm512_set1_epi64(8);
3202 let b = _mm512_set1_epi64(2);
3203 let r = _mm512_shrdi_epi64(a, b, 1);
3204 let e = _mm512_set1_epi64(1);
3205 assert_eq_m512i(r, e);
3206 }
3207
3208 #[simd_test(enable = "avx512vbmi2")]
3209 unsafe fn test_mm512_mask_shrdi_epi64() {
3210 let a = _mm512_set1_epi64(8);
3211 let b = _mm512_set1_epi64(2);
3212 let r = _mm512_mask_shrdi_epi64(a, 0, a, b, 1);
3213 assert_eq_m512i(r, a);
3214 let r = _mm512_mask_shrdi_epi64(a, 0b11111111, a, b, 1);
3215 let e = _mm512_set1_epi64(1);
3216 assert_eq_m512i(r, e);
3217 }
3218
3219 #[simd_test(enable = "avx512vbmi2")]
3220 unsafe fn test_mm512_maskz_shrdi_epi64() {
3221 let a = _mm512_set1_epi64(8);
3222 let b = _mm512_set1_epi64(2);
3223 let r = _mm512_maskz_shrdi_epi64(0, a, b, 1);
3224 assert_eq_m512i(r, _mm512_setzero_si512());
3225 let r = _mm512_maskz_shrdi_epi64(0b11111111, a, b, 1);
3226 let e = _mm512_set1_epi64(1);
3227 assert_eq_m512i(r, e);
3228 }
3229
3230 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3231 unsafe fn test_mm256_shrdi_epi64() {
3232 let a = _mm256_set1_epi64x(8);
3233 let b = _mm256_set1_epi64x(2);
3234 let r = _mm256_shrdi_epi64(a, b, 1);
3235 let e = _mm256_set1_epi64x(1);
3236 assert_eq_m256i(r, e);
3237 }
3238
3239 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3240 unsafe fn test_mm256_mask_shrdi_epi64() {
3241 let a = _mm256_set1_epi64x(8);
3242 let b = _mm256_set1_epi64x(2);
3243 let r = _mm256_mask_shrdi_epi64(a, 0, a, b, 1);
3244 assert_eq_m256i(r, a);
3245 let r = _mm256_mask_shrdi_epi64(a, 0b00001111, a, b, 1);
3246 let e = _mm256_set1_epi64x(1);
3247 assert_eq_m256i(r, e);
3248 }
3249
3250 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3251 unsafe fn test_mm256_maskz_shrdi_epi64() {
3252 let a = _mm256_set1_epi64x(8);
3253 let b = _mm256_set1_epi64x(2);
3254 let r = _mm256_maskz_shrdi_epi64(0, a, b, 1);
3255 assert_eq_m256i(r, _mm256_setzero_si256());
3256 let r = _mm256_maskz_shrdi_epi64(0b00001111, a, b, 1);
3257 let e = _mm256_set1_epi64x(1);
3258 assert_eq_m256i(r, e);
3259 }
3260
3261 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3262 unsafe fn test_mm_shrdi_epi64() {
3263 let a = _mm_set1_epi64x(8);
3264 let b = _mm_set1_epi64x(2);
3265 let r = _mm_shrdi_epi64(a, b, 1);
3266 let e = _mm_set1_epi64x(1);
3267 assert_eq_m128i(r, e);
3268 }
3269
3270 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3271 unsafe fn test_mm_mask_shrdi_epi64() {
3272 let a = _mm_set1_epi64x(8);
3273 let b = _mm_set1_epi64x(2);
3274 let r = _mm_mask_shrdi_epi64(a, 0, a, b, 1);
3275 assert_eq_m128i(r, a);
3276 let r = _mm_mask_shrdi_epi64(a, 0b00000011, a, b, 1);
3277 let e = _mm_set1_epi64x(1);
3278 assert_eq_m128i(r, e);
3279 }
3280
3281 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3282 unsafe fn test_mm_maskz_shrdi_epi64() {
3283 let a = _mm_set1_epi64x(8);
3284 let b = _mm_set1_epi64x(2);
3285 let r = _mm_maskz_shrdi_epi64(0, a, b, 1);
3286 assert_eq_m128i(r, _mm_setzero_si128());
3287 let r = _mm_maskz_shrdi_epi64(0b00000011, a, b, 1);
3288 let e = _mm_set1_epi64x(1);
3289 assert_eq_m128i(r, e);
3290 }
3291
3292 #[simd_test(enable = "avx512vbmi2")]
3293 unsafe fn test_mm512_shrdi_epi32() {
3294 let a = _mm512_set1_epi32(8);
3295 let b = _mm512_set1_epi32(2);
3296 let r = _mm512_shrdi_epi32(a, b, 1);
3297 let e = _mm512_set1_epi32(1);
3298 assert_eq_m512i(r, e);
3299 }
3300
3301 #[simd_test(enable = "avx512vbmi2")]
3302 unsafe fn test_mm512_mask_shrdi_epi32() {
3303 let a = _mm512_set1_epi32(8);
3304 let b = _mm512_set1_epi32(2);
3305 let r = _mm512_mask_shrdi_epi32(a, 0, a, b, 1);
3306 assert_eq_m512i(r, a);
3307 let r = _mm512_mask_shrdi_epi32(a, 0b11111111_11111111, a, b, 1);
3308 let e = _mm512_set1_epi32(1);
3309 assert_eq_m512i(r, e);
3310 }
3311
3312 #[simd_test(enable = "avx512vbmi2")]
3313 unsafe fn test_mm512_maskz_shrdi_epi32() {
3314 let a = _mm512_set1_epi32(8);
3315 let b = _mm512_set1_epi32(2);
3316 let r = _mm512_maskz_shrdi_epi32(0, a, b, 1);
3317 assert_eq_m512i(r, _mm512_setzero_si512());
3318 let r = _mm512_maskz_shrdi_epi32(0b11111111_11111111, a, b, 1);
3319 let e = _mm512_set1_epi32(1);
3320 assert_eq_m512i(r, e);
3321 }
3322
3323 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3324 unsafe fn test_mm256_shrdi_epi32() {
3325 let a = _mm256_set1_epi32(8);
3326 let b = _mm256_set1_epi32(2);
3327 let r = _mm256_shrdi_epi32(a, b, 1);
3328 let e = _mm256_set1_epi32(1);
3329 assert_eq_m256i(r, e);
3330 }
3331
3332 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3333 unsafe fn test_mm256_mask_shrdi_epi32() {
3334 let a = _mm256_set1_epi32(8);
3335 let b = _mm256_set1_epi32(2);
3336 let r = _mm256_mask_shrdi_epi32(a, 0, a, b, 1);
3337 assert_eq_m256i(r, a);
3338 let r = _mm256_mask_shrdi_epi32(a, 0b11111111, a, b, 1);
3339 let e = _mm256_set1_epi32(1);
3340 assert_eq_m256i(r, e);
3341 }
3342
3343 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3344 unsafe fn test_mm256_maskz_shrdi_epi32() {
3345 let a = _mm256_set1_epi32(8);
3346 let b = _mm256_set1_epi32(2);
3347 let r = _mm256_maskz_shrdi_epi32(0, a, b, 1);
3348 assert_eq_m256i(r, _mm256_setzero_si256());
3349 let r = _mm256_maskz_shrdi_epi32(0b11111111, a, b, 1);
3350 let e = _mm256_set1_epi32(1);
3351 assert_eq_m256i(r, e);
3352 }
3353
3354 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3355 unsafe fn test_mm_shrdi_epi32() {
3356 let a = _mm_set1_epi32(8);
3357 let b = _mm_set1_epi32(2);
3358 let r = _mm_shrdi_epi32(a, b, 1);
3359 let e = _mm_set1_epi32(1);
3360 assert_eq_m128i(r, e);
3361 }
3362
3363 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3364 unsafe fn test_mm_mask_shrdi_epi32() {
3365 let a = _mm_set1_epi32(8);
3366 let b = _mm_set1_epi32(2);
3367 let r = _mm_mask_shrdi_epi32(a, 0, a, b, 1);
3368 assert_eq_m128i(r, a);
3369 let r = _mm_mask_shrdi_epi32(a, 0b00001111, a, b, 1);
3370 let e = _mm_set1_epi32(1);
3371 assert_eq_m128i(r, e);
3372 }
3373
3374 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3375 unsafe fn test_mm_maskz_shrdi_epi32() {
3376 let a = _mm_set1_epi32(8);
3377 let b = _mm_set1_epi32(2);
3378 let r = _mm_maskz_shrdi_epi32(0, a, b, 1);
3379 assert_eq_m128i(r, _mm_setzero_si128());
3380 let r = _mm_maskz_shrdi_epi32(0b00001111, a, b, 1);
3381 let e = _mm_set1_epi32(1);
3382 assert_eq_m128i(r, e);
3383 }
3384
3385 #[simd_test(enable = "avx512vbmi2")]
3386 unsafe fn test_mm512_shrdi_epi16() {
3387 let a = _mm512_set1_epi16(8);
3388 let b = _mm512_set1_epi16(2);
3389 let r = _mm512_shrdi_epi16(a, b, 1);
3390 let e = _mm512_set1_epi16(1);
3391 assert_eq_m512i(r, e);
3392 }
3393
3394 #[simd_test(enable = "avx512vbmi2")]
3395 unsafe fn test_mm512_mask_shrdi_epi16() {
3396 let a = _mm512_set1_epi16(8);
3397 let b = _mm512_set1_epi16(2);
3398 let r = _mm512_mask_shrdi_epi16(a, 0, a, b, 1);
3399 assert_eq_m512i(r, a);
3400 let r = _mm512_mask_shrdi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b, 1);
3401 let e = _mm512_set1_epi16(1);
3402 assert_eq_m512i(r, e);
3403 }
3404
3405 #[simd_test(enable = "avx512vbmi2")]
3406 unsafe fn test_mm512_maskz_shrdi_epi16() {
3407 let a = _mm512_set1_epi16(8);
3408 let b = _mm512_set1_epi16(2);
3409 let r = _mm512_maskz_shrdi_epi16(0, a, b, 1);
3410 assert_eq_m512i(r, _mm512_setzero_si512());
3411 let r = _mm512_maskz_shrdi_epi16(0b11111111_11111111_11111111_11111111, a, b, 1);
3412 let e = _mm512_set1_epi16(1);
3413 assert_eq_m512i(r, e);
3414 }
3415
3416 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3417 unsafe fn test_mm256_shrdi_epi16() {
3418 let a = _mm256_set1_epi16(8);
3419 let b = _mm256_set1_epi16(2);
3420 let r = _mm256_shrdi_epi16(a, b, 1);
3421 let e = _mm256_set1_epi16(1);
3422 assert_eq_m256i(r, e);
3423 }
3424
3425 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3426 unsafe fn test_mm256_mask_shrdi_epi16() {
3427 let a = _mm256_set1_epi16(8);
3428 let b = _mm256_set1_epi16(2);
3429 let r = _mm256_mask_shrdi_epi16(a, 0, a, b, 1);
3430 assert_eq_m256i(r, a);
3431 let r = _mm256_mask_shrdi_epi16(a, 0b11111111_11111111, a, b, 1);
3432 let e = _mm256_set1_epi16(1);
3433 assert_eq_m256i(r, e);
3434 }
3435
3436 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3437 unsafe fn test_mm256_maskz_shrdi_epi16() {
3438 let a = _mm256_set1_epi16(8);
3439 let b = _mm256_set1_epi16(2);
3440 let r = _mm256_maskz_shrdi_epi16(0, a, b, 1);
3441 assert_eq_m256i(r, _mm256_setzero_si256());
3442 let r = _mm256_maskz_shrdi_epi16(0b11111111_11111111, a, b, 1);
3443 let e = _mm256_set1_epi16(1);
3444 assert_eq_m256i(r, e);
3445 }
3446
3447 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3448 unsafe fn test_mm_shrdi_epi16() {
3449 let a = _mm_set1_epi16(8);
3450 let b = _mm_set1_epi16(2);
3451 let r = _mm_shrdi_epi16(a, b, 1);
3452 let e = _mm_set1_epi16(1);
3453 assert_eq_m128i(r, e);
3454 }
3455
3456 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3457 unsafe fn test_mm_mask_shrdi_epi16() {
3458 let a = _mm_set1_epi16(8);
3459 let b = _mm_set1_epi16(2);
3460 let r = _mm_mask_shrdi_epi16(a, 0, a, b, 1);
3461 assert_eq_m128i(r, a);
3462 let r = _mm_mask_shrdi_epi16(a, 0b11111111, a, b, 1);
3463 let e = _mm_set1_epi16(1);
3464 assert_eq_m128i(r, e);
3465 }
3466
3467 #[simd_test(enable = "avx512vbmi2,avx512vl")]
3468 unsafe fn test_mm_maskz_shrdi_epi16() {
3469 let a = _mm_set1_epi16(8);
3470 let b = _mm_set1_epi16(2);
3471 let r = _mm_maskz_shrdi_epi16(0, a, b, 1);
3472 assert_eq_m128i(r, _mm_setzero_si128());
3473 let r = _mm_maskz_shrdi_epi16(0b11111111, a, b, 1);
3474 let e = _mm_set1_epi16(1);
3475 assert_eq_m128i(r, e);
3476 }
3477 }