use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::{self, transmute},
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    let a = a.as_i32x16();
    // all-0 is a properly initialized i32x16
    let zero: i32x16 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i32x16 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, abs, zero))
}
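// Illustrative sketch, not part of the upstream file: a minimal `#[cfg(test)]` helper
// (the helper name is ours) showing the writemask vs. zeromask behaviour of the two
// masked `abs` variants above. It is only valid to call on a CPU with AVX512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_vs_maskz_abs_epi32() {
    let a = _mm512_set1_epi32(-5);
    let src = _mm512_set1_epi32(7);
    // Low 8 mask bits set: those lanes get |a| = 5; the remaining lanes keep `src`
    // (writemask) or become 0 (zeromask).
    let mask: __mmask16 = 0b0000_0000_1111_1111;
    let masked: [i32; 16] = transmute(_mm512_mask_abs_epi32(src, mask, a));
    let zeroed: [i32; 16] = transmute(_mm512_maskz_abs_epi32(mask, a));
    assert_eq!(masked[..8], [5i32; 8]);
    assert_eq!(masked[8..], [7i32; 8]);
    assert_eq!(zeroed[..8], [5i32; 8]);
    assert_eq!(zeroed[8..], [0i32; 8]);
}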

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    let a = a.as_i64x8();
    // all-0 is a properly initialized i64x8
    let zero: i64x8 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i64x8 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
    let a = _mm512_set1_epi32(0x7FFFFFFF); // from LLVM code
    let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
    let abs = _mm512_and_epi32(a, b);
    transmute(abs)
}
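// Illustrative sketch, not part of the upstream file: `_mm512_abs_ps` clears the sign bit
// of every lane by AND-ing with 0x7FFF_FFFF, which matches `f32::abs` for every bit
// pattern (only the sign bit changes; exponent and mantissa are preserved). This scalar
// model of the bit trick needs no AVX512F support.
#[cfg(test)]
#[allow(dead_code)]
fn example_abs_ps_bit_trick() {
    let x = -3.5f32;
    let cleared = f32::from_bits(x.to_bits() & 0x7FFF_FFFF);
    assert_eq!(cleared, 3.5f32);
    assert_eq!(cleared, x.abs());
}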

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    let abs = _mm512_abs_ps(v2).as_f32x16();
    transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF); // from LLVM code
    let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
    let abs = _mm512_and_epi64(a, b);
    transmute(abs)
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    let abs = _mm512_abs_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let mov = a.as_i32x16();
    transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let mov = a.as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let mov = a.as_i64x8();
    transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let mov = a.as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let mov = a.as_f32x16();
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    let mov = a.as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let mov = a.as_f64x8();
    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    let mov = a.as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}
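// Illustrative sketch, not part of the upstream file: a masked "mov" is effectively a
// per-lane blend, so `_mm512_mask_mov_epi32(src, k, a)` picks `a[i]` where bit i of `k`
// is set and `src[i]` otherwise. Helper name is ours; AVX512F-only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_mov_as_blend() {
    let a = _mm512_set1_epi32(1);
    let src = _mm512_set1_epi32(2);
    // Alternate lanes: even lanes take `a`, odd lanes keep `src`.
    let r: [i32; 16] = transmute(_mm512_mask_mov_epi32(src, 0b0101_0101_0101_0101, a));
    for (i, lane) in r.iter().enumerate() {
        assert_eq!(*lane, if i % 2 == 0 { 1 } else { 2 });
    }
}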

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, add, zero))
}
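// Illustrative sketch, not part of the upstream file: packed integer addition wraps on
// overflow (the vector analogue of `i32::wrapping_add`); it neither saturates nor panics.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_add_epi32_wraps() {
    let a = _mm512_set1_epi32(i32::MAX);
    let b = _mm512_set1_epi32(1);
    let r: [i32; 16] = transmute(_mm512_add_epi32(a, b));
    assert_eq!(r, [i32::MIN; 16]);
}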

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_ps&expand=5733)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_ps&expand=5731)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let sub = _mm512_sub_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_ps&expand=5732)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let sub = _mm512_sub_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_pd&expand=5721)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_pd&expand=5719)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let sub = _mm512_sub_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_pd&expand=5720)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let sub = _mm512_sub_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epi32(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epi32(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}
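// Illustrative sketch, not part of the upstream file: `_mm512_mul_epi32` reads only the
// low, signed 32 bits of each 64-bit lane of `a` and `b`, sign-extends them, and produces
// eight full 64-bit products (the high 32 bits of the inputs are ignored).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mul_epi32_uses_low_halves() {
    // The high half of each lane of `a` is ignored; its low half is 3.
    let a = _mm512_set1_epi64(0xDEAD_BEEF_0000_0003u64 as i64);
    let b = _mm512_set1_epi64(-2); // low 32 bits are 0xFFFF_FFFE, i.e. -2 as a signed value
    let r: [i64; 8] = transmute(_mm512_mul_epi32(a, b));
    assert_eq!(r, [-6i64; 8]);
}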

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullo_epi&expand=4005)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mask_mullo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, mul, zero))
}
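// Illustrative sketch, not part of the upstream file: `_mm512_mullo_epi32` keeps only the
// low 32 bits of each product, so per lane it behaves like `i32::wrapping_mul`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mullo_epi32_truncates() {
    let a = _mm512_set1_epi32(0x1_0000); // 65536
    let b = _mm512_set1_epi32(0x1_0000); // 65536 * 65536 = 2^32, whose low 32 bits are 0
    let r: [i32; 16] = transmute(_mm512_mullo_epi32(a, b));
    assert_eq!(r, [0i32; 16]);
    assert_eq!(65536i32.wrapping_mul(65536), 0);
}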

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullox&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_mullox_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullox_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}
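// Illustrative sketch, not part of the upstream file: `_mm512_mullox_epi64` is the full
// 64-bit low multiply. AVX512F itself has no single instruction for it (`vpmullq` belongs
// to AVX512DQ), which is why the doc comments above warn that a sequence of instructions
// is emitted.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mullox_epi64() {
    let a = _mm512_set1_epi64(1 << 40);
    let b = _mm512_set1_epi64(4);
    let r: [i64; 8] = transmute(_mm512_mullox_epi64(a, b));
    assert_eq!(r, [1i64 << 42; 8]);
}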

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epu32&expand=3916)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epu32(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epu32(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). RM.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let mul = _mm512_mul_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let mul = _mm512_mul_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). RM.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let mul = _mm512_mul_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let mul = _mm512_mul_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_ps&expand=2162)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let div = _mm512_div_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, div, src.as_f32x16()))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let div = _mm512_div_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, div, zero))
}
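// Illustrative sketch, not part of the upstream file: packed float division follows
// IEEE-754 rules per lane, so dividing a non-zero value by 0.0 yields infinity rather
// than trapping or panicking. Vectors are built by transmuting plain arrays to stay
// within what this section already provides.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_div_ps_ieee() {
    let a: __m512 = transmute([1.0f32; 16]);
    let b: __m512 = transmute([0.0f32; 16]);
    let r: [f32; 16] = transmute(_mm512_div_ps(a, b));
    assert!(r.iter().all(|x| *x == f32::INFINITY));
}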

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let div = _mm512_div_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, div, src.as_f64x8()))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let div = _mm512_div_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, div, zero))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi32&expand=3582)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, max, src.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, max, src.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_ps&expand=3655)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vmaxps(
        a.as_f32x16(),
        b.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max = _mm512_max_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max = _mm512_max_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max = _mm512_max_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max = _mm512_max_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epu32&expand=3618)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, max, src.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu32(a, b).as_u32x16();
    let zero = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, max, zero))
}
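// Illustrative sketch, not part of the upstream file: the signed and unsigned maxima
// differ exactly when the top bit is involved. -1i32 and u32::MAX share the same bit
// pattern, so the signed maximum of (-1, 0) is 0 while the unsigned maximum is
// 0xFFFF_FFFF.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_signed_vs_unsigned_max() {
    let a = _mm512_set1_epi32(-1); // bit pattern 0xFFFF_FFFF
    let b = _mm512_set1_epi32(0);
    let signed: [i32; 16] = transmute(_mm512_max_epi32(a, b));
    let unsigned: [u32; 16] = transmute(_mm512_max_epu32(a, b));
    assert_eq!(signed, [0i32; 16]);
    assert_eq!(unsigned, [u32::MAX; 16]);
}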
926
927 /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
928 ///
929 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=max_epu64&expand=3627)
930 #[inline]
931 #[target_feature(enable = "avx512f")]
932 #[cfg_attr(test, assert_instr(vpmaxuq))]
933 pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
934 transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
935 }
936
937 /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
938 ///
939 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epu64&expand=3625)
940 #[inline]
941 #[target_feature(enable = "avx512f")]
942 #[cfg_attr(test, assert_instr(vpmaxuq))]
943 pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
944 let max = _mm512_max_epu64(a, b).as_u64x8();
945 transmute(simd_select_bitmask(k, max, src.as_u64x8()))
946 }
947
948 /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
949 ///
950 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epu&expand=3626)
951 #[inline]
952 #[target_feature(enable = "avx512f")]
953 #[cfg_attr(test, assert_instr(vpmaxuq))]
954 pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
955 let max = _mm512_max_epu64(a, b).as_u64x8();
956 let zero = _mm512_setzero_si512().as_u64x8();
957 transmute(simd_select_bitmask(k, max, zero))
958 }
959
960 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
961 ///
962 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epi32&expand=3696)
963 #[inline]
964 #[target_feature(enable = "avx512f")]
965 #[cfg_attr(test, assert_instr(vpminsd))]
966 pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
967 transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
968 }
969
970 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
971 ///
972 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epi32&expand=3694)
973 #[inline]
974 #[target_feature(enable = "avx512f")]
975 #[cfg_attr(test, assert_instr(vpminsd))]
976 pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
977 let max = _mm512_min_epi32(a, b).as_i32x16();
978 transmute(simd_select_bitmask(k, max, src.as_i32x16()))
979 }
980
981 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
982 ///
983 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epi32&expand=3695)
984 #[inline]
985 #[target_feature(enable = "avx512f")]
986 #[cfg_attr(test, assert_instr(vpminsd))]
987 pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
988 let max = _mm512_min_epi32(a, b).as_i32x16();
989 let zero = _mm512_setzero_si512().as_i32x16();
990 transmute(simd_select_bitmask(k, max, zero))
991 }
992
993 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
994 ///
995 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epi64&expand=3705)
996 #[inline]
997 #[target_feature(enable = "avx512f")]
998 #[cfg_attr(test, assert_instr(vpminsq))]
999 pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
1000 transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
1001 }
1002
1003 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1004 ///
1005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epi64&expand=3703)
1006 #[inline]
1007 #[target_feature(enable = "avx512f")]
1008 #[cfg_attr(test, assert_instr(vpminsq))]
1009 pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1010 let max = _mm512_min_epi64(a, b).as_i64x8();
1011 transmute(simd_select_bitmask(k, max, src.as_i64x8()))
1012 }
1013
1014 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1015 ///
1016 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epi64&expand=3704)
1017 #[inline]
1018 #[target_feature(enable = "avx512f")]
1019 #[cfg_attr(test, assert_instr(vpminsq))]
1020 pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1021 let max = _mm512_min_epi64(a, b).as_i64x8();
1022 let zero = _mm512_setzero_si512().as_i64x8();
1023 transmute(simd_select_bitmask(k, max, zero))
1024 }
1025
1026 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
1027 ///
1028 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_ps&expand=3769)
1029 #[inline]
1030 #[target_feature(enable = "avx512f")]
1031 #[cfg_attr(test, assert_instr(vminps))]
1032 pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
1033 transmute(vminps(
1034 a.as_f32x16(),
1035 b.as_f32x16(),
1036 _MM_FROUND_CUR_DIRECTION,
1037 ))
1038 }
1039
1040 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1041 ///
1042 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_ps&expand=3767)
1043 #[inline]
1044 #[target_feature(enable = "avx512f")]
1045 #[cfg_attr(test, assert_instr(vminps))]
1046 pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1047 let max = _mm512_min_ps(a, b).as_f32x16();
1048 transmute(simd_select_bitmask(k, max, src.as_f32x16()))
1049 }
1050
1051 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1052 ///
1053 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_ps&expand=3768)
1054 #[inline]
1055 #[target_feature(enable = "avx512f")]
1056 #[cfg_attr(test, assert_instr(vminps))]
1057 pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1058 let min = _mm512_min_ps(a, b).as_f32x16();
1059 let zero = _mm512_setzero_ps().as_f32x16();
1060 transmute(simd_select_bitmask(k, min, zero))
1061 }
1062
1063 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
1064 ///
1065 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_pd&expand=3759)
1066 #[inline]
1067 #[target_feature(enable = "avx512f")]
1068 #[cfg_attr(test, assert_instr(vminpd))]
1069 pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
1070 transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
1071 }
1072
1073 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1074 ///
1075 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_pd&expand=3757)
1076 #[inline]
1077 #[target_feature(enable = "avx512f")]
1078 #[cfg_attr(test, assert_instr(vminpd))]
1079 pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1080 let min = _mm512_min_pd(a, b).as_f64x8();
1081 transmute(simd_select_bitmask(k, min, src.as_f64x8()))
1082 }
1083
1084 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1085 ///
1086 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_pd&expand=3758)
1087 #[inline]
1088 #[target_feature(enable = "avx512f")]
1089 #[cfg_attr(test, assert_instr(vminpd))]
1090 pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1091 let min = _mm512_min_pd(a, b).as_f64x8();
1092 let zero = _mm512_setzero_pd().as_f64x8();
1093 transmute(simd_select_bitmask(k, min, zero))
1094 }
1095
1096 /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
1097 ///
1098 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epu32&expand=3732)
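///
/// A sketch of the unsigned comparison (illustrative only, assuming
/// `_mm512_set1_epi32` and an AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     // the bit pattern of -1 reads as u32::MAX in an unsigned lane
///     let a = _mm512_set1_epi32(-1);
///     let b = _mm512_set1_epi32(7);
///     // unsigned minimum: every lane of `r` holds 7
///     let r = _mm512_min_epu32(a, b);
/// }
/// ```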
1099 #[inline]
1100 #[target_feature(enable = "avx512f")]
1101 #[cfg_attr(test, assert_instr(vpminud))]
1102 pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
1103 transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
1104 }
1105
1106 /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1107 ///
1108 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu32&expand=3730)
1109 #[inline]
1110 #[target_feature(enable = "avx512f")]
1111 #[cfg_attr(test, assert_instr(vpminud))]
1112 pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1113 let min = _mm512_min_epu32(a, b).as_u32x16();
1114 transmute(simd_select_bitmask(k, min, src.as_u32x16()))
1115 }
1116
1117 /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1118 ///
1119 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu32&expand=3731)
1120 #[inline]
1121 #[target_feature(enable = "avx512f")]
1122 #[cfg_attr(test, assert_instr(vpminud))]
1123 pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1124 let min = _mm512_min_epu32(a, b).as_u32x16();
1125 let zero = _mm512_setzero_si512().as_u32x16();
1126 transmute(simd_select_bitmask(k, min, zero))
1127 }
1128
1129 /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
1130 ///
1131 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epu64&expand=3741)
1132 #[inline]
1133 #[target_feature(enable = "avx512f")]
1134 #[cfg_attr(test, assert_instr(vpminuq))]
1135 pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
1136 transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
1137 }
1138
1139 /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1140 ///
1141 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu64&expand=3739)
1142 #[inline]
1143 #[target_feature(enable = "avx512f")]
1144 #[cfg_attr(test, assert_instr(vpminuq))]
1145 pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1146 let min = _mm512_min_epu64(a, b).as_u64x8();
1147 transmute(simd_select_bitmask(k, min, src.as_u64x8()))
1148 }
1149
1150 /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1151 ///
1152 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu64&expand=3740)
1153 #[inline]
1154 #[target_feature(enable = "avx512f")]
1155 #[cfg_attr(test, assert_instr(vpminuq))]
1156 pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1157 let min = _mm512_min_epu64(a, b).as_u64x8();
1158 let zero = _mm512_setzero_si512().as_u64x8();
1159 transmute(simd_select_bitmask(k, min, zero))
1160 }
1161
1162 /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1163 ///
1164 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_ps&expand=5371)
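///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_ps` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(9.0);
///     // every lane of `r` holds 3.0
///     let r = _mm512_sqrt_ps(a);
/// }
/// ```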
1165 #[inline]
1166 #[target_feature(enable = "avx512f")]
1167 #[cfg_attr(test, assert_instr(vsqrtps))]
1168 pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
1169 transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
1170 }
1171
1172 /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1173 ///
1174 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_ps&expand=5369)
1175 #[inline]
1176 #[target_feature(enable = "avx512f")]
1177 #[cfg_attr(test, assert_instr(vsqrtps))]
1178 pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1179 let sqrt = _mm512_sqrt_ps(a).as_f32x16();
1180 transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
1181 }
1182
1183 /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1184 ///
1185 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_ps&expand=5370)
1186 #[inline]
1187 #[target_feature(enable = "avx512f")]
1188 #[cfg_attr(test, assert_instr(vsqrtps))]
1189 pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
1190 let sqrt = _mm512_sqrt_ps(a).as_f32x16();
1191 let zero = _mm512_setzero_ps().as_f32x16();
1192 transmute(simd_select_bitmask(k, sqrt, zero))
1193 }
1194
1195 /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1196 ///
1197 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_pd&expand=5362)
1198 #[inline]
1199 #[target_feature(enable = "avx512f")]
1200 #[cfg_attr(test, assert_instr(vsqrtpd))]
1201 pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
1202 transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
1203 }
1204
1205 /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1206 ///
1207 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_pd&expand=5360)
1208 #[inline]
1209 #[target_feature(enable = "avx512f")]
1210 #[cfg_attr(test, assert_instr(vsqrtpd))]
1211 pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1212 let sqrt = _mm512_sqrt_pd(a).as_f64x8();
1213 transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
1214 }
1215
1216 /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1217 ///
1218 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_pd&expand=5361)
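///
/// A sketch of the zeromask behaviour (illustrative only, assuming
/// `_mm512_set1_pd` and an AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(16.0);
///     // lanes 0..4 hold 4.0; lanes 4..8 are zeroed because their mask bits are clear
///     let r = _mm512_maskz_sqrt_pd(0b00001111, a);
/// }
/// ```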
1219 #[inline]
1220 #[target_feature(enable = "avx512f")]
1221 #[cfg_attr(test, assert_instr(vsqrtpd))]
1222 pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
1223 let sqrt = _mm512_sqrt_pd(a).as_f64x8();
1224 let zero = _mm512_setzero_pd().as_f64x8();
1225 transmute(simd_select_bitmask(k, sqrt, zero))
1226 }
1227
1228 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
1229 ///
1230 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fmadd_ps&expand=2557)
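///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_ps` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // every lane of `r` holds 2.0 * 3.0 + 1.0 = 7.0, computed with a single rounding
///     let r = _mm512_fmadd_ps(a, b, c);
/// }
/// ```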
1231 #[inline]
1232 #[target_feature(enable = "avx512f")]
1233 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1234 pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1235 transmute(vfmadd132ps(
1236 a.as_f32x16(),
1237 b.as_f32x16(),
1238 c.as_f32x16(),
1239 _MM_FROUND_CUR_DIRECTION,
1240 ))
1241 }
1242
1243 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1244 ///
1245 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_ps&expand=2558)
1246 #[inline]
1247 #[target_feature(enable = "avx512f")]
1248 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1249 pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1250 let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
1251 transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
1252 }
1253
1254 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1255 ///
1256 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_ps&expand=2560)
1257 #[inline]
1258 #[target_feature(enable = "avx512f")]
1259 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1260 pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1261 let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
1262 let zero = _mm512_setzero_ps().as_f32x16();
1263 transmute(simd_select_bitmask(k, fmadd, zero))
1264 }
1265
1266 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1267 ///
1268 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_ps&expand=2559)
1269 #[inline]
1270 #[target_feature(enable = "avx512f")]
1271 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1272 pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1273 let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
1274 transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
1275 }
1276
1277 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
1278 ///
1279 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_pd&expand=2545)
1280 #[inline]
1281 #[target_feature(enable = "avx512f")]
1282 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1283 pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1284 transmute(vfmadd132pd(
1285 a.as_f64x8(),
1286 b.as_f64x8(),
1287 c.as_f64x8(),
1288 _MM_FROUND_CUR_DIRECTION,
1289 ))
1290 }
1291
1292 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1293 ///
1294 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_pd&expand=2546)
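///
/// A sketch of the writemask behaviour (illustrative only, assuming
/// `_mm512_set1_pd` and an AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // lanes 0..4 hold 2.0 * 3.0 + 1.0 = 7.0; lanes 4..8 are copied from `a` (2.0)
///     let r = _mm512_mask_fmadd_pd(a, 0b00001111, b, c);
/// }
/// ```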
1295 #[inline]
1296 #[target_feature(enable = "avx512f")]
1297 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1298 pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1299 let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
1300 transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
1301 }
1302
1303 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1304 ///
1305 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_pd&expand=2548)
1306 #[inline]
1307 #[target_feature(enable = "avx512f")]
1308 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1309 pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1310 let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
1311 let zero = _mm512_setzero_pd().as_f64x8();
1312 transmute(simd_select_bitmask(k, fmadd, zero))
1313 }
1314
1315 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1316 ///
1317 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_pd&expand=2547)
1318 #[inline]
1319 #[target_feature(enable = "avx512f")]
1320 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1321 pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1322 let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
1323 transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
1324 }
1325
1326 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
1327 ///
1328 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_ps&expand=2643)
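///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_ps` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // every lane of `r` holds 2.0 * 3.0 - 1.0 = 5.0
///     let r = _mm512_fmsub_ps(a, b, c);
/// }
/// ```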
1329 #[inline]
1330 #[target_feature(enable = "avx512f")]
1331 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1332 pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1333 let zero: f32x16 = mem::zeroed();
1334 let sub = simd_sub(zero, c.as_f32x16());
1335 transmute(vfmadd132ps(
1336 a.as_f32x16(),
1337 b.as_f32x16(),
1338 sub,
1339 _MM_FROUND_CUR_DIRECTION,
1340 ))
1341 }
1342
1343 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1344 ///
1345 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_ps&expand=2644)
1346 #[inline]
1347 #[target_feature(enable = "avx512f")]
1348 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1349 pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1350 let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
1351 transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
1352 }
1353
1354 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1355 ///
1356 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_ps&expand=2646)
1357 #[inline]
1358 #[target_feature(enable = "avx512f")]
1359 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1360 pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1361 let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
1362 let zero = _mm512_setzero_ps().as_f32x16();
1363 transmute(simd_select_bitmask(k, fmsub, zero))
1364 }
1365
1366 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1367 ///
1368 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_ps&expand=2645)
1369 #[inline]
1370 #[target_feature(enable = "avx512f")]
1371 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1372 pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1373 let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
1374 transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
1375 }
1376
1377 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
1378 ///
1379 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_pd&expand=2631)
1380 #[inline]
1381 #[target_feature(enable = "avx512f")]
1382 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1383 pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1384 let zero: f64x8 = mem::zeroed();
1385 let sub = simd_sub(zero, c.as_f64x8());
1386 transmute(vfmadd132pd(
1387 a.as_f64x8(),
1388 b.as_f64x8(),
1389 sub,
1390 _MM_FROUND_CUR_DIRECTION,
1391 ))
1392 }
1393
1394 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1395 ///
1396 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_pd&expand=2632)
1397 #[inline]
1398 #[target_feature(enable = "avx512f")]
1399 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1400 pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1401 let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
1402 transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
1403 }
1404
1405 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1406 ///
1407 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_pd&expand=2634)
1408 #[inline]
1409 #[target_feature(enable = "avx512f")]
1410 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1411 pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1412 let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
1413 let zero = _mm512_setzero_pd().as_f64x8();
1414 transmute(simd_select_bitmask(k, fmsub, zero))
1415 }
1416
1417 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1418 ///
1419 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_pd&expand=2633)
1420 #[inline]
1421 #[target_feature(enable = "avx512f")]
1422 #[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1423 pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1424 let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
1425 transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
1426 }
1427
1428 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
1429 ///
1430 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_ps&expand=2611)
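///
/// A sketch of the alternating add/subtract pattern (illustrative only, assuming
/// `_mm512_set1_ps` and an AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // even-indexed lanes hold 2.0 * 3.0 - 1.0 = 5.0,
///     // odd-indexed lanes hold 2.0 * 3.0 + 1.0 = 7.0
///     let r = _mm512_fmaddsub_ps(a, b, c);
/// }
/// ```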
1431 #[inline]
1432 #[target_feature(enable = "avx512f")]
1433 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1434 pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1435 transmute(vfmaddsub213ps(
1436 a.as_f32x16(),
1437 b.as_f32x16(),
1438 c.as_f32x16(),
1439 _MM_FROUND_CUR_DIRECTION,
1440 ))
1441 }
1442
1443 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1444 ///
1445 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_ps&expand=2612)
1446 #[inline]
1447 #[target_feature(enable = "avx512f")]
1448 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1449 pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1450 let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
1451 transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
1452 }
1453
1454 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1455 ///
1456 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_ps&expand=2614)
1457 #[inline]
1458 #[target_feature(enable = "avx512f")]
1459 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1460 pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1461 let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
1462 let zero = _mm512_setzero_ps().as_f32x16();
1463 transmute(simd_select_bitmask(k, fmaddsub, zero))
1464 }
1465
1466 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1467 ///
1468 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_ps&expand=2613)
1469 #[inline]
1470 #[target_feature(enable = "avx512f")]
1471 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1472 pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1473 let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
1474 transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
1475 }
1476
1477 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
1478 ///
1479 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_pd&expand=2599)
1480 #[inline]
1481 #[target_feature(enable = "avx512f")]
1482 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1483 pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1484 transmute(vfmaddsub213pd(
1485 a.as_f64x8(),
1486 b.as_f64x8(),
1487 c.as_f64x8(),
1488 _MM_FROUND_CUR_DIRECTION,
1489 ))
1490 }
1491
1492 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1493 ///
1494 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_pd&expand=2600)
1495 #[inline]
1496 #[target_feature(enable = "avx512f")]
1497 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1498 pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1499 let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
1500 transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
1501 }
1502
1503 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1504 ///
1505 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_pd&expand=2602)
1506 #[inline]
1507 #[target_feature(enable = "avx512f")]
1508 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1509 pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1510 let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
1511 let zero = _mm512_setzero_pd().as_f64x8();
1512 transmute(simd_select_bitmask(k, fmaddsub, zero))
1513 }
1514
1515 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1516 ///
1517 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_pd&expand=2601)
1518 #[inline]
1519 #[target_feature(enable = "avx512f")]
1520 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1521 pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1522 let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
1523 transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
1524 }
1525
1526 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
1527 ///
1528 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_ps&expand=2691)
1529 #[inline]
1530 #[target_feature(enable = "avx512f")]
1531 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1532 pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1533 let zero: f32x16 = mem::zeroed();
1534 let sub = simd_sub(zero, c.as_f32x16());
1535 transmute(vfmaddsub213ps(
1536 a.as_f32x16(),
1537 b.as_f32x16(),
1538 sub,
1539 _MM_FROUND_CUR_DIRECTION,
1540 ))
1541 }
1542
1543 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1544 ///
1545 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_ps&expand=2692)
1546 #[inline]
1547 #[target_feature(enable = "avx512f")]
1548 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1549 pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1550 let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
1551 transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
1552 }
1553
1554 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1555 ///
1556 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_ps&expand=2694)
1557 #[inline]
1558 #[target_feature(enable = "avx512f")]
1559 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1560 pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1561 let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
1562 let zero = _mm512_setzero_ps().as_f32x16();
1563 transmute(simd_select_bitmask(k, fmsubadd, zero))
1564 }
1565
1566 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1567 ///
1568 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_ps&expand=2693)
1569 #[inline]
1570 #[target_feature(enable = "avx512f")]
1571 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1572 pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1573 let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
1574 transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
1575 }
1576
1577 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
1578 ///
1579 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_pd&expand=2679)
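///
/// A sketch of the alternating subtract/add pattern (illustrative only, assuming
/// `_mm512_set1_pd` and an AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // even-indexed lanes hold 2.0 * 3.0 + 1.0 = 7.0,
///     // odd-indexed lanes hold 2.0 * 3.0 - 1.0 = 5.0
///     let r = _mm512_fmsubadd_pd(a, b, c);
/// }
/// ```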
1580 #[inline]
1581 #[target_feature(enable = "avx512f")]
1582 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1583 pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1584 let zero: f64x8 = mem::zeroed();
1585 let sub = simd_sub(zero, c.as_f64x8());
1586 transmute(vfmaddsub213pd(
1587 a.as_f64x8(),
1588 b.as_f64x8(),
1589 sub,
1590 _MM_FROUND_CUR_DIRECTION,
1591 ))
1592 }
1593
1594 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1595 ///
1596 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_pd&expand=2680)
1597 #[inline]
1598 #[target_feature(enable = "avx512f")]
1599 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1600 pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1601 let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
1602 transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
1603 }
1604
1605 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1606 ///
1607 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_pd&expand=2682)
1608 #[inline]
1609 #[target_feature(enable = "avx512f")]
1610 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1611 pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1612 let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
1613 let zero = _mm512_setzero_pd().as_f64x8();
1614 transmute(simd_select_bitmask(k, fmsubadd, zero))
1615 }
1616
1617 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1618 ///
1619 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_pd&expand=2681)
1620 #[inline]
1621 #[target_feature(enable = "avx512f")]
1622 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1623 pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1624 let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
1625 transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
1626 }
1627
1628 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
1629 ///
1630 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_ps&expand=2723)
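///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_ps` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // every lane of `r` holds -(2.0 * 3.0) + 1.0 = -5.0
///     let r = _mm512_fnmadd_ps(a, b, c);
/// }
/// ```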
1631 #[inline]
1632 #[target_feature(enable = "avx512f")]
1633 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1634 pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1635 let zero: f32x16 = mem::zeroed();
1636 let sub = simd_sub(zero, a.as_f32x16());
1637 transmute(vfmadd132ps(
1638 sub,
1639 b.as_f32x16(),
1640 c.as_f32x16(),
1641 _MM_FROUND_CUR_DIRECTION,
1642 ))
1643 }
1644
1645 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1646 ///
1647 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_ps&expand=2724)
1648 #[inline]
1649 #[target_feature(enable = "avx512f")]
1650 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1651 pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1652 let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
1653 transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
1654 }
1655
1656 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1657 ///
1658 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_ps&expand=2726)
1659 #[inline]
1660 #[target_feature(enable = "avx512f")]
1661 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1662 pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1663 let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
1664 let zero = _mm512_setzero_ps().as_f32x16();
1665 transmute(simd_select_bitmask(k, fnmadd, zero))
1666 }
1667
1668 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1669 ///
1670 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_ps&expand=2725)
1671 #[inline]
1672 #[target_feature(enable = "avx512f")]
1673 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1674 pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1675 let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
1676 transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
1677 }
1678
1679 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
1680 ///
1681 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_pd&expand=2711)
1682 #[inline]
1683 #[target_feature(enable = "avx512f")]
1684 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1685 pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1686 let zero: f64x8 = mem::zeroed();
1687 let sub = simd_sub(zero, a.as_f64x8());
1688 transmute(vfmadd132pd(
1689 sub,
1690 b.as_f64x8(),
1691 c.as_f64x8(),
1692 _MM_FROUND_CUR_DIRECTION,
1693 ))
1694 }
1695
1696 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1697 ///
1698 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_pd&expand=2712)
1699 #[inline]
1700 #[target_feature(enable = "avx512f")]
1701 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1702 pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1703 let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
1704 transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
1705 }
1706
1707 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1708 ///
1709 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_pd&expand=2714)
1710 #[inline]
1711 #[target_feature(enable = "avx512f")]
1712 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1713 pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1714 let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
1715 let zero = _mm512_setzero_pd().as_f64x8();
1716 transmute(simd_select_bitmask(k, fnmadd, zero))
1717 }
1718
1719 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1720 ///
1721 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_pd&expand=2713)
1722 #[inline]
1723 #[target_feature(enable = "avx512f")]
1724 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1725 pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1726 let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
1727 transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
1728 }
1729
1730 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
1731 ///
1732 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_ps&expand=2771)
1733 #[inline]
1734 #[target_feature(enable = "avx512f")]
1735 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1736 pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1737 let zero: f32x16 = mem::zeroed();
1738 let suba = simd_sub(zero, a.as_f32x16());
1739 let subc = simd_sub(zero, c.as_f32x16());
1740 transmute(vfmadd132ps(
1741 suba,
1742 b.as_f32x16(),
1743 subc,
1744 _MM_FROUND_CUR_DIRECTION,
1745 ))
1746 }
1747
1748 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1749 ///
1750 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_ps&expand=2772)
1751 #[inline]
1752 #[target_feature(enable = "avx512f")]
1753 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1754 pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1755 let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
1756 transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
1757 }
1758
1759 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760 ///
1761 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_ps&expand=2774)
1762 #[inline]
1763 #[target_feature(enable = "avx512f")]
1764 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1765 pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1766 let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
1767 let zero = _mm512_setzero_ps().as_f32x16();
1768 transmute(simd_select_bitmask(k, fnmsub, zero))
1769 }
1770
1771 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1772 ///
1773 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_ps&expand=2773)
1774 #[inline]
1775 #[target_feature(enable = "avx512f")]
1776 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1777 pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1778 let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
1779 transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
1780 }
1781
1782 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
1783 ///
1784 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_pd&expand=2759)
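///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_pd` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // every lane of `r` holds -(2.0 * 3.0) - 1.0 = -7.0
///     let r = _mm512_fnmsub_pd(a, b, c);
/// }
/// ```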
1785 #[inline]
1786 #[target_feature(enable = "avx512f")]
1787 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1788 pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1789 let zero: f64x8 = mem::zeroed();
1790 let suba = simd_sub(zero, a.as_f64x8());
1791 let subc = simd_sub(zero, c.as_f64x8());
1792 transmute(vfmadd132pd(
1793 suba,
1794 b.as_f64x8(),
1795 subc,
1796 _MM_FROUND_CUR_DIRECTION,
1797 ))
1798 }
1799
1800 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
1801 ///
1802 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_pd&expand=2760)
1803 #[inline]
1804 #[target_feature(enable = "avx512f")]
1805 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1806 pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1807 let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
1808 transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
1809 }
1810
1811 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1812 ///
1813 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_pd&expand=2762)
1814 #[inline]
1815 #[target_feature(enable = "avx512f")]
1816 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1817 pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1818 let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
1819 let zero = _mm512_setzero_pd().as_f64x8();
1820 transmute(simd_select_bitmask(k, fnmsub, zero))
1821 }
1822
1823 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
1824 ///
1825 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_pd&expand=2761)
1826 #[inline]
1827 #[target_feature(enable = "avx512f")]
1828 #[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1829 pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1830 let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
1831 transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
1832 }
1833
1834 /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
1835 ///
1836 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rcp14_ps&expand=4502)
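///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_ps` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(4.0);
///     // every lane of `r` is approximately 0.25 (relative error below 2^-14)
///     let r = _mm512_rcp14_ps(a);
/// }
/// ```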
1837 #[inline]
1838 #[target_feature(enable = "avx512f")]
1839 #[cfg_attr(test, assert_instr(vrcp14ps))]
1840 pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
1841 transmute(vrcp14ps(
1842 a.as_f32x16(),
1843 _mm512_setzero_ps().as_f32x16(),
1844 0b11111111_11111111,
1845 ))
1846 }
1847
1848 /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1849 ///
1850 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rcp14_ps&expand=4500)
1851 #[inline]
1852 #[target_feature(enable = "avx512f")]
1853 #[cfg_attr(test, assert_instr(vrcp14ps))]
1854 pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1855 transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
1856 }
1857
1858 /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1859 ///
1860 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rcp14_ps&expand=4501)
1861 #[inline]
1862 #[target_feature(enable = "avx512f")]
1863 #[cfg_attr(test, assert_instr(vrcp14ps))]
1864 pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
1865 transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
1866 }
1867
1868 /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
1869 ///
1870 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rcp14_pd&expand=4493)
1871 #[inline]
1872 #[target_feature(enable = "avx512f")]
1873 #[cfg_attr(test, assert_instr(vrcp14pd))]
1874 pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
1875 transmute(vrcp14pd(
1876 a.as_f64x8(),
1877 _mm512_setzero_pd().as_f64x8(),
1878 0b11111111,
1879 ))
1880 }
1881
1882 /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1883 ///
1884 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rcp14_pd&expand=4491)
1885 #[inline]
1886 #[target_feature(enable = "avx512f")]
1887 #[cfg_attr(test, assert_instr(vrcp14pd))]
1888 pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1889 transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
1890 }
1891
1892 /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1893 ///
1894 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rcp14_pd&expand=4492)
1895 #[inline]
1896 #[target_feature(enable = "avx512f")]
1897 #[cfg_attr(test, assert_instr(vrcp14pd))]
1898 pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
1899 transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
1900 }
1901
1902 /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
1903 ///
1904 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rsqrt14_ps&expand=4819)
1905 #[inline]
1906 #[target_feature(enable = "avx512f")]
1907 #[cfg_attr(test, assert_instr(vrsqrt14ps))]
1908 pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
1909 transmute(vrsqrt14ps(
1910 a.as_f32x16(),
1911 _mm512_setzero_ps().as_f32x16(),
1912 0b11111111_11111111,
1913 ))
1914 }
1915
1916 /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1917 ///
1918 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rsqrt14_ps&expand=4817)
1919 #[inline]
1920 #[target_feature(enable = "avx512f")]
1921 #[cfg_attr(test, assert_instr(vrsqrt14ps))]
1922 pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1923 transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
1924 }
1925
1926 /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1927 ///
1928 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rsqrt14_ps&expand=4818)
1929 #[inline]
1930 #[target_feature(enable = "avx512f")]
1931 #[cfg_attr(test, assert_instr(vrsqrt14ps))]
1932 pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
1933 transmute(vrsqrt14ps(
1934 a.as_f32x16(),
1935 _mm512_setzero_ps().as_f32x16(),
1936 k,
1937 ))
1938 }
1939
1940 /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
1941 ///
1942 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rsqrt14_pd&expand=4812)
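///
/// A minimal usage sketch (illustrative only, assuming `_mm512_set1_pd` and an
/// AVX512F-capable CPU):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(16.0);
///     // every lane of `r` is approximately 1.0 / 4.0 = 0.25
///     let r = _mm512_rsqrt14_pd(a);
/// }
/// ```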
1943 #[inline]
1944 #[target_feature(enable = "avx512f")]
1945 #[cfg_attr(test, assert_instr(vrsqrt14pd))]
1946 pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
1947 transmute(vrsqrt14pd(
1948 a.as_f64x8(),
1949 _mm512_setzero_pd().as_f64x8(),
1950 0b11111111,
1951 ))
1952 }
1953
1954 /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1955 ///
1956 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rsqrt14_pd&expand=4810)
1957 #[inline]
1958 #[target_feature(enable = "avx512f")]
1959 #[cfg_attr(test, assert_instr(vrsqrt14pd))]
1960 pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1961 transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
1962 }
1963
1964 /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1965 ///
1966 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rsqrt14_pd&expand=4811)
1967 #[inline]
1968 #[target_feature(enable = "avx512f")]
1969 #[cfg_attr(test, assert_instr(vrsqrt14pd))]
1970 pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
1971 transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
1972 }
1973
1974 /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
1975 ///
1976 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_ps&expand=2844)
1977 #[inline]
1978 #[target_feature(enable = "avx512f")]
1979 #[cfg_attr(test, assert_instr(vgetexpps))]
1980 pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
1981 transmute(vgetexpps(
1982 a.as_f32x16(),
1983 _mm512_setzero_ps().as_f32x16(),
1984 0b11111111_11111111,
1985 _MM_FROUND_CUR_DIRECTION,
1986 ))
1987 }
1988
1989 /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
1990 ///
1991 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_ps&expand=2845)
1992 #[inline]
1993 #[target_feature(enable = "avx512f")]
1994 #[cfg_attr(test, assert_instr(vgetexpps))]
1995 pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1996 transmute(vgetexpps(
1997 a.as_f32x16(),
1998 src.as_f32x16(),
1999 k,
2000 _MM_FROUND_CUR_DIRECTION,
2001 ))
2002 }
2003
2004 /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
2005 ///
2006 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_ps&expand=2846)
2007 #[inline]
2008 #[target_feature(enable = "avx512f")]
2009 #[cfg_attr(test, assert_instr(vgetexpps))]
2010 pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
2011 transmute(vgetexpps(
2012 a.as_f32x16(),
2013 _mm512_setzero_ps().as_f32x16(),
2014 k,
2015 _MM_FROUND_CUR_DIRECTION,
2016 ))
2017 }
2018
2019 /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
2020 ///
2021 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_pd&expand=2835)
2022 #[inline]
2023 #[target_feature(enable = "avx512f")]
2024 #[cfg_attr(test, assert_instr(vgetexppd))]
2025 pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
2026 transmute(vgetexppd(
2027 a.as_f64x8(),
2028 _mm512_setzero_pd().as_f64x8(),
2029 0b11111111,
2030 _MM_FROUND_CUR_DIRECTION,
2031 ))
2032 }
2033
2034 /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
2035 ///
2036 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_pd&expand=2836)
2037 #[inline]
2038 #[target_feature(enable = "avx512f")]
2039 #[cfg_attr(test, assert_instr(vgetexppd))]
2040 pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
2041 transmute(vgetexppd(
2042 a.as_f64x8(),
2043 src.as_f64x8(),
2044 k,
2045 _MM_FROUND_CUR_DIRECTION,
2046 ))
2047 }
2048
2049 /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
2050 ///
2051 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_pd&expand=2837)
2052 #[inline]
2053 #[target_feature(enable = "avx512f")]
2054 #[cfg_attr(test, assert_instr(vgetexppd))]
2055 pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
2056 transmute(vgetexppd(
2057 a.as_f64x8(),
2058 _mm512_setzero_pd().as_f64x8(),
2059 k,
2060 _MM_FROUND_CUR_DIRECTION,
2061 ))
2062 }
2063
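// Illustrative usage sketch: getexp returns floor(log2(|x|)) of each lane as a
// floating-point value. `getexp_demo` is a hypothetical helper for illustration
// only; AVX512F support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getexp_demo() {
    let a = _mm512_set1_ps(24.0); // 24 = 1.5 * 2^4, so the unbiased exponent is 4
    let lanes: [f32; 16] = transmute(_mm512_getexp_ps(a));
    assert_eq!(lanes[0], 4.0);
}
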
2064 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
2065 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2066 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2067 /// _MM_FROUND_TO_NEG_INF // round down\
2068 /// _MM_FROUND_TO_POS_INF // round up\
2069 /// _MM_FROUND_TO_ZERO // truncate\
2070 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2071 ///
2072 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_ps&expand=4784)
2073 #[inline]
2074 #[target_feature(enable = "avx512f")]
2075 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
2076 #[rustc_args_required_const(1)]
2077 pub unsafe fn _mm512_roundscale_ps(a: __m512, imm8: i32) -> __m512 {
2078 let a = a.as_f32x16();
2079 let zero = _mm512_setzero_ps().as_f32x16();
2080 macro_rules! call {
2081 ($imm8:expr) => {
2082 vrndscaleps(
2083 a,
2084 $imm8,
2085 zero,
2086 0b11111111_11111111,
2087 _MM_FROUND_CUR_DIRECTION,
2088 )
2089 };
2090 }
2091 let r = constify_imm8_sae!(imm8, call);
2092 transmute(r)
2093 }
2094
2095 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
2096 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2097 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2098 /// _MM_FROUND_TO_NEG_INF // round down\
2099 /// _MM_FROUND_TO_POS_INF // round up\
2100 /// _MM_FROUND_TO_ZERO // truncate\
2101 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2102 ///
2103 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_ps&expand=4782)
2104 #[inline]
2105 #[target_feature(enable = "avx512f")]
2106 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
2107 #[rustc_args_required_const(3)]
2108 pub unsafe fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
2109 let a = a.as_f32x16();
2110 let src = src.as_f32x16();
2111 macro_rules! call {
2112 ($imm8:expr) => {
2113 vrndscaleps(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
2114 };
2115 }
2116 let r = constify_imm8_sae!(imm8, call);
2117 transmute(r)
2118 }
2119
2120 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
2121 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2122 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2123 /// _MM_FROUND_TO_NEG_INF // round down\
2124 /// _MM_FROUND_TO_POS_INF // round up\
2125 /// _MM_FROUND_TO_ZERO // truncate\
2126 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2127 ///
2128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_ps&expand=4783)
2129 #[inline]
2130 #[target_feature(enable = "avx512f")]
2131 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
2132 #[rustc_args_required_const(2)]
2133 pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
2134 let a = a.as_f32x16();
2135 let zero = _mm512_setzero_ps().as_f32x16();
2136 macro_rules! call {
2137 ($imm8:expr) => {
2138 vrndscaleps(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
2139 };
2140 }
2141 let r = constify_imm8_sae!(imm8, call);
2142 transmute(r)
2143 }
2144
2145 /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
2146 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2147 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2148 /// _MM_FROUND_TO_NEG_INF // round down\
2149 /// _MM_FROUND_TO_POS_INF // round up\
2150 /// _MM_FROUND_TO_ZERO // truncate\
2151 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2152 ///
2153 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_pd&expand=4775)
2154 #[inline]
2155 #[target_feature(enable = "avx512f")]
2156 #[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
2157 #[rustc_args_required_const(1)]
2158 pub unsafe fn _mm512_roundscale_pd(a: __m512d, imm8: i32) -> __m512d {
2159 let a = a.as_f64x8();
2160 let zero = _mm512_setzero_pd().as_f64x8();
2161 macro_rules! call {
2162 ($imm8:expr) => {
2163 vrndscalepd(a, $imm8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION)
2164 };
2165 }
2166 let r = constify_imm8_sae!(imm8, call);
2167 transmute(r)
2168 }
2169
2170 /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
2171 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2172 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2173 /// _MM_FROUND_TO_NEG_INF // round down\
2174 /// _MM_FROUND_TO_POS_INF // round up\
2175 /// _MM_FROUND_TO_ZERO // truncate\
2176 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2177 ///
2178 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_pd&expand=4773)
2179 #[inline]
2180 #[target_feature(enable = "avx512f")]
2181 #[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
2182 #[rustc_args_required_const(3)]
2183 pub unsafe fn _mm512_mask_roundscale_pd(
2184 src: __m512d,
2185 k: __mmask8,
2186 a: __m512d,
2187 imm8: i32,
2188 ) -> __m512d {
2189 let a = a.as_f64x8();
2190 let src = src.as_f64x8();
2191 macro_rules! call {
2192 ($imm8:expr) => {
2193 vrndscalepd(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
2194 };
2195 }
2196 let r = constify_imm8_sae!(imm8, call);
2197 transmute(r)
2198 }
2199
2200 /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
2201 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2202 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2203 /// _MM_FROUND_TO_NEG_INF // round down\
2204 /// _MM_FROUND_TO_POS_INF // round up\
2205 /// _MM_FROUND_TO_ZERO // truncate\
2206 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2207 ///
2208 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_pd&expand=4774)
2209 #[inline]
2210 #[target_feature(enable = "avx512f")]
2211 #[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
2212 #[rustc_args_required_const(2)]
2213 pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
2214 let a = a.as_f64x8();
2215 let zero = _mm512_setzero_pd().as_f64x8();
2216 macro_rules! call {
2217 ($imm8:expr) => {
2218 vrndscalepd(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
2219 };
2220 }
2221 let r = constify_imm8_sae!(imm8, call);
2222 transmute(r)
2223 }
2224
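// Illustrative usage sketch: with imm8 = 0 the elements are rounded to the
// nearest integer (no fraction bits kept), while _MM_FROUND_TO_ZERO in the low
// bits truncates instead. `roundscale_demo` is a hypothetical helper for
// illustration only; AVX512F support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn roundscale_demo() {
    let a = _mm512_set1_ps(1.75);
    let nearest: [f32; 16] = transmute(_mm512_roundscale_ps(a, 0));
    let truncated: [f32; 16] = transmute(_mm512_roundscale_ps(a, _MM_FROUND_TO_ZERO));
    assert_eq!(nearest[0], 2.0);
    assert_eq!(truncated[0], 1.0);
}
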
2225 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
2226 ///
2227 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_ps&expand=4883)
2228 #[inline]
2229 #[target_feature(enable = "avx512f")]
2230 #[cfg_attr(test, assert_instr(vscalefps))]
2231 pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
2232 transmute(vscalefps(
2233 a.as_f32x16(),
2234 b.as_f32x16(),
2235 _mm512_setzero_ps().as_f32x16(),
2236 0b11111111_11111111,
2237 _MM_FROUND_CUR_DIRECTION,
2238 ))
2239 }
2240
2241 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2242 ///
2243 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_ps&expand=4881)
2244 #[inline]
2245 #[target_feature(enable = "avx512f")]
2246 #[cfg_attr(test, assert_instr(vscalefps))]
2247 pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2248 transmute(vscalefps(
2249 a.as_f32x16(),
2250 b.as_f32x16(),
2251 src.as_f32x16(),
2252 k,
2253 _MM_FROUND_CUR_DIRECTION,
2254 ))
2255 }
2256
2257 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258 ///
2259 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_ps&expand=4882)
2260 #[inline]
2261 #[target_feature(enable = "avx512f")]
2262 #[cfg_attr(test, assert_instr(vscalefps))]
2263 pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2264 transmute(vscalefps(
2265 a.as_f32x16(),
2266 b.as_f32x16(),
2267 _mm512_setzero_ps().as_f32x16(),
2268 k,
2269 _MM_FROUND_CUR_DIRECTION,
2270 ))
2271 }
2272
2273 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
2274 ///
2275 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_pd&expand=4874)
2276 #[inline]
2277 #[target_feature(enable = "avx512f")]
2278 #[cfg_attr(test, assert_instr(vscalefpd))]
2279 pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
2280 transmute(vscalefpd(
2281 a.as_f64x8(),
2282 b.as_f64x8(),
2283 _mm512_setzero_pd().as_f64x8(),
2284 0b11111111,
2285 _MM_FROUND_CUR_DIRECTION,
2286 ))
2287 }
2288
2289 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2290 ///
2291 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_pd&expand=4872)
2292 #[inline]
2293 #[target_feature(enable = "avx512f")]
2294 #[cfg_attr(test, assert_instr(vscalefpd))]
2295 pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2296 transmute(vscalefpd(
2297 a.as_f64x8(),
2298 b.as_f64x8(),
2299 src.as_f64x8(),
2300 k,
2301 _MM_FROUND_CUR_DIRECTION,
2302 ))
2303 }
2304
2305 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2306 ///
2307 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_pd&expand=4873)
2308 #[inline]
2309 #[target_feature(enable = "avx512f")]
2310 #[cfg_attr(test, assert_instr(vscalefpd))]
2311 pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2312 transmute(vscalefpd(
2313 a.as_f64x8(),
2314 b.as_f64x8(),
2315 _mm512_setzero_pd().as_f64x8(),
2316 k,
2317 _MM_FROUND_CUR_DIRECTION,
2318 ))
2319 }
2320
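// Illustrative usage sketch: scalef computes a * 2^floor(b) per lane.
// `scalef_demo` is a hypothetical helper for illustration only; AVX512F
// support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn scalef_demo() {
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(4.0);
    let lanes: [f32; 16] = transmute(_mm512_scalef_ps(a, b)); // 3.0 * 2^4
    assert_eq!(lanes[0], 48.0);
}
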
2321 /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
2322 ///
2323 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_ps&expand=2499)
2324 #[inline]
2325 #[target_feature(enable = "avx512f")]
2326 #[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
2327 #[rustc_args_required_const(3)]
2328 pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i, imm8: i32) -> __m512 {
2329 let a = a.as_f32x16();
2330 let b = b.as_f32x16();
2331 let c = c.as_i32x16();
2332 macro_rules! call {
2333 ($imm8:expr) => {
2334 vfixupimmps(
2335 a,
2336 b,
2337 c,
2338 $imm8,
2339 0b11111111_11111111,
2340 _MM_FROUND_CUR_DIRECTION,
2341 )
2342 };
2343 }
2344 let r = constify_imm8_sae!(imm8, call);
2345 transmute(r)
2346 }
2347
2348 /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
2349 ///
2350 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_ps&expand=2500)
2351 #[inline]
2352 #[target_feature(enable = "avx512f")]
2353 #[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
2354 #[rustc_args_required_const(4)]
2355 pub unsafe fn _mm512_mask_fixupimm_ps(
2356 a: __m512,
2357 k: __mmask16,
2358 b: __m512,
2359 c: __m512i,
2360 imm8: i32,
2361 ) -> __m512 {
2362 let a = a.as_f32x16();
2363 let b = b.as_f32x16();
2364 let c = c.as_i32x16();
2365 macro_rules! call {
2366 ($imm8:expr) => {
2367 vfixupimmps(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
2368 };
2369 }
2370 let r = constify_imm8_sae!(imm8, call);
2371 transmute(r)
2372 }
2373
2374 /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
2375 ///
2376 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_ps&expand=2501)
2377 #[inline]
2378 #[target_feature(enable = "avx512f")]
2379 #[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
2380 #[rustc_args_required_const(4)]
2381 pub unsafe fn _mm512_maskz_fixupimm_ps(
2382 k: __mmask16,
2383 a: __m512,
2384 b: __m512,
2385 c: __m512i,
2386 imm8: i32,
2387 ) -> __m512 {
2388 let a = a.as_f32x16();
2389 let b = b.as_f32x16();
2390 let c = c.as_i32x16();
2391 macro_rules! call {
2392 ($imm8:expr) => {
2393 vfixupimmpsz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
2394 };
2395 }
2396 let r = constify_imm8_sae!(imm8, call);
2397 transmute(r)
2398 }
2399
2400 /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
2401 ///
2402 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_pd&expand=2490)
2403 #[inline]
2404 #[target_feature(enable = "avx512f")]
2405 #[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
2406 #[rustc_args_required_const(3)]
2407 pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i, imm8: i32) -> __m512d {
2408 let a = a.as_f64x8();
2409 let b = b.as_f64x8();
2410 let c = c.as_i64x8();
2411 macro_rules! call {
2412 ($imm8:expr) => {
2413 vfixupimmpd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
2414 };
2415 }
2416 let r = constify_imm8_sae!(imm8, call);
2417 transmute(r)
2418 }
2419
2420 /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
2421 ///
2422 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_pd&expand=2491)
2423 #[inline]
2424 #[target_feature(enable = "avx512f")]
2425 #[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
2426 #[rustc_args_required_const(4)]
2427 pub unsafe fn _mm512_mask_fixupimm_pd(
2428 a: __m512d,
2429 k: __mmask8,
2430 b: __m512d,
2431 c: __m512i,
2432 imm8: i32,
2433 ) -> __m512d {
2434 let a = a.as_f64x8();
2435 let b = b.as_f64x8();
2436 let c = c.as_i64x8();
2437 macro_rules! call {
2438 ($imm8:expr) => {
2439 vfixupimmpd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
2440 };
2441 }
2442 let r = constify_imm8_sae!(imm8, call);
2443 transmute(r)
2444 }
2445
2446 /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
2447 ///
2448 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_pd&expand=2492)
2449 #[inline]
2450 #[target_feature(enable = "avx512f")]
2451 #[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
2452 #[rustc_args_required_const(4)]
2453 pub unsafe fn _mm512_maskz_fixupimm_pd(
2454 k: __mmask8,
2455 a: __m512d,
2456 b: __m512d,
2457 c: __m512i,
2458 imm8: i32,
2459 ) -> __m512d {
2460 let a = a.as_f64x8();
2461 let b = b.as_f64x8();
2462 let c = c.as_i64x8();
2463 macro_rules! call {
2464 ($imm8:expr) => {
2465 vfixupimmpdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
2466 };
2467 }
2468 let r = constify_imm8_sae!(imm8, call);
2469 transmute(r)
2470 }
2471
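// Illustrative usage sketch of the call shape. Each 32-bit element of the third
// operand packs eight 4-bit response tokens, one per input class of b; per
// Intel's token table, an all-zero table requests the "no fixup" response
// (keep the element from a) for every class, and imm8 only controls exception
// reporting. `fixupimm_demo` is a hypothetical helper for illustration only;
// AVX512F support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fixupimm_demo() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(f32::NAN);
    let table = _mm512_set1_epi32(0); // token 0 for every class: keep a
    let _r = _mm512_fixupimm_ps(a, b, table, 0);
}
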
2472 /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
2473 ///
2474 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ternarylogic_epi32&expand=5867)
2475 #[inline]
2476 #[target_feature(enable = "avx512f")]
2477 #[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
2478 #[rustc_args_required_const(3)]
2479 pub unsafe fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
2480 let a = a.as_i32x16();
2481 let b = b.as_i32x16();
2482 let c = c.as_i32x16();
2483 macro_rules! call {
2484 ($imm8:expr) => {
2485 vpternlogd(a, b, c, $imm8)
2486 };
2487 }
2488 let r = constify_imm8_sae!(imm8, call);
2489 transmute(r)
2490 }
2491
2492 /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
2493 ///
2494 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ternarylogic_epi32&expand=5865)
2495 #[inline]
2496 #[target_feature(enable = "avx512f")]
2497 #[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
2498 #[rustc_args_required_const(4)]
2499 pub unsafe fn _mm512_mask_ternarylogic_epi32(
2500 src: __m512i,
2501 k: __mmask16,
2502 a: __m512i,
2503 b: __m512i,
2504 imm8: i32,
2505 ) -> __m512i {
2506 let src = src.as_i32x16();
2507 let a = a.as_i32x16();
2508 let b = b.as_i32x16();
2509 macro_rules! call {
2510 ($imm8:expr) => {
2511 vpternlogd(src, a, b, $imm8)
2512 };
2513 }
2514 let ternarylogic = constify_imm8_sae!(imm8, call);
2515 transmute(simd_select_bitmask(k, ternarylogic, src))
2516 }
2517
2518 /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
2519 ///
2520 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ternarylogic_epi32&expand=5866)
2521 #[inline]
2522 #[target_feature(enable = "avx512f")]
2523 #[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
2524 #[rustc_args_required_const(4)]
2525 pub unsafe fn _mm512_maskz_ternarylogic_epi32(
2526 k: __mmask16,
2527 a: __m512i,
2528 b: __m512i,
2529 c: __m512i,
2530 imm8: i32,
2531 ) -> __m512i {
2532 let a = a.as_i32x16();
2533 let b = b.as_i32x16();
2534 let c = c.as_i32x16();
2535 macro_rules! call {
2536 ($imm8:expr) => {
2537 vpternlogd(a, b, c, $imm8)
2538 };
2539 }
2540 let ternarylogic = constify_imm8_sae!(imm8, call);
2541 let zero = _mm512_setzero_si512().as_i32x16();
2542 transmute(simd_select_bitmask(k, ternarylogic, zero))
2543 }
2544
2545 /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
2546 ///
2547 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ternarylogic_epi64&expand=5876)
2548 #[inline]
2549 #[target_feature(enable = "avx512f")]
2550 #[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
2551 #[rustc_args_required_const(3)]
2552 pub unsafe fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
2553 let a = a.as_i64x8();
2554 let b = b.as_i64x8();
2555 let c = c.as_i64x8();
2556 macro_rules! call {
2557 ($imm8:expr) => {
2558 vpternlogq(a, b, c, $imm8)
2559 };
2560 }
2561 let r = constify_imm8_sae!(imm8, call);
2562 transmute(r)
2563 }
2564
2565 /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
2566 ///
2567 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ternarylogic_epi64&expand=5874)
2568 #[inline]
2569 #[target_feature(enable = "avx512f")]
2570 #[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
2571 #[rustc_args_required_const(4)]
2572 pub unsafe fn _mm512_mask_ternarylogic_epi64(
2573 src: __m512i,
2574 k: __mmask8,
2575 a: __m512i,
2576 b: __m512i,
2577 imm8: i32,
2578 ) -> __m512i {
2579 let src = src.as_i64x8();
2580 let a = a.as_i64x8();
2581 let b = b.as_i64x8();
2582 macro_rules! call {
2583 ($imm8:expr) => {
2584 vpternlogq(src, a, b, $imm8)
2585 };
2586 }
2587 let ternarylogic = constify_imm8_sae!(imm8, call);
2588 transmute(simd_select_bitmask(k, ternarylogic, src))
2589 }
2590
2591 /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
2592 ///
2593 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ternarylogic_epi64&expand=5875)
2594 #[inline]
2595 #[target_feature(enable = "avx512f")]
2596 #[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
2597 #[rustc_args_required_const(4)]
2598 pub unsafe fn _mm512_maskz_ternarylogic_epi64(
2599 k: __mmask8,
2600 a: __m512i,
2601 b: __m512i,
2602 c: __m512i,
2603 imm8: i32,
2604 ) -> __m512i {
2605 let a = a.as_i64x8();
2606 let b = b.as_i64x8();
2607 let c = c.as_i64x8();
2608 macro_rules! call {
2609 ($imm8:expr) => {
2610 vpternlogq(a, b, c, $imm8)
2611 };
2612 }
2613 let ternarylogic = constify_imm8_sae!(imm8, call);
2614 let zero = _mm512_setzero_si512().as_i64x8();
2615 transmute(simd_select_bitmask(k, ternarylogic, zero))
2616 }
2617
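// Illustrative usage sketch: imm8 is an 8-entry truth table indexed by
// (a_bit << 2 | b_bit << 1 | c_bit); 0x96 encodes three-way XOR. With the
// canonical patterns below every index 0..7 occurs once in the low byte, so
// each lane's low byte reproduces imm8. `ternarylogic_demo` is a hypothetical
// helper for illustration only; AVX512F support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn ternarylogic_demo() {
    let a = _mm512_set1_epi32(0xF0);
    let b = _mm512_set1_epi32(0xCC);
    let c = _mm512_set1_epi32(0xAA);
    let lanes: [i32; 16] = transmute(_mm512_ternarylogic_epi32(a, b, c, 0x96));
    assert_eq!(lanes[0], 0xF0 ^ 0xCC ^ 0xAA); // == 0x96
}
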
2618 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
2619 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
2620 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
2621 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
2622 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
2623 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
2624 /// The sign is determined by sc which can take the following values:\
2625 /// _MM_MANT_SIGN_src // sign = sign(src)\
2626 /// _MM_MANT_SIGN_zero // sign = 0\
2627 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2628 ///
2629 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_ps&expand=2880)
2630 #[inline]
2631 #[target_feature(enable = "avx512f")]
2632 #[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
2633 #[rustc_args_required_const(1, 2)]
2634 pub unsafe fn _mm512_getmant_ps(
2635 a: __m512,
2636 norm: _MM_MANTISSA_NORM_ENUM,
2637 sign: _MM_MANTISSA_SIGN_ENUM,
2638 ) -> __m512 {
2639 macro_rules! call {
2640 ($imm4:expr, $imm2:expr) => {
2641 vgetmantps(
2642 a.as_f32x16(),
2643 $imm2 << 2 | $imm4,
2644 _mm512_setzero_ps().as_f32x16(),
2645 0b11111111_11111111,
2646 _MM_FROUND_CUR_DIRECTION,
2647 )
2648 };
2649 }
2650 let r = constify_imm4_mantissas!(norm, sign, call);
2651 transmute(r)
2652 }
2653
2654 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
2655 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
2656 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
2657 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
2658 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
2659 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
2660 /// The sign is determined by sc which can take the following values:\
2661 /// _MM_MANT_SIGN_src // sign = sign(src)\
2662 /// _MM_MANT_SIGN_zero // sign = 0\
2663 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2664 ///
2665 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_ps&expand=2881)
2666 #[inline]
2667 #[target_feature(enable = "avx512f")]
2668 #[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
2669 #[rustc_args_required_const(3, 4)]
2670 pub unsafe fn _mm512_mask_getmant_ps(
2671 src: __m512,
2672 k: __mmask16,
2673 a: __m512,
2674 norm: _MM_MANTISSA_NORM_ENUM,
2675 sign: _MM_MANTISSA_SIGN_ENUM,
2676 ) -> __m512 {
2677 macro_rules! call {
2678 ($imm4:expr, $imm2:expr) => {
2679 vgetmantps(
2680 a.as_f32x16(),
2681 $imm2 << 2 | $imm4,
2682 src.as_f32x16(),
2683 k,
2684 _MM_FROUND_CUR_DIRECTION,
2685 )
2686 };
2687 }
2688 let r = constify_imm4_mantissas!(norm, sign, call);
2689 transmute(r)
2690 }
2691
2692 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
2693 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
2694 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
2695 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
2696 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
2697 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
2698 /// The sign is determined by sc which can take the following values:\
2699 /// _MM_MANT_SIGN_src // sign = sign(src)\
2700 /// _MM_MANT_SIGN_zero // sign = 0\
2701 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2702 ///
2703 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_ps&expand=2882)
2704 #[inline]
2705 #[target_feature(enable = "avx512f")]
2706 #[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
2707 #[rustc_args_required_const(2, 3)]
2708 pub unsafe fn _mm512_maskz_getmant_ps(
2709 k: __mmask16,
2710 a: __m512,
2711 norm: _MM_MANTISSA_NORM_ENUM,
2712 sign: _MM_MANTISSA_SIGN_ENUM,
2713 ) -> __m512 {
2714 macro_rules! call {
2715 ($imm4:expr, $imm2:expr) => {
2716 vgetmantps(
2717 a.as_f32x16(),
2718 $imm2 << 2 | $imm4,
2719 _mm512_setzero_ps().as_f32x16(),
2720 k,
2721 _MM_FROUND_CUR_DIRECTION,
2722 )
2723 };
2724 }
2725 let r = constify_imm4_mantissas!(norm, sign, call);
2726 transmute(r)
2727 }
2728
2729 /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
2730 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
2731 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
2732 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
2733 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
2734 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
2735 /// The sign is determined by sc which can take the following values:\
2736 /// _MM_MANT_SIGN_src // sign = sign(src)\
2737 /// _MM_MANT_SIGN_zero // sign = 0\
2738 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2739 ///
2740 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_pd&expand=2871)
2741 #[inline]
2742 #[target_feature(enable = "avx512f")]
2743 #[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
2744 #[rustc_args_required_const(1, 2)]
2745 pub unsafe fn _mm512_getmant_pd(
2746 a: __m512d,
2747 norm: _MM_MANTISSA_NORM_ENUM,
2748 sign: _MM_MANTISSA_SIGN_ENUM,
2749 ) -> __m512d {
2750 macro_rules! call {
2751 ($imm4:expr, $imm2:expr) => {
2752 vgetmantpd(
2753 a.as_f64x8(),
2754 $imm2 << 2 | $imm4,
2755 _mm512_setzero_pd().as_f64x8(),
2756 0b11111111,
2757 _MM_FROUND_CUR_DIRECTION,
2758 )
2759 };
2760 }
2761 let r = constify_imm4_mantissas!(norm, sign, call);
2762 transmute(r)
2763 }
2764
2765 /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
2766 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
2767 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
2768 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
2769 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
2770 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
2771 /// The sign is determined by sc which can take the following values:\
2772 /// _MM_MANT_SIGN_src // sign = sign(src)\
2773 /// _MM_MANT_SIGN_zero // sign = 0\
2774 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2775 ///
2776 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_pd&expand=2872)
2777 #[inline]
2778 #[target_feature(enable = "avx512f")]
2779 #[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
2780 #[rustc_args_required_const(3, 4)]
2781 pub unsafe fn _mm512_mask_getmant_pd(
2782 src: __m512d,
2783 k: __mmask8,
2784 a: __m512d,
2785 norm: _MM_MANTISSA_NORM_ENUM,
2786 sign: _MM_MANTISSA_SIGN_ENUM,
2787 ) -> __m512d {
2788 macro_rules! call {
2789 ($imm4:expr, $imm2:expr) => {
2790 vgetmantpd(
2791 a.as_f64x8(),
2792 $imm2 << 2 | $imm4,
2793 src.as_f64x8(),
2794 k,
2795 _MM_FROUND_CUR_DIRECTION,
2796 )
2797 };
2798 }
2799 let r = constify_imm4_mantissas!(norm, sign, call);
2800 transmute(r)
2801 }
2802
2803 /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
2804 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
2805 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
2806 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
2807 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
2808 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
2809 /// The sign is determined by sc which can take the following values:\
2810 /// _MM_MANT_SIGN_src // sign = sign(src)\
2811 /// _MM_MANT_SIGN_zero // sign = 0\
2812 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2813 ///
2814 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_pd&expand=2873)
2815 #[inline]
2816 #[target_feature(enable = "avx512f")]
2817 #[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
2818 #[rustc_args_required_const(2, 3)]
2819 pub unsafe fn _mm512_maskz_getmant_pd(
2820 k: __mmask8,
2821 a: __m512d,
2822 norm: _MM_MANTISSA_NORM_ENUM,
2823 sign: _MM_MANTISSA_SIGN_ENUM,
2824 ) -> __m512d {
2825 macro_rules! call {
2826 ($imm4:expr, $imm2:expr) => {
2827 vgetmantpd(
2828 a.as_f64x8(),
2829 $imm2 << 2 | $imm4,
2830 _mm512_setzero_pd().as_f64x8(),
2831 k,
2832 _MM_FROUND_CUR_DIRECTION,
2833 )
2834 };
2835 }
2836 let r = constify_imm4_mantissas!(norm, sign, call);
2837 transmute(r)
2838 }
2839
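// Illustrative usage sketch: normalizing the mantissa to [1, 2) while keeping
// the source sign. The literal arguments 0, 0 stand for the _MM_MANT_NORM_1_2 /
// _MM_MANT_SIGN_src settings described above and assume the enum parameters
// are plain integer aliases. `getmant_demo` is a hypothetical helper for
// illustration only; AVX512F support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getmant_demo() {
    let a = _mm512_set1_ps(12.0); // 12 = 1.5 * 2^3
    let lanes: [f32; 16] = transmute(_mm512_getmant_ps(a, 0, 0));
    assert_eq!(lanes[0], 1.5);
}
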
2840 /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
2841 ///
2842 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
2843 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
2844 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
2845 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
2846 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
2847 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2848 ///
2849 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_round_ps&expand=145)
2850 #[inline]
2851 #[target_feature(enable = "avx512f")]
2852 #[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
2853 #[rustc_args_required_const(2)]
2854 pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
2855 let a = a.as_f32x16();
2856 let b = b.as_f32x16();
2857 macro_rules! call {
2858 ($imm4:expr) => {
2859 vaddps(a, b, $imm4)
2860 };
2861 }
2862 let r = constify_imm4_round!(rounding, call);
2863 transmute(r)
2864 }
2865
2866 /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
2867 ///
2868 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
2869 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
2870 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
2871 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
2872 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
2873 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2874 ///
2875 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_round_ps&expand=146)
2876 #[inline]
2877 #[target_feature(enable = "avx512f")]
2878 #[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
2879 #[rustc_args_required_const(4)]
2880 pub unsafe fn _mm512_mask_add_round_ps(
2881 src: __m512,
2882 k: __mmask16,
2883 a: __m512,
2884 b: __m512,
2885 rounding: i32,
2886 ) -> __m512 {
2887 let a = a.as_f32x16();
2888 let b = b.as_f32x16();
2889 macro_rules! call {
2890 ($imm4:expr) => {
2891 vaddps(a, b, $imm4)
2892 };
2893 }
2894 let addround = constify_imm4_round!(rounding, call);
2895 transmute(simd_select_bitmask(k, addround, src.as_f32x16()))
2896 }
2897
2898 /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
2899 ///
2900 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
2901 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
2902 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
2903 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
2904 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
2905 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2906 ///
2907 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_round_ps&expand=147)
2908 #[inline]
2909 #[target_feature(enable = "avx512f")]
2910 #[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
2911 #[rustc_args_required_const(3)]
2912 pub unsafe fn _mm512_maskz_add_round_ps(
2913 k: __mmask16,
2914 a: __m512,
2915 b: __m512,
2916 rounding: i32,
2917 ) -> __m512 {
2918 let a = a.as_f32x16();
2919 let b = b.as_f32x16();
2920 macro_rules! call {
2921 ($imm4:expr) => {
2922 vaddps(a, b, $imm4)
2923 };
2924 }
2925 let addround = constify_imm4_round!(rounding, call);
2926 let zero = _mm512_setzero_ps().as_f32x16();
2927 transmute(simd_select_bitmask(k, addround, zero))
2928 }
2929
2930 /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
2931 ///
2932 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
2933 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
2934 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
2935 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
2936 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
2937 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2938 ///
2939 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_round_pd&expand=142)
2940 #[inline]
2941 #[target_feature(enable = "avx512f")]
2942 #[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
2943 #[rustc_args_required_const(2)]
2944 pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
2945 let a = a.as_f64x8();
2946 let b = b.as_f64x8();
2947 macro_rules! call {
2948 ($imm4:expr) => {
2949 vaddpd(a, b, $imm4)
2950 };
2951 }
2952 let r = constify_imm4_round!(rounding, call);
2953 transmute(r)
2954 }
2955
2956 /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
2957 ///
2958 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
2959 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
2960 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
2961 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
2962 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
2963 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2964 ///
2965 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_round_pd&expand=143)
2966 #[inline]
2967 #[target_feature(enable = "avx512f")]
2968 #[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
2969 #[rustc_args_required_const(4)]
2970 pub unsafe fn _mm512_mask_add_round_pd(
2971 src: __m512d,
2972 k: __mmask8,
2973 a: __m512d,
2974 b: __m512d,
2975 rounding: i32,
2976 ) -> __m512d {
2977 let a = a.as_f64x8();
2978 let b = b.as_f64x8();
2979 macro_rules! call {
2980 ($imm4:expr) => {
2981 vaddpd(a, b, $imm4)
2982 };
2983 }
2984 let addround = constify_imm4_round!(rounding, call);
2985 transmute(simd_select_bitmask(k, addround, src.as_f64x8()))
2986 }
2987
2988 /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
2989 ///
2990 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
2991 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
2992 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
2993 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
2994 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
2995 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2996 ///
2997 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_round_pd&expand=144)
2998 #[inline]
2999 #[target_feature(enable = "avx512f")]
3000 #[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
3001 #[rustc_args_required_const(3)]
3002 pub unsafe fn _mm512_maskz_add_round_pd(
3003 k: __mmask8,
3004 a: __m512d,
3005 b: __m512d,
3006 rounding: i32,
3007 ) -> __m512d {
3008 let a = a.as_f64x8();
3009 let b = b.as_f64x8();
3010 macro_rules! call {
3011 ($imm4:expr) => {
3012 vaddpd(a, b, $imm4)
3013 };
3014 }
3015 let addround = constify_imm4_round!(rounding, call);
3016 let zero = _mm512_setzero_pd().as_f64x8();
3017 transmute(simd_select_bitmask(k, addround, zero))
3018 }
3019
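// Illustrative usage sketch: the rounding argument must be a compile-time
// constant built from the _MM_FROUND_* flags listed above. `add_round_demo` is
// a hypothetical helper for illustration only; AVX512F support at runtime is
// assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn add_round_demo() {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(2.25);
    let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let lanes: [f32; 16] = transmute(r);
    assert_eq!(lanes[0], 3.75); // the sum is exact, so every rounding mode agrees
}
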
3020 /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
3021 ///
3022 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3023 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3024 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3025 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3026 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3027 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3028 ///
3029 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_round_ps&expand=5739)
3030 #[inline]
3031 #[target_feature(enable = "avx512f")]
3032 #[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
3033 #[rustc_args_required_const(2)]
3034 pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
3035 let a = a.as_f32x16();
3036 let b = b.as_f32x16();
3037 macro_rules! call {
3038 ($imm4:expr) => {
3039 vsubps(a, b, $imm4)
3040 };
3041 }
3042 let r = constify_imm4_round!(rounding, call);
3043 transmute(r)
3044 }
3045
3046 /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3047 ///
3048 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3049 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3050 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3051 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3052 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3053 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3054 ///
3055 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_round_ps&expand=5737)
3056 #[inline]
3057 #[target_feature(enable = "avx512f")]
3058 #[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
3059 #[rustc_args_required_const(4)]
3060 pub unsafe fn _mm512_mask_sub_round_ps(
3061 src: __m512,
3062 k: __mmask16,
3063 a: __m512,
3064 b: __m512,
3065 rounding: i32,
3066 ) -> __m512 {
3067 let a = a.as_f32x16();
3068 let b = b.as_f32x16();
3069 macro_rules! call {
3070 ($imm4:expr) => {
3071 vsubps(a, b, $imm4)
3072 };
3073 }
3074 let subround = constify_imm4_round!(rounding, call);
3075 transmute(simd_select_bitmask(k, subround, src.as_f32x16()))
3076 }
3077
3078 /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3079 ///
3080 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3081 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3082 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3083 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3084 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3085 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3086 ///
3087 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_round_ps&expand=5738)
3088 #[inline]
3089 #[target_feature(enable = "avx512f")]
3090 #[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
3091 #[rustc_args_required_const(3)]
3092 pub unsafe fn _mm512_maskz_sub_round_ps(
3093 k: __mmask16,
3094 a: __m512,
3095 b: __m512,
3096 rounding: i32,
3097 ) -> __m512 {
3098 let a = a.as_f32x16();
3099 let b = b.as_f32x16();
3100 macro_rules! call {
3101 ($imm4:expr) => {
3102 vsubps(a, b, $imm4)
3103 };
3104 }
3105 let subround = constify_imm4_round!(rounding, call);
3106 let zero = _mm512_setzero_ps().as_f32x16();
3107 transmute(simd_select_bitmask(k, subround, zero))
3108 }
3109
3110 /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
3111 ///
3112 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3113 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3114 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3115 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3116 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3117 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3118 ///
3119 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_round_pd&expand=5736)
3120 #[inline]
3121 #[target_feature(enable = "avx512f")]
3122 #[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
3123 #[rustc_args_required_const(2)]
3124 pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
3125 let a = a.as_f64x8();
3126 let b = b.as_f64x8();
3127 macro_rules! call {
3128 ($imm4:expr) => {
3129 vsubpd(a, b, $imm4)
3130 };
3131 }
3132 let r = constify_imm4_round!(rounding, call);
3133 transmute(r)
3134 }
3135
3136 /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3137 ///
3138 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3139 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3140 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3141 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3142 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3143 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3144 ///
3145 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_round_pd&expand=5734)
3146 #[inline]
3147 #[target_feature(enable = "avx512f")]
3148 #[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
3149 #[rustc_args_required_const(4)]
3150 pub unsafe fn _mm512_mask_sub_round_pd(
3151 src: __m512d,
3152 k: __mmask8,
3153 a: __m512d,
3154 b: __m512d,
3155 rounding: i32,
3156 ) -> __m512d {
3157 let a = a.as_f64x8();
3158 let b = b.as_f64x8();
3159 macro_rules! call {
3160 ($imm4:expr) => {
3161 vsubpd(a, b, $imm4)
3162 };
3163 }
3164 let subround = constify_imm4_round!(rounding, call);
3165 transmute(simd_select_bitmask(k, subround, src.as_f64x8()))
3166 }
3167
3168 /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3169 ///
3170 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3171 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3172 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3173 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3174 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3175 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3176 ///
3177 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_round_pd&expand=5735)
3178 #[inline]
3179 #[target_feature(enable = "avx512f")]
3180 #[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
3181 #[rustc_args_required_const(3)]
3182 pub unsafe fn _mm512_maskz_sub_round_pd(
3183 k: __mmask8,
3184 a: __m512d,
3185 b: __m512d,
3186 rounding: i32,
3187 ) -> __m512d {
3188 let a = a.as_f64x8();
3189 let b = b.as_f64x8();
3190 macro_rules! call {
3191 ($imm4:expr) => {
3192 vsubpd(a, b, $imm4)
3193 };
3194 }
3195 let subround = constify_imm4_round!(rounding, call);
3196 let zero = _mm512_setzero_pd().as_f64x8();
3197 transmute(simd_select_bitmask(k, subround, zero))
3198 }
3199
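// Illustrative usage sketch: writemask semantics combined with explicit
// rounding; lanes whose mask bit is clear keep the value from src.
// `sub_round_demo` is a hypothetical helper for illustration only; AVX512F
// support at runtime is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sub_round_demo() {
    let src = _mm512_set1_ps(100.0);
    let a = _mm512_set1_ps(5.0);
    let b = _mm512_set1_ps(1.5);
    let r = _mm512_mask_sub_round_ps(
        src,
        0b00000000_00000001, // only lane 0 is computed
        a,
        b,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let lanes: [f32; 16] = transmute(r);
    assert_eq!(lanes[0], 3.5);
    assert_eq!(lanes[1], 100.0); // copied from src
}
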
3200 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
3201 ///
3202 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3203 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3204 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3205 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3206 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3207 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3208 ///
3209 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_round_ps&expand=3940)
3210 #[inline]
3211 #[target_feature(enable = "avx512f")]
3212 #[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
3213 #[rustc_args_required_const(2)]
3214 pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
3215 let a = a.as_f32x16();
3216 let b = b.as_f32x16();
3217 macro_rules! call {
3218 ($imm4:expr) => {
3219 vmulps(a, b, $imm4)
3220 };
3221 }
3222 let r = constify_imm4_round!(rounding, call);
3223 transmute(r)
3224 }
3225
3226 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3227 ///
3228 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3229 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3230 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3231 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3232 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3233 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3234 ///
3235 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_round_ps&expand=3938)
3236 #[inline]
3237 #[target_feature(enable = "avx512f")]
3238 #[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
3239 #[rustc_args_required_const(4)]
3240 pub unsafe fn _mm512_mask_mul_round_ps(
3241 src: __m512,
3242 k: __mmask16,
3243 a: __m512,
3244 b: __m512,
3245 rounding: i32,
3246 ) -> __m512 {
3247 let a = a.as_f32x16();
3248 let b = b.as_f32x16();
3249 macro_rules! call {
3250 ($imm4:expr) => {
3251 vmulps(a, b, $imm4)
3252 };
3253 }
3254 let mulround = constify_imm4_round!(rounding, call);
3255 transmute(simd_select_bitmask(k, mulround, src.as_f32x16()))
3256 }
3257
3258 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3259 ///
3260 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3261 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3262 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3263 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3264 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3265 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3266 ///
3267 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_round_ps&expand=3939)
3268 #[inline]
3269 #[target_feature(enable = "avx512f")]
3270 #[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
3271 #[rustc_args_required_const(3)]
3272 pub unsafe fn _mm512_maskz_mul_round_ps(
3273 k: __mmask16,
3274 a: __m512,
3275 b: __m512,
3276 rounding: i32,
3277 ) -> __m512 {
3278 let a = a.as_f32x16();
3279 let b = b.as_f32x16();
3280 macro_rules! call {
3281 ($imm4:expr) => {
3282 vmulps(a, b, $imm4)
3283 };
3284 }
3285 let mulround = constify_imm4_round!(rounding, call);
3286 let zero = _mm512_setzero_ps().as_f32x16();
3287 transmute(simd_select_bitmask(k, mulround, zero))
3288 }
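// Illustrative sketch, not part of upstream stdarch: a product rounded toward zero,
// so each lane of the result is truncated rather than rounded to nearest. The helper
// name is invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mul_round_ps_truncate(a: __m512, b: __m512) -> __m512 {
    _mm512_mul_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
}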
3289
3290 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
3291 ///
3292 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3293 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3294 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3295 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3296 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3297 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3298 ///
3299 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_round_pd&expand=3937)
3300 #[inline]
3301 #[target_feature(enable = "avx512f")]
3302 #[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
3303 #[rustc_args_required_const(2)]
3304 pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
3305 let a = a.as_f64x8();
3306 let b = b.as_f64x8();
3307 macro_rules! call {
3308 ($imm4:expr) => {
3309 vmulpd(a, b, $imm4)
3310 };
3311 }
3312 let r = constify_imm4_round!(rounding, call);
3313 transmute(r)
3314 }
3315
3316 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3317 ///
3318 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3319 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3320 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3321 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3322 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3323 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3324 ///
3325 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_round_pd&expand=3935)
3326 #[inline]
3327 #[target_feature(enable = "avx512f")]
3328 #[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
3329 #[rustc_args_required_const(4)]
3330 pub unsafe fn _mm512_mask_mul_round_pd(
3331 src: __m512d,
3332 k: __mmask8,
3333 a: __m512d,
3334 b: __m512d,
3335 rounding: i32,
3336 ) -> __m512d {
3337 let a = a.as_f64x8();
3338 let b = b.as_f64x8();
3339 macro_rules! call {
3340 ($imm4:expr) => {
3341 vmulpd(a, b, $imm4)
3342 };
3343 }
3344 let mulround = constify_imm4_round!(rounding, call);
3345 transmute(simd_select_bitmask(k, mulround, src.as_f64x8()))
3346 }
3347
3348 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3349 ///
3350 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3351 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3352 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3353 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3354 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3355 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3356 ///
3357 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_round_pd&expand=3939)
3358 #[inline]
3359 #[target_feature(enable = "avx512f")]
3360 #[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
3361 #[rustc_args_required_const(3)]
3362 pub unsafe fn _mm512_maskz_mul_round_pd(
3363 k: __mmask8,
3364 a: __m512d,
3365 b: __m512d,
3366 rounding: i32,
3367 ) -> __m512d {
3368 let a = a.as_f64x8();
3369 let b = b.as_f64x8();
3370 macro_rules! call {
3371 ($imm4:expr) => {
3372 vmulpd(a, b, $imm4)
3373 };
3374 }
3375 let mulround = constify_imm4_round!(rounding, call);
3376 let zero = _mm512_setzero_pd().as_f64x8();
3377 transmute(simd_select_bitmask(k, mulround, zero))
3378 }
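// Illustrative sketch, not part of upstream stdarch: passing _MM_FROUND_CUR_DIRECTION
// defers to the rounding mode currently selected by MXCSR.RC instead of encoding one
// in the instruction. The helper name is made up for this example.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mul_round_pd_mxcsr(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    // Masked-off lanes are zeroed; kept lanes use whatever MXCSR currently selects.
    _mm512_maskz_mul_round_pd(k, a, b, _MM_FROUND_CUR_DIRECTION)
}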
3379
3380 /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
3381 ///
3382 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3383 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3384 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3385 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3386 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3387 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3388 ///
3389 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_round_ps&expand=2168)
3390 #[inline]
3391 #[target_feature(enable = "avx512f")]
3392 #[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
3393 #[rustc_args_required_const(2)]
3394 pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
3395 let a = a.as_f32x16();
3396 let b = b.as_f32x16();
3397 macro_rules! call {
3398 ($imm4:expr) => {
3399 vdivps(a, b, $imm4)
3400 };
3401 }
3402 let r = constify_imm4_round!(rounding, call);
3403 transmute(r)
3404 }
3405
3406 /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3407 ///
3408 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3409 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3410 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3411 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3412 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3413 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3414 ///
3415 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_round_ps&expand=2169)
3416 #[inline]
3417 #[target_feature(enable = "avx512f")]
3418 #[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
3419 #[rustc_args_required_const(4)]
3420 pub unsafe fn _mm512_mask_div_round_ps(
3421 src: __m512,
3422 k: __mmask16,
3423 a: __m512,
3424 b: __m512,
3425 rounding: i32,
3426 ) -> __m512 {
3427 let a = a.as_f32x16();
3428 let b = b.as_f32x16();
3429 macro_rules! call {
3430 ($imm4:expr) => {
3431 vdivps(a, b, $imm4)
3432 };
3433 }
3434 let divround = constify_imm4_round!(rounding, call);
3435 transmute(simd_select_bitmask(k, divround, src.as_f32x16()))
3436 }
3437
3438 /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3439 ///
3440 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3441 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3442 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3443 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3444 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3445 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3446 ///
3447 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_round_ps&expand=2170)
3448 #[inline]
3449 #[target_feature(enable = "avx512f")]
3450 #[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
3451 #[rustc_args_required_const(3)]
3452 pub unsafe fn _mm512_maskz_div_round_ps(
3453 k: __mmask16,
3454 a: __m512,
3455 b: __m512,
3456 rounding: i32,
3457 ) -> __m512 {
3458 let a = a.as_f32x16();
3459 let b = b.as_f32x16();
3460 macro_rules! call {
3461 ($imm4:expr) => {
3462 vdivps(a, b, $imm4)
3463 };
3464 }
3465 let divround = constify_imm4_round!(rounding, call);
3466 let zero = _mm512_setzero_ps().as_f32x16();
3467 transmute(simd_select_bitmask(k, divround, zero))
3468 }
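// Illustrative sketch, not part of upstream stdarch: computing the same quotient once
// rounded down and once rounded up brackets the exact result in every lane, a common
// interval-arithmetic trick. The helper name is invented for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_div_round_ps_bracket(a: __m512, b: __m512) -> (__m512, __m512) {
    let lower = _mm512_div_round_ps(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    let upper = _mm512_div_round_ps(a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    (lower, upper)
}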
3469
3470 /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
3471 ///
3472 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3473 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3474 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3475 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3476 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3477 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3478 ///
3479 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_round_pd&expand=2165)
3480 #[inline]
3481 #[target_feature(enable = "avx512f")]
3482 #[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
3483 #[rustc_args_required_const(2)]
3484 pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
3485 let a = a.as_f64x8();
3486 let b = b.as_f64x8();
3487 macro_rules! call {
3488 ($imm4:expr) => {
3489 vdivpd(a, b, $imm4)
3490 };
3491 }
3492 let r = constify_imm4_round!(rounding, call);
3493 transmute(r)
3494 }
3495
3496 /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3497 ///
3498 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3499 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3500 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3501 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3502 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3503 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3504 ///
3505 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_round_pd&expand=2166)
3506 #[inline]
3507 #[target_feature(enable = "avx512f")]
3508 #[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
3509 #[rustc_args_required_const(4)]
3510 pub unsafe fn _mm512_mask_div_round_pd(
3511 src: __m512d,
3512 k: __mmask8,
3513 a: __m512d,
3514 b: __m512d,
3515 rounding: i32,
3516 ) -> __m512d {
3517 let a = a.as_f64x8();
3518 let b = b.as_f64x8();
3519 macro_rules! call {
3520 ($imm4:expr) => {
3521 vdivpd(a, b, $imm4)
3522 };
3523 }
3524 let divround = constify_imm4_round!(rounding, call);
3525 transmute(simd_select_bitmask(k, divround, src.as_f64x8()))
3526 }
3527
3528 /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3529 ///
3530 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3531 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3532 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3533 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3534 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3535 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3536 ///
3537 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_round_pd&expand=2167)
3538 #[inline]
3539 #[target_feature(enable = "avx512f")]
3540 #[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
3541 #[rustc_args_required_const(3)]
3542 pub unsafe fn _mm512_maskz_div_round_pd(
3543 k: __mmask8,
3544 a: __m512d,
3545 b: __m512d,
3546 rounding: i32,
3547 ) -> __m512d {
3548 let a = a.as_f64x8();
3549 let b = b.as_f64x8();
3550 macro_rules! call {
3551 ($imm4:expr) => {
3552 vdivpd(a, b, $imm4)
3553 };
3554 }
3555 let divround = constify_imm4_round!(rounding, call);
3556 let zero = _mm512_setzero_pd().as_f64x8();
3557 transmute(simd_select_bitmask(k, divround, zero))
3558 }
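// Illustrative sketch, not part of upstream stdarch: merges a rounded-to-nearest
// quotient into `src` lane by lane under a caller-supplied mask, leaving the other
// lanes of `src` untouched.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_div_round_pd_merge(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    _mm512_mask_div_round_pd(src, k, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}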
3559
3560 /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
3561 ///
3562 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3563 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3564 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3565 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3566 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3567 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3568 ///
3569 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_round_ps&expand=5377)
3570 #[inline]
3571 #[target_feature(enable = "avx512f")]
3572 #[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
3573 #[rustc_args_required_const(1)]
3574 pub unsafe fn _mm512_sqrt_round_ps(a: __m512, rounding: i32) -> __m512 {
3575 let a = a.as_f32x16();
3576 macro_rules! call {
3577 ($imm4:expr) => {
3578 vsqrtps(a, $imm4)
3579 };
3580 }
3581 let r = constify_imm4_round!(rounding, call);
3582 transmute(r)
3583 }
3584
3585 /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3586 ///
3587 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3588 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3589 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3590 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3591 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3592 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3593 ///
3594 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_round_ps&expand=5375)
3595 #[inline]
3596 #[target_feature(enable = "avx512f")]
3597 #[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
3598 #[rustc_args_required_const(3)]
3599 pub unsafe fn _mm512_mask_sqrt_round_ps(
3600 src: __m512,
3601 k: __mmask16,
3602 a: __m512,
3603 rounding: i32,
3604 ) -> __m512 {
3605 let a = a.as_f32x16();
3606 macro_rules! call {
3607 ($imm4:expr) => {
3608 vsqrtps(a, $imm4)
3609 };
3610 }
3611 let sqrtround = constify_imm4_round!(rounding, call);
3612 transmute(simd_select_bitmask(k, sqrtround, src.as_f32x16()))
3613 }
3614
3615 /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3616 ///
3617 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3618 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3619 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3620 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3621 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3622 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3623 ///
3624 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_round_ps&expand=5376)
3625 #[inline]
3626 #[target_feature(enable = "avx512f")]
3627 #[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
3628 #[rustc_args_required_const(2)]
3629 pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512, rounding: i32) -> __m512 {
3630 let a = a.as_f32x16();
3631 macro_rules! call {
3632 ($imm4:expr) => {
3633 vsqrtps(a, $imm4)
3634 };
3635 }
3636 let sqrtround = constify_imm4_round!(rounding, call);
3637 let zero = _mm512_setzero_ps().as_f32x16();
3638 transmute(simd_select_bitmask(k, sqrtround, zero))
3639 }
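// Illustrative sketch, not part of upstream stdarch: takes the square root of the
// even-indexed lanes only; odd-indexed lanes are zeroed by the zeromask form. The
// mask constant is an arbitrary choice for the example.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_sqrt_round_ps_even_lanes(a: __m512) -> __m512 {
    _mm512_maskz_sqrt_round_ps(
        0b0101_0101_0101_0101,
        a,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    )
}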
3640
3641 /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
3642 ///
3643 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3644 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3645 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3646 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3647 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3648 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3649 ///
3650 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_round_pd&expand=5374)
3651 #[inline]
3652 #[target_feature(enable = "avx512f")]
3653 #[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
3654 #[rustc_args_required_const(1)]
3655 pub unsafe fn _mm512_sqrt_round_pd(a: __m512d, rounding: i32) -> __m512d {
3656 let a = a.as_f64x8();
3657 macro_rules! call {
3658 ($imm4:expr) => {
3659 vsqrtpd(a, $imm4)
3660 };
3661 }
3662 let r = constify_imm4_round!(rounding, call);
3663 transmute(r)
3664 }
3665
3666 /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
3667 ///
3668 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3669 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3670 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3671 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3672 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3673 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3674 ///
3675 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_round_pd&expand=5372)
3676 #[inline]
3677 #[target_feature(enable = "avx512f")]
3678 #[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
3679 #[rustc_args_required_const(3)]
3680 pub unsafe fn _mm512_mask_sqrt_round_pd(
3681 src: __m512d,
3682 k: __mmask8,
3683 a: __m512d,
3684 rounding: i32,
3685 ) -> __m512d {
3686 macro_rules! call {
3687 ($imm4:expr) => {
3688 vsqrtpd(a.as_f64x8(), $imm4)
3689 };
3690 }
3691 let sqrtround = constify_imm4_round!(rounding, call);
3692 transmute(simd_select_bitmask(k, sqrtround, src.as_f64x8()))
3693 }
3694
3695 /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3696 ///
3697 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3698 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3699 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3700 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3701 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3702 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3703 ///
3704 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_round_pd&expand=5373)
3705 #[inline]
3706 #[target_feature(enable = "avx512f")]
3707 #[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
3708 #[rustc_args_required_const(2)]
3709 pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d, rounding: i32) -> __m512d {
3710 macro_rules! call {
3711 ($imm4:expr) => {
3712 vsqrtpd(a.as_f64x8(), $imm4)
3713 };
3714 }
3715 let sqrtround = constify_imm4_round!(rounding, call);
3716 let zero = _mm512_setzero_pd().as_f64x8();
3717 transmute(simd_select_bitmask(k, sqrtround, zero))
3718 }
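// Illustrative sketch, not part of upstream stdarch: writes the square root (rounded
// per the current MXCSR mode) into lanes whose mask bit is set and keeps the original
// `src` value everywhere else.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_sqrt_round_pd_merge(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    _mm512_mask_sqrt_round_pd(src, k, a, _MM_FROUND_CUR_DIRECTION)
}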
3719
3720 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
3721 ///
3722 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3723 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3724 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3725 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3726 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3727 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3728 ///
3729 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_round_ps&expand=2565)
3730 #[inline]
3731 #[target_feature(enable = "avx512f")]
3732 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3733 #[rustc_args_required_const(3)]
3734 pub unsafe fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3735 macro_rules! call {
3736 ($imm4:expr) => {
3737 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3738 };
3739 }
3740 let r = constify_imm4_round!(rounding, call);
3741 transmute(r)
3742 }
3743
3744 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
3745 ///
3746 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3747 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3748 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3749 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3750 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3751 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3752 ///
3753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_round_ps&expand=2566)
3754 #[inline]
3755 #[target_feature(enable = "avx512f")]
3756 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3757 #[rustc_args_required_const(4)]
3758 pub unsafe fn _mm512_mask_fmadd_round_ps(
3759 a: __m512,
3760 k: __mmask16,
3761 b: __m512,
3762 c: __m512,
3763 rounding: i32,
3764 ) -> __m512 {
3765 macro_rules! call {
3766 ($imm4:expr) => {
3767 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3768 };
3769 }
3770 let fmadd = constify_imm4_round!(rounding, call);
3771 transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
3772 }
3773
3774 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3775 ///
3776 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3777 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3778 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3779 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3780 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3781 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3782 ///
3783 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_round_ps&expand=2568)
3784 #[inline]
3785 #[target_feature(enable = "avx512f")]
3786 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3787 #[rustc_args_required_const(4)]
3788 pub unsafe fn _mm512_maskz_fmadd_round_ps(
3789 k: __mmask16,
3790 a: __m512,
3791 b: __m512,
3792 c: __m512,
3793 rounding: i32,
3794 ) -> __m512 {
3795 macro_rules! call {
3796 ($imm4:expr) => {
3797 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3798 };
3799 }
3800 let fmadd = constify_imm4_round!(rounding, call);
3801 let zero = _mm512_setzero_ps().as_f32x16();
3802 transmute(simd_select_bitmask(k, fmadd, zero))
3803 }
3804
3805 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
3806 ///
3807 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3808 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3809 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3810 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3811 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3812 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3813 ///
3814 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_round_ps&expand=2567)
3815 #[inline]
3816 #[target_feature(enable = "avx512f")]
3817 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3818 #[rustc_args_required_const(4)]
3819 pub unsafe fn _mm512_mask3_fmadd_round_ps(
3820 a: __m512,
3821 b: __m512,
3822 c: __m512,
3823 k: __mmask16,
3824 rounding: i32,
3825 ) -> __m512 {
3826 macro_rules! call {
3827 ($imm4:expr) => {
3828 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3829 };
3830 }
3831 let fmadd = constify_imm4_round!(rounding, call);
3832 transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
3833 }
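// Illustrative sketch, not part of upstream stdarch: the three masked fmadd forms
// above differ only in where masked-off lanes come from, as shown side by side here.
// The helper name is made up for this example.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmadd_round_ps_forms(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> (__m512, __m512, __m512) {
    (
        // Masked-off lanes copied from `a`.
        _mm512_mask_fmadd_round_ps(a, k, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC),
        // Masked-off lanes zeroed.
        _mm512_maskz_fmadd_round_ps(k, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC),
        // Masked-off lanes copied from `c`.
        _mm512_mask3_fmadd_round_ps(a, b, c, k, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC),
    )
}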
3834
3835 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
3836 ///
3837 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3838 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3839 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3840 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3841 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3842 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3843 ///
3844 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_round_pd&expand=2561)
3845 #[inline]
3846 #[target_feature(enable = "avx512f")]
3847 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3848 #[rustc_args_required_const(3)]
3849 pub unsafe fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
3850 macro_rules! call {
3851 ($imm4:expr) => {
3852 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3853 };
3854 }
3855 let r = constify_imm4_round!(rounding, call);
3856 transmute(r)
3857 }
3858
3859 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
3860 ///
3861 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3862 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3863 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3864 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3865 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3866 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3867 ///
3868 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_round_pd&expand=2562)
3869 #[inline]
3870 #[target_feature(enable = "avx512f")]
3871 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3872 #[rustc_args_required_const(4)]
3873 pub unsafe fn _mm512_mask_fmadd_round_pd(
3874 a: __m512d,
3875 k: __mmask8,
3876 b: __m512d,
3877 c: __m512d,
3878 rounding: i32,
3879 ) -> __m512d {
3880 macro_rules! call {
3881 ($imm4:expr) => {
3882 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3883 };
3884 }
3885 let fmadd = constify_imm4_round!(rounding, call);
3886 transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
3887 }
3888
3889 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
3890 ///
3891 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3892 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3893 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3894 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3895 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3896 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3897 ///
3898 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_round_pd&expand=2564)
3899 #[inline]
3900 #[target_feature(enable = "avx512f")]
3901 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3902 #[rustc_args_required_const(4)]
3903 pub unsafe fn _mm512_maskz_fmadd_round_pd(
3904 k: __mmask8,
3905 a: __m512d,
3906 b: __m512d,
3907 c: __m512d,
3908 rounding: i32,
3909 ) -> __m512d {
3910 macro_rules! call {
3911 ($imm4:expr) => {
3912 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3913 };
3914 }
3915 let fmadd = constify_imm4_round!(rounding, call);
3916 let zero = _mm512_setzero_pd().as_f64x8();
3917 transmute(simd_select_bitmask(k, fmadd, zero))
3918 }
3919
3920 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
3921 ///
3922 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3923 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3924 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3925 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3926 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3927 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3928 ///
3929 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_round_pd&expand=2563)
3930 #[inline]
3931 #[target_feature(enable = "avx512f")]
3932 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3933 #[rustc_args_required_const(4)]
3934 pub unsafe fn _mm512_mask3_fmadd_round_pd(
3935 a: __m512d,
3936 b: __m512d,
3937 c: __m512d,
3938 k: __mmask8,
3939 rounding: i32,
3940 ) -> __m512d {
3941 macro_rules! call {
3942 ($imm4:expr) => {
3943 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3944 };
3945 }
3946 let fmadd = constify_imm4_round!(rounding, call);
3947 transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
3948 }
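// Illustrative sketch, not part of upstream stdarch: a fused a*b + c whose final
// result is rounded toward negative infinity, useful when a lower bound on the exact
// value is wanted.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmadd_round_pd_down(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    _mm512_fmadd_round_pd(a, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
}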
3949
3950 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
3951 ///
3952 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3953 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3954 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3955 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3956 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3957 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3958 ///
3959 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_round_ps&expand=2651)
3960 #[inline]
3961 #[target_feature(enable = "avx512f")]
3962 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3963 #[rustc_args_required_const(3)]
3964 pub unsafe fn _mm512_fmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3965 let zero: f32x16 = mem::zeroed();
3966 let sub = simd_sub(zero, c.as_f32x16());
3967 macro_rules! call {
3968 ($imm4:expr) => {
3969 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3970 };
3971 }
3972 let r = constify_imm4_round!(rounding, call);
3973 transmute(r)
3974 }
3975
3976 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
3977 ///
3978 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
3979 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
3980 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
3981 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
3982 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
3983 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3984 ///
3985 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_round_ps&expand=2652)
3986 #[inline]
3987 #[target_feature(enable = "avx512f")]
3988 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3989 #[rustc_args_required_const(4)]
3990 pub unsafe fn _mm512_mask_fmsub_round_ps(
3991 a: __m512,
3992 k: __mmask16,
3993 b: __m512,
3994 c: __m512,
3995 rounding: i32,
3996 ) -> __m512 {
3997 let zero: f32x16 = mem::zeroed();
3998 let sub = simd_sub(zero, c.as_f32x16());
3999 macro_rules! call {
4000 ($imm4:expr) => {
4001 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4002 };
4003 }
4004 let fmsub = constify_imm4_round!(rounding, call);
4005 transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
4006 }
4007
4008 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4009 ///
4010 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4011 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4012 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4013 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4014 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4015 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4016 ///
4017 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_round_ps&expand=2654)
4018 #[inline]
4019 #[target_feature(enable = "avx512f")]
4020 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
4021 #[rustc_args_required_const(4)]
4022 pub unsafe fn _mm512_maskz_fmsub_round_ps(
4023 k: __mmask16,
4024 a: __m512,
4025 b: __m512,
4026 c: __m512,
4027 rounding: i32,
4028 ) -> __m512 {
4029 let zero: f32x16 = mem::zeroed();
4030 let sub = simd_sub(zero, c.as_f32x16());
4031 macro_rules! call {
4032 ($imm4:expr) => {
4033 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4034 };
4035 }
4036 let fmsub = constify_imm4_round!(rounding, call);
4037 transmute(simd_select_bitmask(k, fmsub, zero))
4038 }
4039
4040 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4041 ///
4042 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4043 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4044 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4045 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4046 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4047 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4048 ///
4049 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_round_ps&expand=2653)
4050 #[inline]
4051 #[target_feature(enable = "avx512f")]
4052 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
4053 #[rustc_args_required_const(4)]
4054 pub unsafe fn _mm512_mask3_fmsub_round_ps(
4055 a: __m512,
4056 b: __m512,
4057 c: __m512,
4058 k: __mmask16,
4059 rounding: i32,
4060 ) -> __m512 {
4061 let zero: f32x16 = mem::zeroed();
4062 let sub = simd_sub(zero, c.as_f32x16());
4063 macro_rules! call {
4064 ($imm4:expr) => {
4065 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4066 };
4067 }
4068 let fmsub = constify_imm4_round!(rounding, call);
4069 transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
4070 }
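// Illustrative sketch, not part of upstream stdarch: fmsub produces a*b - c with a
// single rounding of the final result, which is not in general the same as a multiply
// followed by a separately rounded subtraction.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}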
4071
4072 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
4073 ///
4074 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4075 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4076 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4077 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4078 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4079 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4080 ///
4081 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_round_pd&expand=2647)
4082 #[inline]
4083 #[target_feature(enable = "avx512f")]
4084 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
4085 #[rustc_args_required_const(3)]
4086 pub unsafe fn _mm512_fmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
4087 let zero: f64x8 = mem::zeroed();
4088 let sub = simd_sub(zero, c.as_f64x8());
4089 macro_rules! call {
4090 ($imm4:expr) => {
4091 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4092 };
4093 }
4094 let r = constify_imm4_round!(rounding, call);
4095 transmute(r)
4096 }
4097
4098 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4099 ///
4100 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4101 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4102 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4103 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4104 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4105 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4106 ///
4107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_round_pd&expand=2648)
4108 #[inline]
4109 #[target_feature(enable = "avx512f")]
4110 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
4111 #[rustc_args_required_const(4)]
4112 pub unsafe fn _mm512_mask_fmsub_round_pd(
4113 a: __m512d,
4114 k: __mmask8,
4115 b: __m512d,
4116 c: __m512d,
4117 rounding: i32,
4118 ) -> __m512d {
4119 let zero: f64x8 = mem::zeroed();
4120 let sub = simd_sub(zero, c.as_f64x8());
4121 macro_rules! call {
4122 ($imm4:expr) => {
4123 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4124 };
4125 }
4126 let fmsub = constify_imm4_round!(rounding, call);
4127 transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
4128 }
4129
4130 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4131 ///
4132 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4133 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4134 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4135 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4136 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4137 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4138 ///
4139 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_round_pd&expand=2650)
4140 #[inline]
4141 #[target_feature(enable = "avx512f")]
4142 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
4143 #[rustc_args_required_const(4)]
4144 pub unsafe fn _mm512_maskz_fmsub_round_pd(
4145 k: __mmask8,
4146 a: __m512d,
4147 b: __m512d,
4148 c: __m512d,
4149 rounding: i32,
4150 ) -> __m512d {
4151 let zero: f64x8 = mem::zeroed();
4152 let sub = simd_sub(zero, c.as_f64x8());
4153 macro_rules! call {
4154 ($imm4:expr) => {
4155 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4156 };
4157 }
4158 let fmsub = constify_imm4_round!(rounding, call);
4159 transmute(simd_select_bitmask(k, fmsub, zero))
4160 }
4161
4162 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4163 ///
4164 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4165 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4166 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4167 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4168 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4169 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4170 ///
4171 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_round_pd&expand=2649)
4172 #[inline]
4173 #[target_feature(enable = "avx512f")]
4174 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
4175 #[rustc_args_required_const(4)]
4176 pub unsafe fn _mm512_mask3_fmsub_round_pd(
4177 a: __m512d,
4178 b: __m512d,
4179 c: __m512d,
4180 k: __mmask8,
4181 rounding: i32,
4182 ) -> __m512d {
4183 let zero: f64x8 = mem::zeroed();
4184 let sub = simd_sub(zero, c.as_f64x8());
4185 macro_rules! call {
4186 ($imm4:expr) => {
4187 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4188 };
4189 }
4190 let fmsub = constify_imm4_round!(rounding, call);
4191 transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
4192 }
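// Illustrative sketch, not part of upstream stdarch: the mask3 form is convenient
// when the accumulator `c` should survive unchanged in masked-off lanes, e.g. when
// updating a running difference in place.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmsub_round_pd_accumulate(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    _mm512_mask3_fmsub_round_pd(a, b, c, k, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}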
4193
4194 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
4195 ///
4196 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4197 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4198 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4199 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4200 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4201 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4202 ///
4203 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_round_ps&expand=2619)
4204 #[inline]
4205 #[target_feature(enable = "avx512f")]
4206 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4207 #[rustc_args_required_const(3)]
4208 pub unsafe fn _mm512_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
4209 macro_rules! call {
4210 ($imm4:expr) => {
4211 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
4212 };
4213 }
4214 let r = constify_imm4_round!(rounding, call);
4215 transmute(r)
4216 }
4217
4218 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4219 ///
4220 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4221 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4222 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4223 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4224 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4225 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4226 ///
4227 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_round_ps&expand=2620)
4228 #[inline]
4229 #[target_feature(enable = "avx512f")]
4230 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4231 #[rustc_args_required_const(4)]
4232 pub unsafe fn _mm512_mask_fmaddsub_round_ps(
4233 a: __m512,
4234 k: __mmask16,
4235 b: __m512,
4236 c: __m512,
4237 rounding: i32,
4238 ) -> __m512 {
4239 macro_rules! call {
4240 ($imm4:expr) => {
4241 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
4242 };
4243 }
4244 let fmaddsub = constify_imm4_round!(rounding, call);
4245 transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
4246 }
4247
4248 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4249 ///
4250 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4251 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4252 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4253 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4254 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4255 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4256 ///
4257 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_round_ps&expand=2622)
4258 #[inline]
4259 #[target_feature(enable = "avx512f")]
4260 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4261 #[rustc_args_required_const(4)]
4262 pub unsafe fn _mm512_maskz_fmaddsub_round_ps(
4263 k: __mmask16,
4264 a: __m512,
4265 b: __m512,
4266 c: __m512,
4267 rounding: i32,
4268 ) -> __m512 {
4269 macro_rules! call {
4270 ($imm4:expr) => {
4271 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
4272 };
4273 }
4274 let fmaddsub = constify_imm4_round!(rounding, call);
4275 let zero = _mm512_setzero_ps().as_f32x16();
4276 transmute(simd_select_bitmask(k, fmaddsub, zero))
4277 }
4278
4279 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4280 ///
4281 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4282 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4283 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4284 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4285 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4286 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4287 ///
4288 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_round_ps&expand=2621)
4289 #[inline]
4290 #[target_feature(enable = "avx512f")]
4291 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4292 #[rustc_args_required_const(4)]
4293 pub unsafe fn _mm512_mask3_fmaddsub_round_ps(
4294 a: __m512,
4295 b: __m512,
4296 c: __m512,
4297 k: __mmask16,
4298 rounding: i32,
4299 ) -> __m512 {
4300 macro_rules! call {
4301 ($imm4:expr) => {
4302 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
4303 };
4304 }
4305 let fmaddsub = constify_imm4_round!(rounding, call);
4306 transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
4307 }
4308
4309 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
4310 ///
4311 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4312 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4313 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4314 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4315 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4316 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4317 ///
4318 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_round_pd&expand=2615)
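///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` is available at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     let c = _mm512_set1_pd(3.0);
///     // Even lanes: 1.0*2.0 - 3.0 = -1.0; odd lanes: 1.0*2.0 + 3.0 = 5.0.
///     let r = _mm512_fmaddsub_round_pd(a, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```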
4319 #[inline]
4320 #[target_feature(enable = "avx512f")]
4321 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4322 #[rustc_args_required_const(3)]
4323 pub unsafe fn _mm512_fmaddsub_round_pd(
4324 a: __m512d,
4325 b: __m512d,
4326 c: __m512d,
4327 rounding: i32,
4328 ) -> __m512d {
4329 macro_rules! call {
4330 ($imm4:expr) => {
4331 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
4332 };
4333 }
4334 let r = constify_imm4_round!(rounding, call);
4335 transmute(r)
4336 }
4337
4338 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4339 ///
4340 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4341 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4342 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4343 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4344 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4345 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4346 ///
4347 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_round_pd&expand=2616)
4348 #[inline]
4349 #[target_feature(enable = "avx512f")]
4350 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4351 #[rustc_args_required_const(4)]
4352 pub unsafe fn _mm512_mask_fmaddsub_round_pd(
4353 a: __m512d,
4354 k: __mmask8,
4355 b: __m512d,
4356 c: __m512d,
4357 rounding: i32,
4358 ) -> __m512d {
4359 macro_rules! call {
4360 ($imm4:expr) => {
4361 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
4362 };
4363 }
4364 let fmaddsub = constify_imm4_round!(rounding, call);
4365 transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
4366 }
4367
4368 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4369 ///
4370 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4371 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4372 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4373 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4374 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4375 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4376 ///
4377 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_round_pd&expand=2618)
4378 #[inline]
4379 #[target_feature(enable = "avx512f")]
4380 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4381 #[rustc_args_required_const(4)]
4382 pub unsafe fn _mm512_maskz_fmaddsub_round_pd(
4383 k: __mmask8,
4384 a: __m512d,
4385 b: __m512d,
4386 c: __m512d,
4387 rounding: i32,
4388 ) -> __m512d {
4389 macro_rules! call {
4390 ($imm4:expr) => {
4391 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
4392 };
4393 }
4394 let fmaddsub = constify_imm4_round!(rounding, call);
4395 let zero = _mm512_setzero_pd().as_f64x8();
4396 transmute(simd_select_bitmask(k, fmaddsub, zero))
4397 }
4398
4399 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4400 ///
4401 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4402 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4403 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4404 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4405 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4406 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4407 ///
4408 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_round_pd&expand=2617)
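///
/// A minimal sketch of the mask3 form (illustrative only, not from the upstream docs;
/// assumes `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     let c = _mm512_set1_pd(3.0);
///     // Lanes 0..4: even lanes -1.0, odd lanes 5.0; lanes 4..8 are copied from c (3.0).
///     let r = _mm512_mask3_fmaddsub_round_pd(a, b, c, 0b00001111, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```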
4409 #[inline]
4410 #[target_feature(enable = "avx512f")]
4411 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4412 #[rustc_args_required_const(4)]
4413 pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
4414 a: __m512d,
4415 b: __m512d,
4416 c: __m512d,
4417 k: __mmask8,
4418 rounding: i32,
4419 ) -> __m512d {
4420 macro_rules! call {
4421 ($imm4:expr) => {
4422 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
4423 };
4424 }
4425 let fmaddsub = constify_imm4_round!(rounding, call);
4426 transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
4427 }
4428
4429 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
4430 ///
4431 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4432 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4433 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4434 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4435 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4436 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4437 ///
4438 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_round_ps&expand=2699)
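///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     let c = _mm512_set1_ps(3.0);
///     // fmsubadd mirrors fmaddsub: even lanes get 1.0*2.0 + 3.0 = 5.0,
///     // odd lanes get 1.0*2.0 - 3.0 = -1.0.
///     let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// }
/// ```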
4439 #[inline]
4440 #[target_feature(enable = "avx512f")]
4441 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4442 #[rustc_args_required_const(3)]
4443 pub unsafe fn _mm512_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
4444 let zero: f32x16 = mem::zeroed();
4445 let sub = simd_sub(zero, c.as_f32x16());
4446 macro_rules! call {
4447 ($imm4:expr) => {
4448 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4449 };
4450 }
4451 let r = constify_imm4_round!(rounding, call);
4452 transmute(r)
4453 }
4454
4455 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4456 ///
4457 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4458 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4459 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4460 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4461 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4462 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4463 ///
4464 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_round_ps&expand=2700)
4465 #[inline]
4466 #[target_feature(enable = "avx512f")]
4467 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4468 #[rustc_args_required_const(4)]
4469 pub unsafe fn _mm512_mask_fmsubadd_round_ps(
4470 a: __m512,
4471 k: __mmask16,
4472 b: __m512,
4473 c: __m512,
4474 rounding: i32,
4475 ) -> __m512 {
4476 let zero: f32x16 = mem::zeroed();
4477 let sub = simd_sub(zero, c.as_f32x16());
4478 macro_rules! call {
4479 ($imm4:expr) => {
4480 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4481 };
4482 }
4483 let fmsubadd = constify_imm4_round!(rounding, call);
4484 transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
4485 }
4486
4487 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4488 ///
4489 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4490 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4491 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4492 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4493 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4494 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4495 ///
4496 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_round_ps&expand=2702)
4497 #[inline]
4498 #[target_feature(enable = "avx512f")]
4499 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4500 #[rustc_args_required_const(4)]
4501 pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
4502 k: __mmask16,
4503 a: __m512,
4504 b: __m512,
4505 c: __m512,
4506 rounding: i32,
4507 ) -> __m512 {
4508 let zero: f32x16 = mem::zeroed();
4509 let sub = simd_sub(zero, c.as_f32x16());
4510 macro_rules! call {
4511 ($imm4:expr) => {
4512 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4513 };
4514 }
4515 let fmsubadd = constify_imm4_round!(rounding, call);
4516 transmute(simd_select_bitmask(k, fmsubadd, zero))
4517 }
4518
4519 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4520 ///
4521 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4522 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4523 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4524 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4525 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4526 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4527 ///
4528 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_round_ps&expand=2701)
4529 #[inline]
4530 #[target_feature(enable = "avx512f")]
4531 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4532 #[rustc_args_required_const(4)]
4533 pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
4534 a: __m512,
4535 b: __m512,
4536 c: __m512,
4537 k: __mmask16,
4538 rounding: i32,
4539 ) -> __m512 {
4540 let zero: f32x16 = mem::zeroed();
4541 let sub = simd_sub(zero, c.as_f32x16());
4542 macro_rules! call {
4543 ($imm4:expr) => {
4544 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
4545 };
4546 }
4547 let fmsubadd = constify_imm4_round!(rounding, call);
4548 transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
4549 }
4550
4551 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
4552 ///
4553 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4554 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4555 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4556 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4557 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4558 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4559 ///
4560 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_round_pd&expand=2695)
4561 #[inline]
4562 #[target_feature(enable = "avx512f")]
4563 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4564 #[rustc_args_required_const(3)]
4565 pub unsafe fn _mm512_fmsubadd_round_pd(
4566 a: __m512d,
4567 b: __m512d,
4568 c: __m512d,
4569 rounding: i32,
4570 ) -> __m512d {
4571 let zero: f64x8 = mem::zeroed();
4572 let sub = simd_sub(zero, c.as_f64x8());
4573 macro_rules! call {
4574 ($imm4:expr) => {
4575 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4576 };
4577 }
4578 let r = constify_imm4_round!(rounding, call);
4579 transmute(r)
4580 }
4581
4582 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4583 ///
4584 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4585 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4586 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4587 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4588 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4589 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4590 ///
4591 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_round_pd&expand=2696)
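///
/// A minimal sketch of the writemask form (illustrative only, not from the upstream docs;
/// assumes `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.5);
///     let b = _mm512_set1_pd(2.0);
///     let c = _mm512_set1_pd(0.5);
///     // Lanes 0..4: even lanes 1.5*2.0 + 0.5 = 3.5, odd lanes 1.5*2.0 - 0.5 = 2.5;
///     // lanes 4..8 are copied unchanged from a (1.5).
///     let r = _mm512_mask_fmsubadd_round_pd(a, 0b00001111, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```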
4592 #[inline]
4593 #[target_feature(enable = "avx512f")]
4594 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4595 #[rustc_args_required_const(4)]
4596 pub unsafe fn _mm512_mask_fmsubadd_round_pd(
4597 a: __m512d,
4598 k: __mmask8,
4599 b: __m512d,
4600 c: __m512d,
4601 rounding: i32,
4602 ) -> __m512d {
4603 let zero: f64x8 = mem::zeroed();
4604 let sub = simd_sub(zero, c.as_f64x8());
4605 macro_rules! call {
4606 ($imm4:expr) => {
4607 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4608 };
4609 }
4610 let fmsubadd = constify_imm4_round!(rounding, call);
4611 transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
4612 }
4613
4614 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4615 ///
4616 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4617 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4618 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4619 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4620 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4621 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4622 ///
4623 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_round_pd&expand=2698)
4624 #[inline]
4625 #[target_feature(enable = "avx512f")]
4626 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4627 #[rustc_args_required_const(4)]
4628 pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
4629 k: __mmask8,
4630 a: __m512d,
4631 b: __m512d,
4632 c: __m512d,
4633 rounding: i32,
4634 ) -> __m512d {
4635 let zero: f64x8 = mem::zeroed();
4636 let sub = simd_sub(zero, c.as_f64x8());
4637 macro_rules! call {
4638 ($imm4:expr) => {
4639 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4640 };
4641 }
4642 let fmsubadd = constify_imm4_round!(rounding, call);
4643 transmute(simd_select_bitmask(k, fmsubadd, zero))
4644 }
4645
4646 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4647 ///
4648 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4649 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4650 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4651 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4652 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4653 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4654 ///
4655 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_round_pd&expand=2697)
4656 #[inline]
4657 #[target_feature(enable = "avx512f")]
4658 #[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4659 #[rustc_args_required_const(4)]
4660 pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
4661 a: __m512d,
4662 b: __m512d,
4663 c: __m512d,
4664 k: __mmask8,
4665 rounding: i32,
4666 ) -> __m512d {
4667 let zero: f64x8 = mem::zeroed();
4668 let sub = simd_sub(zero, c.as_f64x8());
4669 macro_rules! call {
4670 ($imm4:expr) => {
4671 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
4672 };
4673 }
4674 let fmsubadd = constify_imm4_round!(rounding, call);
4675 transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
4676 }
4677
4678 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
4679 ///
4680 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4681 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4682 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4683 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4684 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4685 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4686 ///
4687 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_round_ps&expand=2731)
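///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // Every lane: -(2.0*3.0) + 10.0 = 4.0.
///     let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// }
/// ```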
4688 #[inline]
4689 #[target_feature(enable = "avx512f")]
4690 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4691 #[rustc_args_required_const(3)]
4692 pub unsafe fn _mm512_fnmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
4693 let zero: f32x16 = mem::zeroed();
4694 let sub = simd_sub(zero, a.as_f32x16());
4695 macro_rules! call {
4696 ($imm4:expr) => {
4697 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4698 };
4699 }
4700 let r = constify_imm4_round!(rounding, call);
4701 transmute(r)
4702 }
4703
4704 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4705 ///
4706 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4707 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4708 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4709 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4710 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4711 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4712 ///
4713 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_round_ps&expand=2732)
4714 #[inline]
4715 #[target_feature(enable = "avx512f")]
4716 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4717 #[rustc_args_required_const(4)]
4718 pub unsafe fn _mm512_mask_fnmadd_round_ps(
4719 a: __m512,
4720 k: __mmask16,
4721 b: __m512,
4722 c: __m512,
4723 rounding: i32,
4724 ) -> __m512 {
4725 let zero: f32x16 = mem::zeroed();
4726 let sub = simd_sub(zero, a.as_f32x16());
4727 macro_rules! call {
4728 ($imm4:expr) => {
4729 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4730 };
4731 }
4732 let fnmadd = constify_imm4_round!(rounding, call);
4733 transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
4734 }
4735
4736 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4737 ///
4738 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4739 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4740 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4741 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4742 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4743 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4744 ///
4745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_round_ps&expand=2734)
4746 #[inline]
4747 #[target_feature(enable = "avx512f")]
4748 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4749 #[rustc_args_required_const(4)]
4750 pub unsafe fn _mm512_maskz_fnmadd_round_ps(
4751 k: __mmask16,
4752 a: __m512,
4753 b: __m512,
4754 c: __m512,
4755 rounding: i32,
4756 ) -> __m512 {
4757 let zero: f32x16 = mem::zeroed();
4758 let sub = simd_sub(zero, a.as_f32x16());
4759 macro_rules! call {
4760 ($imm4:expr) => {
4761 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4762 };
4763 }
4764 let fnmadd = constify_imm4_round!(rounding, call);
4765 transmute(simd_select_bitmask(k, fnmadd, zero))
4766 }
4767
4768 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4769 ///
4770 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4771 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4772 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4773 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4774 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4775 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4776 ///
4777 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_round_ps&expand=2733)
4778 #[inline]
4779 #[target_feature(enable = "avx512f")]
4780 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4781 #[rustc_args_required_const(4)]
4782 pub unsafe fn _mm512_mask3_fnmadd_round_ps(
4783 a: __m512,
4784 b: __m512,
4785 c: __m512,
4786 k: __mmask16,
4787 rounding: i32,
4788 ) -> __m512 {
4789 let zero: f32x16 = mem::zeroed();
4790 let sub = simd_sub(zero, a.as_f32x16());
4791 macro_rules! call {
4792 ($imm4:expr) => {
4793 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4794 };
4795 }
4796 let fnmadd = constify_imm4_round!(rounding, call);
4797 transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
4798 }
4799
4800 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
4801 ///
4802 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4803 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4804 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4805 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4806 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4807 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4808 ///
4809 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_round_pd&expand=2711)
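///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     let c = _mm512_set1_pd(0.5);
///     // Every lane: -(1.0*2.0) + 0.5 = -1.5 (exact, so the rounding mode does not matter here).
///     let r = _mm512_fnmadd_round_pd(a, b, c, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// }
/// ```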
4810 #[inline]
4811 #[target_feature(enable = "avx512f")]
4812 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4813 #[rustc_args_required_const(3)]
4814 pub unsafe fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
4815 let zero: f64x8 = mem::zeroed();
4816 let sub = simd_sub(zero, a.as_f64x8());
4817 macro_rules! call {
4818 ($imm4:expr) => {
4819 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4820 };
4821 }
4822 let r = constify_imm4_round!(rounding, call);
4823 transmute(r)
4824 }
4825
4826 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4827 ///
4828 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4829 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4830 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4831 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4832 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4833 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4834 ///
4835 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_round_pd&expand=2728)
4836 #[inline]
4837 #[target_feature(enable = "avx512f")]
4838 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4839 #[rustc_args_required_const(4)]
4840 pub unsafe fn _mm512_mask_fnmadd_round_pd(
4841 a: __m512d,
4842 k: __mmask8,
4843 b: __m512d,
4844 c: __m512d,
4845 rounding: i32,
4846 ) -> __m512d {
4847 let zero: f64x8 = mem::zeroed();
4848 let sub = simd_sub(zero, a.as_f64x8());
4849 macro_rules! call {
4850 ($imm4:expr) => {
4851 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4852 };
4853 }
4854 let fnmadd = constify_imm4_round!(rounding, call);
4855 transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
4856 }
4857
4858 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4859 ///
4860 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4861 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4862 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4863 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4864 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4865 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4866 ///
4867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_round_pd&expand=2730)
4868 #[inline]
4869 #[target_feature(enable = "avx512f")]
4870 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4871 #[rustc_args_required_const(4)]
4872 pub unsafe fn _mm512_maskz_fnmadd_round_pd(
4873 k: __mmask8,
4874 a: __m512d,
4875 b: __m512d,
4876 c: __m512d,
4877 rounding: i32,
4878 ) -> __m512d {
4879 let zero: f64x8 = mem::zeroed();
4880 let sub = simd_sub(zero, a.as_f64x8());
4881 macro_rules! call {
4882 ($imm4:expr) => {
4883 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4884 };
4885 }
4886 let fnmadd = constify_imm4_round!(rounding, call);
4887 transmute(simd_select_bitmask(k, fnmadd, zero))
4888 }
4889
4890 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4891 ///
4892 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4893 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4894 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4895 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4896 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4897 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4898 ///
4899 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_round_pd&expand=2729)
4900 #[inline]
4901 #[target_feature(enable = "avx512f")]
4902 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4903 #[rustc_args_required_const(4)]
4904 pub unsafe fn _mm512_mask3_fnmadd_round_pd(
4905 a: __m512d,
4906 b: __m512d,
4907 c: __m512d,
4908 k: __mmask8,
4909 rounding: i32,
4910 ) -> __m512d {
4911 let zero: f64x8 = mem::zeroed();
4912 let sub = simd_sub(zero, a.as_f64x8());
4913 macro_rules! call {
4914 ($imm4:expr) => {
4915 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4916 };
4917 }
4918 let fnmadd = constify_imm4_round!(rounding, call);
4919 transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
4920 }
4921
4922 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
4923 ///
4924 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4925 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4926 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4927 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4928 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4929 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4930 ///
4931 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_round_ps&expand=2779)
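///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     let c = _mm512_set1_ps(3.0);
///     // Every lane: -(1.0*2.0) - 3.0 = -5.0.
///     let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// }
/// ```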
4932 #[inline]
4933 #[target_feature(enable = "avx512f")]
4934 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4935 #[rustc_args_required_const(3)]
4936 pub unsafe fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
4937 let zero: f32x16 = mem::zeroed();
4938 let suba = simd_sub(zero, a.as_f32x16());
4939 let subc = simd_sub(zero, c.as_f32x16());
4940 macro_rules! call {
4941 ($imm4:expr) => {
4942 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
4943 };
4944 }
4945 let r = constify_imm4_round!(rounding, call);
4946 transmute(r)
4947 }
4948
4949 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
4950 ///
4951 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4952 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4953 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4954 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4955 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4956 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4957 ///
4958 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_round_ps&expand=2780)
4959 #[inline]
4960 #[target_feature(enable = "avx512f")]
4961 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4962 #[rustc_args_required_const(4)]
4963 pub unsafe fn _mm512_mask_fnmsub_round_ps(
4964 a: __m512,
4965 k: __mmask16,
4966 b: __m512,
4967 c: __m512,
4968 rounding: i32,
4969 ) -> __m512 {
4970 let zero: f32x16 = mem::zeroed();
4971 let suba = simd_sub(zero, a.as_f32x16());
4972 let subc = simd_sub(zero, c.as_f32x16());
4973 macro_rules! call {
4974 ($imm4:expr) => {
4975 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
4976 };
4977 }
4978 let fnmsub = constify_imm4_round!(rounding, call);
4979 transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
4980 }
4981
4982 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4983 ///
4984 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4985 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4986 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4987 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4988 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4989 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4990 ///
4991 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_round_ps&expand=2782)
4992 #[inline]
4993 #[target_feature(enable = "avx512f")]
4994 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4995 #[rustc_args_required_const(4)]
4996 pub unsafe fn _mm512_maskz_fnmsub_round_ps(
4997 k: __mmask16,
4998 a: __m512,
4999 b: __m512,
5000 c: __m512,
5001 rounding: i32,
5002 ) -> __m512 {
5003 let zero: f32x16 = mem::zeroed();
5004 let suba = simd_sub(zero, a.as_f32x16());
5005 let subc = simd_sub(zero, c.as_f32x16());
5006 macro_rules! call {
5007 ($imm4:expr) => {
5008 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
5009 };
5010 }
5011 let fnmsub = constify_imm4_round!(rounding, call);
5012 transmute(simd_select_bitmask(k, fnmsub, zero))
5013 }
5014
5015 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
5016 ///
5017 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5018 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5019 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5020 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5021 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5022 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5023 ///
5024 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_round_ps&expand=2781)
5025 #[inline]
5026 #[target_feature(enable = "avx512f")]
5027 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
5028 #[rustc_args_required_const(4)]
5029 pub unsafe fn _mm512_mask3_fnmsub_round_ps(
5030 a: __m512,
5031 b: __m512,
5032 c: __m512,
5033 k: __mmask16,
5034 rounding: i32,
5035 ) -> __m512 {
5036 let zero: f32x16 = mem::zeroed();
5037 let suba = simd_sub(zero, a.as_f32x16());
5038 let subc = simd_sub(zero, c.as_f32x16());
5039 macro_rules! call {
5040 ($imm4:expr) => {
5041 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
5042 };
5043 }
5044 let fnmsub = constify_imm4_round!(rounding, call);
5045 transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
5046 }
5047
5048 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
5049 ///
5050 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5051 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5052 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5053 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5054 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5055 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5056 ///
5057 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_round_pd&expand=2775)
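///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     let c = _mm512_set1_pd(3.0);
///     // Every lane: -(1.0*2.0) - 3.0 = -5.0.
///     let r = _mm512_fnmsub_round_pd(a, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```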
5058 #[inline]
5059 #[target_feature(enable = "avx512f")]
5060 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5061 #[rustc_args_required_const(3)]
5062 pub unsafe fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
5063 let zero: f64x8 = mem::zeroed();
5064 let suba = simd_sub(zero, a.as_f64x8());
5065 let subc = simd_sub(zero, c.as_f64x8());
5066 macro_rules! call {
5067 ($imm4:expr) => {
5068 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
5069 };
5070 }
5071 let r = constify_imm4_round!(rounding, call);
5072 transmute(r)
5073 }
5074
5075 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
5076 ///
5077 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5078 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5079 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5080 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5081 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5082 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5083 ///
5084 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_round_pd&expand=2776)
5085 #[inline]
5086 #[target_feature(enable = "avx512f")]
5087 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5088 #[rustc_args_required_const(4)]
5089 pub unsafe fn _mm512_mask_fnmsub_round_pd(
5090 a: __m512d,
5091 k: __mmask8,
5092 b: __m512d,
5093 c: __m512d,
5094 rounding: i32,
5095 ) -> __m512d {
5096 let zero: f64x8 = mem::zeroed();
5097 let suba = simd_sub(zero, a.as_f64x8());
5098 let subc = simd_sub(zero, c.as_f64x8());
5099 macro_rules! call {
5100 ($imm4:expr) => {
5101 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
5102 };
5103 }
5104 let fnmsub = constify_imm4_round!(rounding, call);
5105 transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
5106 }
5107
5108 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5109 ///
5110 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5111 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5112 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5113 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5114 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5115 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5116 ///
5117 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_round_pd&expand=2778)
5118 #[inline]
5119 #[target_feature(enable = "avx512f")]
5120 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5121 #[rustc_args_required_const(4)]
5122 pub unsafe fn _mm512_maskz_fnmsub_round_pd(
5123 k: __mmask8,
5124 a: __m512d,
5125 b: __m512d,
5126 c: __m512d,
5127 rounding: i32,
5128 ) -> __m512d {
5129 let zero: f64x8 = mem::zeroed();
5130 let suba = simd_sub(zero, a.as_f64x8());
5131 let subc = simd_sub(zero, c.as_f64x8());
5132 macro_rules! call {
5133 ($imm4:expr) => {
5134 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
5135 };
5136 }
5137 let fnmsub = constify_imm4_round!(rounding, call);
5138 transmute(simd_select_bitmask(k, fnmsub, zero))
5139 }
5140
5141 /// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
5142 ///
5143 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5144 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5145 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5146 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5147 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5148 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5149 ///
5150 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_round_pd&expand=2777)
5151 #[inline]
5152 #[target_feature(enable = "avx512f")]
5153 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
5154 #[rustc_args_required_const(4)]
5155 pub unsafe fn _mm512_mask3_fnmsub_round_pd(
5156 a: __m512d,
5157 b: __m512d,
5158 c: __m512d,
5159 k: __mmask8,
5160 rounding: i32,
5161 ) -> __m512d {
5162 let zero: f64x8 = mem::zeroed();
5163 let suba = simd_sub(zero, a.as_f64x8());
5164 let subc = simd_sub(zero, c.as_f64x8());
5165 macro_rules! call {
5166 ($imm4:expr) => {
5167 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
5168 };
5169 }
5170 let fnmsub = constify_imm4_round!(rounding, call);
5171 transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
5172 }
5173
5174 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
5175 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5176 ///
5177 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=max_round_ps&expand=3662)
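///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Every lane: max(1.0, 2.0) = 2.0; _MM_FROUND_NO_EXC suppresses exceptions.
///     let r = _mm512_max_round_ps(a, b, _MM_FROUND_NO_EXC);
/// }
/// ```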
5178 #[inline]
5179 #[target_feature(enable = "avx512f")]
5180 #[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
5181 #[rustc_args_required_const(2)]
5182 pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
5183 macro_rules! call {
5184 ($imm4:expr) => {
5185 vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
5186 };
5187 }
5188 let r = constify_imm4_sae!(sae, call);
5189 transmute(r)
5190 }
5191
5192 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5193 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5194 ///
5195 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_round_ps&expand=3660)
5196 #[inline]
5197 #[target_feature(enable = "avx512f")]
5198 #[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
5199 #[rustc_args_required_const(4)]
5200 pub unsafe fn _mm512_mask_max_round_ps(
5201 src: __m512,
5202 k: __mmask16,
5203 a: __m512,
5204 b: __m512,
5205 sae: i32,
5206 ) -> __m512 {
5207 macro_rules! call {
5208 ($imm4:expr) => {
5209 vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
5210 };
5211 }
5212 let max = constify_imm4_sae!(sae, call);
5213 transmute(simd_select_bitmask(k, max, src.as_f32x16()))
5214 }
5215
5216 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5217 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5218 ///
5219 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_round_ps&expand=3661)
5220 #[inline]
5221 #[target_feature(enable = "avx512f")]
5222 #[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
5223 #[rustc_args_required_const(3)]
5224 pub unsafe fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
5225 macro_rules! call {
5226 ($imm4:expr) => {
5227 vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
5228 };
5229 }
5230 let max = constify_imm4_sae!(sae, call);
5231 let zero = _mm512_setzero_ps().as_f32x16();
5232 transmute(simd_select_bitmask(k, max, zero))
5233 }
5234
5235 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
5236 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5237 ///
5238 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_round_pd&expand=3659)
5239 #[inline]
5240 #[target_feature(enable = "avx512f")]
5241 #[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
5242 #[rustc_args_required_const(2)]
5243 pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
5244 macro_rules! call {
5245 ($imm4:expr) => {
5246 vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
5247 };
5248 }
5249 let r = constify_imm4_sae!(sae, call);
5250 transmute(r)
5251 }
5252
5253 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5254 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5255 ///
5256 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_round_pd&expand=3657)
5257 #[inline]
5258 #[target_feature(enable = "avx512f")]
5259 #[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
5260 #[rustc_args_required_const(4)]
5261 pub unsafe fn _mm512_mask_max_round_pd(
5262 src: __m512d,
5263 k: __mmask8,
5264 a: __m512d,
5265 b: __m512d,
5266 sae: i32,
5267 ) -> __m512d {
5268 macro_rules! call {
5269 ($imm4:expr) => {
5270 vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
5271 };
5272 }
5273 let max = constify_imm4_sae!(sae, call);
5274 transmute(simd_select_bitmask(k, max, src.as_f64x8()))
5275 }
5276
5277 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5278 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5279 ///
5280 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_round_pd&expand=3658)
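///
/// A minimal sketch of the zeromask form (illustrative only, not from the upstream docs;
/// assumes `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(2.0);
///     // Lanes 0..4: max(1.0, 2.0) = 2.0; lanes 4..8 are zeroed.
///     let r = _mm512_maskz_max_round_pd(0b00001111, a, b, _MM_FROUND_NO_EXC);
/// }
/// ```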
5281 #[inline]
5282 #[target_feature(enable = "avx512f")]
5283 #[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
5284 #[rustc_args_required_const(3)]
5285 pub unsafe fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
5286 macro_rules! call {
5287 ($imm4:expr) => {
5288 vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
5289 };
5290 }
5291 let max = constify_imm4_sae!(sae, call);
5292 let zero = _mm512_setzero_pd().as_f64x8();
5293 transmute(simd_select_bitmask(k, max, zero))
5294 }
5295
5296 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
5297 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5298 ///
5299 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_round_ps&expand=3776)
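///
/// A minimal usage sketch (illustrative only, not from the upstream docs; assumes
/// `avx512f` at runtime, so it is not run as a doc-test):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Every lane: min(1.0, 2.0) = 1.0.
///     let r = _mm512_min_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```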
5300 #[inline]
5301 #[target_feature(enable = "avx512f")]
5302 #[cfg_attr(test, assert_instr(vminps, sae = 8))]
5303 #[rustc_args_required_const(2)]
5304 pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
5305 macro_rules! call {
5306 ($imm4:expr) => {
5307 vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
5308 };
5309 }
5310 let r = constify_imm4_sae!(sae, call);
5311 transmute(r)
5312 }
5313
5314 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5315 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5316 ///
5317 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_round_ps&expand=3774)
5318 #[inline]
5319 #[target_feature(enable = "avx512f")]
5320 #[cfg_attr(test, assert_instr(vminps, sae = 8))]
5321 #[rustc_args_required_const(4)]
5322 pub unsafe fn _mm512_mask_min_round_ps(
5323 src: __m512,
5324 k: __mmask16,
5325 a: __m512,
5326 b: __m512,
5327 sae: i32,
5328 ) -> __m512 {
5329 macro_rules! call {
5330 ($imm4:expr) => {
5331 vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
5332 };
5333 }
5334 let min = constify_imm4_sae!(sae, call);
5335 transmute(simd_select_bitmask(k, min, src.as_f32x16()))
5336 }
5337
5338 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5339 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5340 ///
5341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_round_ps&expand=3775)
5342 #[inline]
5343 #[target_feature(enable = "avx512f")]
5344 #[cfg_attr(test, assert_instr(vminps, sae = 8))]
5345 #[rustc_args_required_const(3)]
5346 pub unsafe fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
5347 macro_rules! call {
5348 ($imm4:expr) => {
5349 vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
5350 };
5351 }
5352 let min = constify_imm4_sae!(sae, call);
5353 let zero = _mm512_setzero_ps().as_f32x16();
5354 transmute(simd_select_bitmask(k, min, zero))
5355 }
5356
5357 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
5358 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5359 ///
5360 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_round_pd&expand=3773)
5361 #[inline]
5362 #[target_feature(enable = "avx512f")]
5363 #[cfg_attr(test, assert_instr(vminpd, sae = 8))]
5364 #[rustc_args_required_const(2)]
5365 pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
5366 macro_rules! call {
5367 ($imm4:expr) => {
5368 vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
5369 };
5370 }
5371 let r = constify_imm4_sae!(sae, call);
5372 transmute(r)
5373 }
5374
5375 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5376 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5377 ///
5378 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_round_pd&expand=3771)
5379 #[inline]
5380 #[target_feature(enable = "avx512f")]
5381 #[cfg_attr(test, assert_instr(vminpd, sae = 8))]
5382 #[rustc_args_required_const(4)]
5383 pub unsafe fn _mm512_mask_min_round_pd(
5384 src: __m512d,
5385 k: __mmask8,
5386 a: __m512d,
5387 b: __m512d,
5388 sae: i32,
5389 ) -> __m512d {
5390 macro_rules! call {
5391 ($imm4:expr) => {
5392 vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
5393 };
5394 }
5395 let min = constify_imm4_sae!(sae, call);
5396 transmute(simd_select_bitmask(k, min, src.as_f64x8()))
5397 }
5398
5399 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5400 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5401 ///
5402 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_round_pd&expand=3772)
5403 #[inline]
5404 #[target_feature(enable = "avx512f")]
5405 #[cfg_attr(test, assert_instr(vminpd, sae = 8))]
5406 #[rustc_args_required_const(3)]
5407 pub unsafe fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
5408 macro_rules! call {
5409 ($imm4:expr) => {
5410 vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
5411 };
5412 }
5413 let min = constify_imm4_sae!(sae, call);
5414 let zero = _mm512_setzero_pd().as_f64x8();
5415 transmute(simd_select_bitmask(k, min, zero))
5416 }
5417
5418 /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
5419 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5420 ///
5421 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_round_ps&expand=2850)
5422 #[inline]
5423 #[target_feature(enable = "avx512f")]
5424 #[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
5425 #[rustc_args_required_const(1)]
5426 pub unsafe fn _mm512_getexp_round_ps(a: __m512, sae: i32) -> __m512 {
5427 macro_rules! call {
5428 ($imm4:expr) => {
5429 vgetexpps(
5430 a.as_f32x16(),
5431 _mm512_setzero_ps().as_f32x16(),
5432 0b11111111_11111111,
5433 $imm4,
5434 )
5435 };
5436 }
5437 let r = constify_imm4_sae!(sae, call);
5438 transmute(r)
5439 }
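
// A minimal sketch of the exponent extraction above (illustrative only,
// assumes AVX512F support; the helper name is not part of the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_getexp_round_ps() {
    let a = _mm512_set1_ps(8.0);
    // floor(log2(8.0)) = 3.0 in every lane; _MM_FROUND_CUR_DIRECTION leaves
    // exception behaviour under MXCSR control.
    let r = _mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION);
    let _ = r;
}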
5440
5441 /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
5442 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5443 ///
5444 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_round_ps&expand=2851)
5445 #[inline]
5446 #[target_feature(enable = "avx512f")]
5447 #[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
5448 #[rustc_args_required_const(3)]
5449 pub unsafe fn _mm512_mask_getexp_round_ps(
5450 src: __m512,
5451 k: __mmask16,
5452 a: __m512,
5453 sae: i32,
5454 ) -> __m512 {
5455 macro_rules! call {
5456 ($imm4:expr) => {
5457 vgetexpps(a.as_f32x16(), src.as_f32x16(), k, $imm4)
5458 };
5459 }
5460 let r = constify_imm4_sae!(sae, call);
5461 transmute(r)
5462 }
5463
5464 /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
5465 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5466 ///
5467 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_round_ps&expand=2852)
5468 #[inline]
5469 #[target_feature(enable = "avx512f")]
5470 #[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
5471 #[rustc_args_required_const(2)]
5472 pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512, sae: i32) -> __m512 {
5473 macro_rules! call {
5474 ($imm4:expr) => {
5475 vgetexpps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
5476 };
5477 }
5478 let r = constify_imm4_sae!(sae, call);
5479 transmute(r)
5480 }
5481
5482 /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
5483 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5484 ///
5485 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_round_pd&expand=2847)
5486 #[inline]
5487 #[target_feature(enable = "avx512f")]
5488 #[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
5489 #[rustc_args_required_const(1)]
5490 pub unsafe fn _mm512_getexp_round_pd(a: __m512d, sae: i32) -> __m512d {
5491 macro_rules! call {
5492 ($imm4:expr) => {
5493 vgetexppd(
5494 a.as_f64x8(),
5495 _mm512_setzero_pd().as_f64x8(),
5496 0b11111111,
5497 $imm4,
5498 )
5499 };
5500 }
5501 let r = constify_imm4_sae!(sae, call);
5502 transmute(r)
5503 }
5504
5505 /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
5506 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5507 ///
5508 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_round_pd&expand=2848)
5509 #[inline]
5510 #[target_feature(enable = "avx512f")]
5511 #[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
5512 #[rustc_args_required_const(3)]
5513 pub unsafe fn _mm512_mask_getexp_round_pd(
5514 src: __m512d,
5515 k: __mmask8,
5516 a: __m512d,
5517 sae: i32,
5518 ) -> __m512d {
5519 macro_rules! call {
5520 ($imm4:expr) => {
5521 vgetexppd(a.as_f64x8(), src.as_f64x8(), k, $imm4)
5522 };
5523 }
5524 let r = constify_imm4_sae!(sae, call);
5525 transmute(r)
5526 }
5527
5528 /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
5529 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5530 ///
5531 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_round_pd&expand=2849)
5532 #[inline]
5533 #[target_feature(enable = "avx512f")]
5534 #[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
5535 #[rustc_args_required_const(2)]
5536 pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d, sae: i32) -> __m512d {
5537 macro_rules! call {
5538 ($imm4:expr) => {
5539 vgetexppd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
5540 };
5541 }
5542 let r = constify_imm4_sae!(sae, call);
5543 transmute(r)
5544 }
5545
5546 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5547 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5548 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5549 /// _MM_FROUND_TO_NEG_INF // round down\
5550 /// _MM_FROUND_TO_POS_INF // round up\
5551 /// _MM_FROUND_TO_ZERO // truncate\
5552 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
5553 ///
5554 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5555 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_round_ps&expand=4790)
5556 #[inline]
5557 #[target_feature(enable = "avx512f")]
5558 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
5559 #[rustc_args_required_const(1, 2)]
5560 pub unsafe fn _mm512_roundscale_round_ps(a: __m512, imm8: i32, sae: i32) -> __m512 {
5561 let a = a.as_f32x16();
5562 let zero = _mm512_setzero_ps().as_f32x16();
5563 macro_rules! call {
5564 ($imm8:expr, $imm4:expr) => {
5565 vrndscaleps(a, $imm8, zero, 0b11111111_11111111, $imm4)
5566 };
5567 }
5568 let r = constify_imm8_roundscale!(imm8, sae, call);
5569 transmute(r)
5570 }
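
// A minimal sketch of roundscale above: per Intel's vrndscaleps description,
// imm8[7:4] selects how many fraction bits to keep and imm8[2:0] the rounding
// mode (illustrative only, assumes AVX512F support; the helper name is not
// part of the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_roundscale_round_ps() {
    let a = _mm512_set1_ps(1.1);
    // imm8 = 0: keep 0 fraction bits and round to nearest, so 1.1 -> 1.0.
    let r = _mm512_roundscale_round_ps(a, 0, _MM_FROUND_CUR_DIRECTION);
    let _ = r;
}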
5571
5572 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5573 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5574 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5575 /// _MM_FROUND_TO_NEG_INF // round down\
5576 /// _MM_FROUND_TO_POS_INF // round up\
5577 /// _MM_FROUND_TO_ZERO // truncate\
5578 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
5579 ///
5580 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5581 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_round_ps&expand=4788)
5582 #[inline]
5583 #[target_feature(enable = "avx512f")]
5584 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
5585 #[rustc_args_required_const(3, 4)]
5586 pub unsafe fn _mm512_mask_roundscale_round_ps(
5587 src: __m512,
5588 k: __mmask16,
5589 a: __m512,
5590 imm8: i32,
5591 sae: i32,
5592 ) -> __m512 {
5593 let a = a.as_f32x16();
5594 let src = src.as_f32x16();
5595 macro_rules! call {
5596 ($imm8:expr, $imm4:expr) => {
5597 vrndscaleps(a, $imm8, src, k, $imm4)
5598 };
5599 }
5600 let r = constify_imm8_roundscale!(imm8, sae, call);
5601 transmute(r)
5602 }
5603
5604 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5605 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5606 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5607 /// _MM_FROUND_TO_NEG_INF // round down\
5608 /// _MM_FROUND_TO_POS_INF // round up\
5609 /// _MM_FROUND_TO_ZERO // truncate\
5610 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
5611 ///
5612 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5613 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_round_ps&expand=4789)
5614 #[inline]
5615 #[target_feature(enable = "avx512f")]
5616 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
5617 #[rustc_args_required_const(2, 3)]
5618 pub unsafe fn _mm512_maskz_roundscale_round_ps(
5619 k: __mmask16,
5620 a: __m512,
5621 imm8: i32,
5622 sae: i32,
5623 ) -> __m512 {
5624 let a = a.as_f32x16();
5625 let zero = _mm512_setzero_ps().as_f32x16();
5626 macro_rules! call {
5627 ($imm8:expr, $imm4:expr) => {
5628 vrndscaleps(a, $imm8, zero, k, $imm4)
5629 };
5630 }
5631 let r = constify_imm8_roundscale!(imm8, sae, call);
5632 transmute(r)
5633 }
5634
5635 /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5636 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5637 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5638 /// _MM_FROUND_TO_NEG_INF // round down\
5639 /// _MM_FROUND_TO_POS_INF // round up\
5640 /// _MM_FROUND_TO_ZERO // truncate\
5641 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
5642 ///
5643 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5644 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_round_pd&expand=4787)
5645 #[inline]
5646 #[target_feature(enable = "avx512f")]
5647 #[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
5648 #[rustc_args_required_const(1, 2)]
5649 pub unsafe fn _mm512_roundscale_round_pd(a: __m512d, imm8: i32, sae: i32) -> __m512d {
5650 let a = a.as_f64x8();
5651 let zero = _mm512_setzero_pd().as_f64x8();
5652 macro_rules! call {
5653 ($imm8:expr, $imm4:expr) => {
5654 vrndscalepd(a, $imm8, zero, 0b11111111, $imm4)
5655 };
5656 }
5657 let r = constify_imm8_roundscale!(imm8, sae, call);
5658 transmute(r)
5659 }
5660
5661 /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5662 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5663 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5664 /// _MM_FROUND_TO_NEG_INF // round down\
5665 /// _MM_FROUND_TO_POS_INF // round up\
5666 /// _MM_FROUND_TO_ZERO // truncate\
5667 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
5668 ///
5669 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5670 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_round_pd&expand=4785)
5671 #[inline]
5672 #[target_feature(enable = "avx512f")]
5673 #[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
5674 #[rustc_args_required_const(3, 4)]
5675 pub unsafe fn _mm512_mask_roundscale_round_pd(
5676 src: __m512d,
5677 k: __mmask8,
5678 a: __m512d,
5679 imm8: i32,
5680 sae: i32,
5681 ) -> __m512d {
5682 let a = a.as_f64x8();
5683 let src = src.as_f64x8();
5684 macro_rules! call {
5685 ($imm8:expr, $imm4:expr) => {
5686 vrndscalepd(a, $imm8, src, k, $imm4)
5687 };
5688 }
5689 let r = constify_imm8_roundscale!(imm8, sae, call);
5690 transmute(r)
5691 }
5692
5693 /// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5694 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
5696 /// _MM_FROUND_TO_NEG_INF // round down\
5697 /// _MM_FROUND_TO_POS_INF // round up\
5698 /// _MM_FROUND_TO_ZERO // truncate\
5699 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
5700 ///
5701 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5702 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_round_pd&expand=4786)
5703 #[inline]
5704 #[target_feature(enable = "avx512f")]
5705 #[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
5706 #[rustc_args_required_const(2, 3)]
5707 pub unsafe fn _mm512_maskz_roundscale_round_pd(
5708 k: __mmask8,
5709 a: __m512d,
5710 imm8: i32,
5711 sae: i32,
5712 ) -> __m512d {
5713 let a = a.as_f64x8();
5714 let zero = _mm512_setzero_pd().as_f64x8();
5715 macro_rules! call {
5716 ($imm8:expr, $imm4:expr) => {
5717 vrndscalepd(a, $imm8, zero, k, $imm4)
5718 };
5719 }
5720 let r = constify_imm8_roundscale!(imm8, sae, call);
5721 transmute(r)
5722 }
5723
5724 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
5725 ///
5726 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5727 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5728 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5729 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5730 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5731 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5732 ///
5733 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_round_ps&expand=4889)
5734 #[inline]
5735 #[target_feature(enable = "avx512f")]
5736 #[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
5737 #[rustc_args_required_const(2)]
5738 pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
5739 macro_rules! call {
5740 ($imm4:expr) => {
5741 vscalefps(
5742 a.as_f32x16(),
5743 b.as_f32x16(),
5744 _mm512_setzero_ps().as_f32x16(),
5745 0b11111111_11111111,
5746 $imm4,
5747 )
5748 };
5749 }
5750 let r = constify_imm4_round!(rounding, call);
5751 transmute(r)
5752 }
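
// A minimal sketch of scalef above, which computes a * 2^floor(b) per lane
// (illustrative only, assumes AVX512F support; the helper name is not part of
// the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_scalef_round_ps() {
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(3.0);
    // 1.5 * 2^3 = 12.0 in every lane, rounded to nearest with exceptions
    // suppressed.
    let r = _mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let _ = r;
}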
5753
5754 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5755 ///
5756 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5757 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5758 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5759 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5760 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5761 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5762 ///
5763 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_round_ps&expand=4887)
5764 #[inline]
5765 #[target_feature(enable = "avx512f")]
5766 #[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
5767 #[rustc_args_required_const(4)]
5768 pub unsafe fn _mm512_mask_scalef_round_ps(
5769 src: __m512,
5770 k: __mmask16,
5771 a: __m512,
5772 b: __m512,
5773 rounding: i32,
5774 ) -> __m512 {
5775 macro_rules! call {
5776 ($imm4:expr) => {
5777 vscalefps(a.as_f32x16(), b.as_f32x16(), src.as_f32x16(), k, $imm4)
5778 };
5779 }
5780 let r = constify_imm4_round!(rounding, call);
5781 transmute(r)
5782 }
5783
5784 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5785 ///
5786 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5787 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5788 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5789 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5790 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5791 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5792 ///
5793 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_round_ps&expand=4888)
5794 #[inline]
5795 #[target_feature(enable = "avx512f")]
5796 #[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
5797 #[rustc_args_required_const(3)]
5798 pub unsafe fn _mm512_maskz_scalef_round_ps(
5799 k: __mmask16,
5800 a: __m512,
5801 b: __m512,
5802 rounding: i32,
5803 ) -> __m512 {
5804 macro_rules! call {
5805 ($imm4:expr) => {
5806 vscalefps(
5807 a.as_f32x16(),
5808 b.as_f32x16(),
5809 _mm512_setzero_ps().as_f32x16(),
5810 k,
5811 $imm4,
5812 )
5813 };
5814 }
5815 let r = constify_imm4_round!(rounding, call);
5816 transmute(r)
5817 }
5818
5819 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
5820 ///
5821 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5822 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5823 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5824 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5825 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5826 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5827 ///
5828 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_round_pd&expand=4886)
5829 #[inline]
5830 #[target_feature(enable = "avx512f")]
5831 #[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
5832 #[rustc_args_required_const(2)]
5833 pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
5834 macro_rules! call {
5835 ($imm4:expr) => {
5836 vscalefpd(
5837 a.as_f64x8(),
5838 b.as_f64x8(),
5839 _mm512_setzero_pd().as_f64x8(),
5840 0b11111111,
5841 $imm4,
5842 )
5843 };
5844 }
5845 let r = constify_imm4_round!(rounding, call);
5846 transmute(r)
5847 }
5848
5849 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5850 ///
5851 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5852 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5853 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5854 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5855 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5856 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5857 ///
5858 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_round_pd&expand=4884)
5859 #[inline]
5860 #[target_feature(enable = "avx512f")]
5861 #[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
5862 #[rustc_args_required_const(4)]
5863 pub unsafe fn _mm512_mask_scalef_round_pd(
5864 src: __m512d,
5865 k: __mmask8,
5866 a: __m512d,
5867 b: __m512d,
5868 rounding: i32,
5869 ) -> __m512d {
5870 macro_rules! call {
5871 ($imm4:expr) => {
5872 vscalefpd(a.as_f64x8(), b.as_f64x8(), src.as_f64x8(), k, $imm4)
5873 };
5874 }
5875 let r = constify_imm4_round!(rounding, call);
5876 transmute(r)
5877 }
5878
5879 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5880 ///
5881 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
5882 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
5883 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
5884 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
5885 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
5886 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5887 ///
5888 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_round_pd&expand=4885)
5889 #[inline]
5890 #[target_feature(enable = "avx512f")]
5891 #[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
5892 #[rustc_args_required_const(3)]
5893 pub unsafe fn _mm512_maskz_scalef_round_pd(
5894 k: __mmask8,
5895 a: __m512d,
5896 b: __m512d,
5897 rounding: i32,
5898 ) -> __m512d {
5899 macro_rules! call {
5900 ($imm4:expr) => {
5901 vscalefpd(
5902 a.as_f64x8(),
5903 b.as_f64x8(),
5904 _mm512_setzero_pd().as_f64x8(),
5905 k,
5906 $imm4,
5907 )
5908 };
5909 }
5910 let r = constify_imm4_round!(rounding, call);
5911 transmute(r)
5912 }
5913
5914 /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
5915 ///
5916 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5917 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_round_ps&expand=2505)
5918 #[inline]
5919 #[target_feature(enable = "avx512f")]
5920 #[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
5921 #[rustc_args_required_const(3, 4)]
5922 pub unsafe fn _mm512_fixupimm_round_ps(
5923 a: __m512,
5924 b: __m512,
5925 c: __m512i,
5926 imm8: i32,
5927 sae: i32,
5928 ) -> __m512 {
5929 let a = a.as_f32x16();
5930 let b = b.as_f32x16();
5931 let c = c.as_i32x16();
5932 macro_rules! call {
5933 ($imm8:expr, $imm4:expr) => {
5934 vfixupimmps(a, b, c, $imm8, 0b11111111_11111111, $imm4)
5935 };
5936 }
5937 let r = constify_imm8_roundscale!(imm8, sae, call);
5938 transmute(r)
5939 }
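
// A minimal call-shape sketch for fixupimm above: each 32-bit lane of `c`
// supplies 4-bit fix-up tokens selected by the classification of the inputs,
// and the imm8 bits choose which of those cases also raise floating-point
// exceptions (illustrative only, assumes AVX512F support; the helper name and
// operand values are assumptions for illustration).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_fixupimm_round_ps() {
    let a = _mm512_set1_ps(f32::NAN);
    let b = _mm512_set1_ps(1.0);
    // An all-zero token table requests the default "no fix-up" response for
    // every input class, and imm8 = 0 reports no exceptions.
    let c = _mm512_set1_epi32(0);
    let r = _mm512_fixupimm_round_ps(a, b, c, 0, _MM_FROUND_CUR_DIRECTION);
    let _ = r;
}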
5940
5941 /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
5942 ///
5943 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5944 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_round_ps&expand=2506)
5945 #[inline]
5946 #[target_feature(enable = "avx512f")]
5947 #[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
5948 #[rustc_args_required_const(4, 5)]
5949 pub unsafe fn _mm512_mask_fixupimm_round_ps(
5950 a: __m512,
5951 k: __mmask16,
5952 b: __m512,
5953 c: __m512i,
5954 imm8: i32,
5955 sae: i32,
5956 ) -> __m512 {
5957 let a = a.as_f32x16();
5958 let b = b.as_f32x16();
5959 let c = c.as_i32x16();
5960 macro_rules! call {
5961 ($imm8:expr, $imm4:expr) => {
5962 vfixupimmps(a, b, c, $imm8, k, $imm4)
5963 };
5964 }
5965 let r = constify_imm8_roundscale!(imm8, sae, call);
5966 transmute(r)
5967 }
5968
5969 /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
5970 ///
5971 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5972 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_round_ps&expand=2507)
5973 #[inline]
5974 #[target_feature(enable = "avx512f")]
5975 #[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
5976 #[rustc_args_required_const(4, 5)]
5977 pub unsafe fn _mm512_maskz_fixupimm_round_ps(
5978 k: __mmask16,
5979 a: __m512,
5980 b: __m512,
5981 c: __m512i,
5982 imm8: i32,
5983 sae: i32,
5984 ) -> __m512 {
5985 let a = a.as_f32x16();
5986 let b = b.as_f32x16();
5987 let c = c.as_i32x16();
5988 macro_rules! call {
5989 ($imm8:expr, $imm4:expr) => {
5990 vfixupimmpsz(a, b, c, $imm8, k, $imm4)
5991 };
5992 }
5993 let r = constify_imm8_roundscale!(imm8, sae, call);
5994 transmute(r)
5995 }
5996
5997 /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
5998 ///
5999 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6000 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_round_pd&expand=2502)
6001 #[inline]
6002 #[target_feature(enable = "avx512f")]
6003 #[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
6004 #[rustc_args_required_const(3, 4)]
6005 pub unsafe fn _mm512_fixupimm_round_pd(
6006 a: __m512d,
6007 b: __m512d,
6008 c: __m512i,
6009 imm8: i32,
6010 sae: i32,
6011 ) -> __m512d {
6012 let a = a.as_f64x8();
6013 let b = b.as_f64x8();
6014 let c = c.as_i64x8();
6015 macro_rules! call {
6016 ($imm8:expr, $imm4:expr) => {
6017 vfixupimmpd(a, b, c, $imm8, 0b11111111, $imm4)
6018 };
6019 }
6020 let r = constify_imm8_roundscale!(imm8, sae, call);
6021 transmute(r)
6022 }
6023
6024 /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
6025 ///
6026 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6027 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_round_pd&expand=2503)
6028 #[inline]
6029 #[target_feature(enable = "avx512f")]
6030 #[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
6031 #[rustc_args_required_const(4, 5)]
6032 pub unsafe fn _mm512_mask_fixupimm_round_pd(
6033 a: __m512d,
6034 k: __mmask8,
6035 b: __m512d,
6036 c: __m512i,
6037 imm8: i32,
6038 sae: i32,
6039 ) -> __m512d {
6040 let a = a.as_f64x8();
6041 let b = b.as_f64x8();
6042 let c = c.as_i64x8();
6043 macro_rules! call {
6044 ($imm8:expr, $imm4:expr) => {
6045 vfixupimmpd(a, b, c, $imm8, k, $imm4)
6046 };
6047 }
6048 let r = constify_imm8_roundscale!(imm8, sae, call);
6049 transmute(r)
6050 }
6051
6052 /// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
6053 ///
6054 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6055 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_round_pd&expand=2504)
6056 #[inline]
6057 #[target_feature(enable = "avx512f")]
6058 #[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
6059 #[rustc_args_required_const(4, 5)]
6060 pub unsafe fn _mm512_maskz_fixupimm_round_pd(
6061 k: __mmask8,
6062 a: __m512d,
6063 b: __m512d,
6064 c: __m512i,
6065 imm8: i32,
6066 sae: i32,
6067 ) -> __m512d {
6068 let a = a.as_f64x8();
6069 let b = b.as_f64x8();
6070 let c = c.as_i64x8();
6071 macro_rules! call {
6072 ($imm8:expr, $imm4:expr) => {
6073 vfixupimmpdz(a, b, c, $imm8, k, $imm4)
6074 };
6075 }
6076 let r = constify_imm8_roundscale!(imm8, sae, call);
6077 transmute(r)
6078 }
6079
6080 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6081 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6082 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6083 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6084 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6085 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6086 /// The sign is determined by sc which can take the following values:\
6087 /// _MM_MANT_SIGN_src // sign = sign(src)\
6088 /// _MM_MANT_SIGN_zero // sign = 0\
6089 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6090 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6091 ///
6092 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_round_ps&expand=2886)
6093 #[inline]
6094 #[target_feature(enable = "avx512f")]
6095 #[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
6096 #[rustc_args_required_const(1, 2, 3)]
6097 pub unsafe fn _mm512_getmant_round_ps(
6098 a: __m512,
6099 norm: _MM_MANTISSA_NORM_ENUM,
6100 sign: _MM_MANTISSA_SIGN_ENUM,
6101 sae: i32,
6102 ) -> __m512 {
6103 macro_rules! call {
6104 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
6105 vgetmantps(
6106 a.as_f32x16(),
6107 $imm2 << 2 | $imm4_1,
6108 _mm512_setzero_ps().as_f32x16(),
6109 0b11111111_11111111,
6110 $imm4_2,
6111 )
6112 };
6113 }
6114 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
6115 transmute(r)
6116 }
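
// A scalar sketch of the magnitude normalization to [1, 2) described by the
// _MM_MANT_NORM_1_2 interval above, valid for normal finite inputs
// (illustrative only; the helper name is not part of the public API).
#[cfg(test)]
#[allow(dead_code)]
fn _example_getmant_norm_1_2(x: f32) -> f32 {
    // Keep the 23 mantissa bits, clear the sign and force the biased exponent
    // to 127 (2^0): e.g. 10.0 = 1.25 * 2^3 maps to 1.25.
    f32::from_bits((x.to_bits() & 0x007f_ffff) | 0x3f80_0000)
}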
6117
6118 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6119 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6120 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6121 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6122 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6123 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6124 /// The sign is determined by sc which can take the following values:\
6125 /// _MM_MANT_SIGN_src // sign = sign(src)\
6126 /// _MM_MANT_SIGN_zero // sign = 0\
6127 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6128 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6129 ///
6130 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_round_ps&expand=2887)
6131 #[inline]
6132 #[target_feature(enable = "avx512f")]
6133 #[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
6134 #[rustc_args_required_const(3, 4, 5)]
6135 pub unsafe fn _mm512_mask_getmant_round_ps(
6136 src: __m512,
6137 k: __mmask16,
6138 a: __m512,
6139 norm: _MM_MANTISSA_NORM_ENUM,
6140 sign: _MM_MANTISSA_SIGN_ENUM,
6141 sae: i32,
6142 ) -> __m512 {
6143 macro_rules! call {
6144 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
6145 vgetmantps(
6146 a.as_f32x16(),
6147 $imm2 << 2 | $imm4_1,
6148 src.as_f32x16(),
6149 k,
6150 $imm4_2,
6151 )
6152 };
6153 }
6154 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
6155 transmute(r)
6156 }
6157
6158 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6159 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6160 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6161 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6162 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6163 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6164 /// The sign is determined by sc which can take the following values:\
6165 /// _MM_MANT_SIGN_src // sign = sign(src)\
6166 /// _MM_MANT_SIGN_zero // sign = 0\
6167 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6168 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6169 ///
6170 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_round_ps&expand=2888)
6171 #[inline]
6172 #[target_feature(enable = "avx512f")]
6173 #[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
6174 #[rustc_args_required_const(2, 3, 4)]
6175 pub unsafe fn _mm512_maskz_getmant_round_ps(
6176 k: __mmask16,
6177 a: __m512,
6178 norm: _MM_MANTISSA_NORM_ENUM,
6179 sign: _MM_MANTISSA_SIGN_ENUM,
6180 sae: i32,
6181 ) -> __m512 {
6182 macro_rules! call {
6183 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
6184 vgetmantps(
6185 a.as_f32x16(),
6186 $imm2 << 2 | $imm4_1,
6187 _mm512_setzero_ps().as_f32x16(),
6188 k,
6189 $imm4_2,
6190 )
6191 };
6192 }
6193 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
6194 transmute(r)
6195 }
6196
6197 /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6198 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6199 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6200 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6201 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6202 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6203 /// The sign is determined by sc which can take the following values:\
6204 /// _MM_MANT_SIGN_src // sign = sign(src)\
6205 /// _MM_MANT_SIGN_zero // sign = 0\
6206 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6207 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6208 ///
6209 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_round_pd&expand=2883)
6210 #[inline]
6211 #[target_feature(enable = "avx512f")]
6212 #[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
6213 #[rustc_args_required_const(1, 2, 3)]
6214 pub unsafe fn _mm512_getmant_round_pd(
6215 a: __m512d,
6216 norm: _MM_MANTISSA_NORM_ENUM,
6217 sign: _MM_MANTISSA_SIGN_ENUM,
6218 sae: i32,
6219 ) -> __m512d {
6220 macro_rules! call {
6221 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
6222 vgetmantpd(
6223 a.as_f64x8(),
6224 $imm2 << 2 | $imm4_1,
6225 _mm512_setzero_pd().as_f64x8(),
6226 0b11111111,
6227 $imm4_2,
6228 )
6229 };
6230 }
6231 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
6232 transmute(r)
6233 }
6234
6235 /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6236 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6237 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6238 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6239 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6240 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6241 /// The sign is determined by sc which can take the following values:\
6242 /// _MM_MANT_SIGN_src // sign = sign(src)\
6243 /// _MM_MANT_SIGN_zero // sign = 0\
6244 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6245 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6246 ///
6247 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_round_pd&expand=2884)
6248 #[inline]
6249 #[target_feature(enable = "avx512f")]
6250 #[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
6251 #[rustc_args_required_const(3, 4, 5)]
6252 pub unsafe fn _mm512_mask_getmant_round_pd(
6253 src: __m512d,
6254 k: __mmask8,
6255 a: __m512d,
6256 norm: _MM_MANTISSA_NORM_ENUM,
6257 sign: _MM_MANTISSA_SIGN_ENUM,
6258 sae: i32,
6259 ) -> __m512d {
6260 macro_rules! call {
6261 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
6262 vgetmantpd(
6263 a.as_f64x8(),
6264 $imm2 << 2 | $imm4_1,
6265 src.as_f64x8(),
6266 k,
6267 $imm4_2,
6268 )
6269 };
6270 }
6271 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
6272 transmute(r)
6273 }
6274
6275 /// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6276 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6277 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6278 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6279 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6280 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6281 /// The sign is determined by sc which can take the following values:\
6282 /// _MM_MANT_SIGN_src // sign = sign(src)\
6283 /// _MM_MANT_SIGN_zero // sign = 0\
6284 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6285 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6286 ///
6287 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_round_pd&expand=2885)
6288 #[inline]
6289 #[target_feature(enable = "avx512f")]
6290 #[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
6291 #[rustc_args_required_const(2, 3, 4)]
6292 pub unsafe fn _mm512_maskz_getmant_round_pd(
6293 k: __mmask8,
6294 a: __m512d,
6295 norm: _MM_MANTISSA_NORM_ENUM,
6296 sign: _MM_MANTISSA_SIGN_ENUM,
6297 sae: i32,
6298 ) -> __m512d {
6299 macro_rules! call {
6300 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
6301 vgetmantpd(
6302 a.as_f64x8(),
6303 $imm2 << 2 | $imm4_1,
6304 _mm512_setzero_pd().as_f64x8(),
6305 k,
6306 $imm4_2,
6307 )
6308 };
6309 }
6310 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
6311 transmute(r)
6312 }
6313
6314 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
6315 ///
6316 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvtps_epi32&expand=1737)
6317 #[inline]
6318 #[target_feature(enable = "avx512f")]
6319 #[cfg_attr(test, assert_instr(vcvtps2dq))]
6320 pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
6321 transmute(vcvtps2dq(
6322 a.as_f32x16(),
6323 _mm512_setzero_si512().as_i32x16(),
6324 0b11111111_11111111,
6325 _MM_FROUND_CUR_DIRECTION,
6326 ))
6327 }
6328
6329 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6330 ///
6331 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_epi32&expand=1738)
6332 #[inline]
6333 #[target_feature(enable = "avx512f")]
6334 #[cfg_attr(test, assert_instr(vcvtps2dq))]
6335 pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
6336 transmute(vcvtps2dq(
6337 a.as_f32x16(),
6338 src.as_i32x16(),
6339 k,
6340 _MM_FROUND_CUR_DIRECTION,
6341 ))
6342 }
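
// A minimal sketch of the writemask merge above (illustrative only, assumes
// AVX512F support; the helper name is not part of the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_cvtps_epi32() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_ps(1.4);
    // The low 8 lanes are converted (1.4 -> 1 under the current rounding
    // mode); the high 8 lanes keep the value -1 from `src`.
    let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
    let _ = r;
}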
6343
6344 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6345 ///
6346 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_epi32&expand=1739)
6347 #[inline]
6348 #[target_feature(enable = "avx512f")]
6349 #[cfg_attr(test, assert_instr(vcvtps2dq))]
6350 pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
6351 transmute(vcvtps2dq(
6352 a.as_f32x16(),
6353 _mm512_setzero_si512().as_i32x16(),
6354 k,
6355 _MM_FROUND_CUR_DIRECTION,
6356 ))
6357 }
6358
6359 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
6360 ///
6361 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_epu32&expand=1755)
6362 #[inline]
6363 #[target_feature(enable = "avx512f")]
6364 #[cfg_attr(test, assert_instr(vcvtps2udq))]
6365 pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
6366 transmute(vcvtps2udq(
6367 a.as_f32x16(),
6368 _mm512_setzero_si512().as_u32x16(),
6369 0b11111111_11111111,
6370 _MM_FROUND_CUR_DIRECTION,
6371 ))
6372 }
6373
6374 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6375 ///
6376 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_epu32&expand=1756)
6377 #[inline]
6378 #[target_feature(enable = "avx512f")]
6379 #[cfg_attr(test, assert_instr(vcvtps2udq))]
6380 pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
6381 transmute(vcvtps2udq(
6382 a.as_f32x16(),
6383 src.as_u32x16(),
6384 k,
6385 _MM_FROUND_CUR_DIRECTION,
6386 ))
6387 }
6388
6389 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6390 ///
6391 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=maskz_cvt_roundps_epu32&expand=1343)
6392 #[inline]
6393 #[target_feature(enable = "avx512f")]
6394 #[cfg_attr(test, assert_instr(vcvtps2udq))]
6395 pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
6396 transmute(vcvtps2udq(
6397 a.as_f32x16(),
6398 _mm512_setzero_si512().as_u32x16(),
6399 k,
6400 _MM_FROUND_CUR_DIRECTION,
6401 ))
6402 }
6403
6404 /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
6405 ///
6406 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_pd&expand=1769)
6407 #[inline]
6408 #[target_feature(enable = "avx512f")]
6409 #[cfg_attr(test, assert_instr(vcvtps2pd))]
6410 pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
6411 transmute(vcvtps2pd(
6412 a.as_f32x8(),
6413 _mm512_setzero_pd().as_f64x8(),
6414 0b11111111,
6415 _MM_FROUND_CUR_DIRECTION,
6416 ))
6417 }
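
// A minimal sketch of widening the 8 single-precision elements above to
// double precision (illustrative only, assumes AVX512F support; the helper
// name is not part of the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cvtps_pd() {
    let a = _mm256_set1_ps(0.5);
    // Every f32 lane widens exactly to the f64 value 0.5.
    let r = _mm512_cvtps_pd(a);
    let _ = r;
}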
6418
6419 /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6420 ///
6421 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_pd&expand=1770)
6422 #[inline]
6423 #[target_feature(enable = "avx512f")]
6424 #[cfg_attr(test, assert_instr(vcvtps2pd))]
6425 pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
6426 transmute(vcvtps2pd(
6427 a.as_f32x8(),
6428 src.as_f64x8(),
6429 k,
6430 _MM_FROUND_CUR_DIRECTION,
6431 ))
6432 }
6433
6434 /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6435 ///
6436 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_pd&expand=1771)
6437 #[inline]
6438 #[target_feature(enable = "avx512f")]
6439 #[cfg_attr(test, assert_instr(vcvtps2pd))]
6440 pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
6441 transmute(vcvtps2pd(
6442 a.as_f32x8(),
6443 _mm512_setzero_pd().as_f64x8(),
6444 k,
6445 _MM_FROUND_CUR_DIRECTION,
6446 ))
6447 }
6448
6449 /// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
6450 ///
6451 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtpslo_pd&expand=1784)
6452 #[inline]
6453 #[target_feature(enable = "avx512f")]
6454 #[cfg_attr(test, assert_instr(vcvtps2pd))]
6455 pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
6456 transmute(vcvtps2pd(
6457 _mm512_castps512_ps256(v2).as_f32x8(),
6458 _mm512_setzero_pd().as_f64x8(),
6459 0b11111111,
6460 _MM_FROUND_CUR_DIRECTION,
6461 ))
6462 }
6463
6464 /// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6465 ///
6466 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtpslo_pd&expand=1785)
6467 #[inline]
6468 #[target_feature(enable = "avx512f")]
6469 #[cfg_attr(test, assert_instr(vcvtps2pd))]
6470 pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
6471 transmute(vcvtps2pd(
6472 _mm512_castps512_ps256(v2).as_f32x8(),
6473 src.as_f64x8(),
6474 k,
6475 _MM_FROUND_CUR_DIRECTION,
6476 ))
6477 }
6478
6479 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
6480 ///
6481 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtpd_ps&expand=1712)
6482 #[inline]
6483 #[target_feature(enable = "avx512f")]
6484 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
6485 pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
6486 transmute(vcvtpd2ps(
6487 a.as_f64x8(),
6488 _mm256_setzero_ps().as_f32x8(),
6489 0b11111111,
6490 _MM_FROUND_CUR_DIRECTION,
6491 ))
6492 }
6493
6494 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6495 ///
6496 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtpd_ps&expand=1713)
6497 #[inline]
6498 #[target_feature(enable = "avx512f")]
6499 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
6500 pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
6501 transmute(vcvtpd2ps(
6502 a.as_f64x8(),
6503 src.as_f32x8(),
6504 k,
6505 _MM_FROUND_CUR_DIRECTION,
6506 ))
6507 }
6508
6509 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6510 ///
6511 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtpd_ps&expand=1714)
6512 #[inline]
6513 #[target_feature(enable = "avx512f")]
6514 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
6515 pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
6516 transmute(vcvtpd2ps(
6517 a.as_f64x8(),
6518 _mm256_setzero_ps().as_f32x8(),
6519 k,
6520 _MM_FROUND_CUR_DIRECTION,
6521 ))
6522 }
6523
6524 /// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
6525 ///
6526 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtpd_pslo&expand=1715)
6527 #[inline]
6528 #[target_feature(enable = "avx512f")]
6529 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
6530 pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
6531 let r: f32x8 = vcvtpd2ps(
6532 v2.as_f64x8(),
6533 _mm256_setzero_ps().as_f32x8(),
6534 0b11111111,
6535 _MM_FROUND_CUR_DIRECTION,
6536 );
6537 simd_shuffle16(
6538 r,
6539 _mm256_setzero_ps().as_f32x8(),
6540 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
6541 )
6542 }
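
// A minimal sketch of the narrowing conversion above: the 8 converted f32
// values land in the low half of the 512-bit result and the high half is
// zeroed (illustrative only, assumes AVX512F support; the helper name is not
// part of the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cvtpd_pslo() {
    let v2 = _mm512_set1_pd(0.25);
    // Lanes 0..7 of `r` are 0.25_f32, lanes 8..15 are 0.0.
    let r = _mm512_cvtpd_pslo(v2);
    let _ = r;
}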
6543
6544 /// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
6545 ///
6546 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtpd_pslo&expand=1716)
6547 #[inline]
6548 #[target_feature(enable = "avx512f")]
6549 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
6550 pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
6551 let r: f32x8 = vcvtpd2ps(
6552 v2.as_f64x8(),
6553 _mm512_castps512_ps256(src).as_f32x8(),
6554 k,
6555 _MM_FROUND_CUR_DIRECTION,
6556 );
6557 simd_shuffle16(
6558 r,
6559 _mm256_setzero_ps().as_f32x8(),
6560 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
6561 )
6562 }
6563
6564 /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
6565 ///
6566 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi8_epi32&expand=1535)
6567 #[inline]
6568 #[target_feature(enable = "avx512f")]
6569 #[cfg_attr(test, assert_instr(vpmovsxbd))]
6570 pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
6571 let a = a.as_i8x16();
6572 transmute::<i32x16, _>(simd_cast(a))
6573 }
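
// A minimal sketch of the sign extension above (illustrative only, assumes
// AVX512F support; the helper name is not part of the public API).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cvtepi8_epi32() {
    let a = _mm_set1_epi8(-2);
    // Each i8 lane sign-extends, so every i32 lane of `r` is -2.
    let r = _mm512_cvtepi8_epi32(a);
    let _ = r;
}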
6574
6575 /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6576 ///
6577 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi8_epi32&expand=1536)
6578 #[inline]
6579 #[target_feature(enable = "avx512f")]
6580 #[cfg_attr(test, assert_instr(vpmovsxbd))]
6581 pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
6582 let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
6583 transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
6584 }
6585
6586 /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6587 ///
6588 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi8_epi32&expand=1537)
6589 #[inline]
6590 #[target_feature(enable = "avx512f")]
6591 #[cfg_attr(test, assert_instr(vpmovsxbd))]
6592 pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
6593 let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
6594 let zero = _mm512_setzero_si512().as_i32x16();
6595 transmute(simd_select_bitmask(k, convert, zero))
6596 }
6597
6598 /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
6599 ///
6600 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi8_epi64&expand=1544)
6601 #[inline]
6602 #[target_feature(enable = "avx512f")]
6603 #[cfg_attr(test, assert_instr(vpmovsxbq))]
6604 pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
6605 let a = a.as_i8x16();
6606 let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
6607 transmute::<i64x8, _>(simd_cast(v64))
6608 }
6609
6610 /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6611 ///
6612 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi8_epi64&expand=1545)
6613 #[inline]
6614 #[target_feature(enable = "avx512f")]
6615 #[cfg_attr(test, assert_instr(vpmovsxbq))]
6616 pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
6617 let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
6618 transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
6619 }
6620
6621 /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6622 ///
6623 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi8_epi64&expand=1546)
6624 #[inline]
6625 #[target_feature(enable = "avx512f")]
6626 #[cfg_attr(test, assert_instr(vpmovsxbq))]
6627 pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
6628 let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
6629 let zero = _mm512_setzero_si512().as_i64x8();
6630 transmute(simd_select_bitmask(k, convert, zero))
6631 }
6632
6633 /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
6634 ///
6635 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu8_epi32&expand=1621)
6636 #[inline]
6637 #[target_feature(enable = "avx512f")]
6638 #[cfg_attr(test, assert_instr(vpmovzxbd))]
6639 pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
6640 let a = a.as_u8x16();
6641 transmute::<i32x16, _>(simd_cast(a))
6642 }
6643
6644 /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6645 ///
6646 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu8_epi32&expand=1622)
6647 #[inline]
6648 #[target_feature(enable = "avx512f")]
6649 #[cfg_attr(test, assert_instr(vpmovzxbd))]
6650 pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
6651 let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
6652 transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
6653 }
6654
6655 /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6656 ///
6657 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu8_epi32&expand=1623)
6658 #[inline]
6659 #[target_feature(enable = "avx512f")]
6660 #[cfg_attr(test, assert_instr(vpmovzxbd))]
6661 pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
6662 let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
6663 let zero = _mm512_setzero_si512().as_i32x16();
6664 transmute(simd_select_bitmask(k, convert, zero))
6665 }
6666
6667 /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
6668 ///
6669 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu8_epi64&expand=1630)
6670 #[inline]
6671 #[target_feature(enable = "avx512f")]
6672 #[cfg_attr(test, assert_instr(vpmovzxbq))]
6673 pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
6674 let a = a.as_u8x16();
6675 let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
6676 transmute::<i64x8, _>(simd_cast(v64))
6677 }
6678
6679 /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6680 ///
6681 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu8_epi64&expand=1631)
6682 #[inline]
6683 #[target_feature(enable = "avx512f")]
6684 #[cfg_attr(test, assert_instr(vpmovzxbq))]
6685 pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
6686 let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
6687 transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
6688 }
6689
6690 /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6691 ///
6692 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu8_epi64&expand=1632)
6693 #[inline]
6694 #[target_feature(enable = "avx512f")]
6695 #[cfg_attr(test, assert_instr(vpmovzxbq))]
6696 pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
6697 let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
6698 let zero = _mm512_setzero_si512().as_i64x8();
6699 transmute(simd_select_bitmask(k, convert, zero))
6700 }
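
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// Zero extension treats the input bytes as unsigned, so 0xFF widens to 255 rather
// than -1. The helper name `sketch_cvtepu8_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtepu8_usage() {
    let a = _mm_set1_epi8(-1); // every byte is 0xFF
    // All sixteen 32-bit lanes hold 255 (zero extension, not sign extension).
    let dwords = _mm512_cvtepu8_epi32(a);
    // Only the low 8 bytes are used for the 64-bit widening; every lane holds 255.
    let qwords = _mm512_maskz_cvtepu8_epi64(0b1111_1111, a);
    let _ = (dwords, qwords);
}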
6701
6702 /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
6703 ///
6704 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi16_epi32&expand=1389)
6705 #[inline]
6706 #[target_feature(enable = "avx512f")]
6707 #[cfg_attr(test, assert_instr(vpmovsxwd))]
6708 pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
6709 let a = a.as_i16x16();
6710 transmute::<i32x16, _>(simd_cast(a))
6711 }
6712
6713 /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6714 ///
6715 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi16_epi32&expand=1390)
6716 #[inline]
6717 #[target_feature(enable = "avx512f")]
6718 #[cfg_attr(test, assert_instr(vpmovsxwd))]
6719 pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
6720 let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
6721 transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
6722 }
6723
6724 /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6725 ///
6726 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi16_epi32&expand=1391)
6727 #[inline]
6728 #[target_feature(enable = "avx512f")]
6729 #[cfg_attr(test, assert_instr(vpmovsxwd))]
6730 pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
6731 let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
6732 let zero = _mm512_setzero_si512().as_i32x16();
6733 transmute(simd_select_bitmask(k, convert, zero))
6734 }
6735
6736 /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
6737 ///
6738 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi16_epi64&expand=1398)
6739 #[inline]
6740 #[target_feature(enable = "avx512f")]
6741 #[cfg_attr(test, assert_instr(vpmovsxwq))]
6742 pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
6743 let a = a.as_i16x8();
6744 transmute::<i64x8, _>(simd_cast(a))
6745 }
6746
6747 /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6748 ///
6749 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi16_epi64&expand=1399)
6750 #[inline]
6751 #[target_feature(enable = "avx512f")]
6752 #[cfg_attr(test, assert_instr(vpmovsxwq))]
6753 pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
6754 let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
6755 transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
6756 }
6757
6758 /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6759 ///
6760 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi16_epi64&expand=1400)
6761 #[inline]
6762 #[target_feature(enable = "avx512f")]
6763 #[cfg_attr(test, assert_instr(vpmovsxwq))]
6764 pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
6765 let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
6766 let zero = _mm512_setzero_si512().as_i64x8();
6767 transmute(simd_select_bitmask(k, convert, zero))
6768 }
6769
6770 /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
6771 ///
6772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu16_epi32&expand=1553)
6773 #[inline]
6774 #[target_feature(enable = "avx512f")]
6775 #[cfg_attr(test, assert_instr(vpmovzxwd))]
6776 pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
6777 let a = a.as_u16x16();
6778 transmute::<i32x16, _>(simd_cast(a))
6779 }
6780
6781 /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6782 ///
6783 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu16_epi32&expand=1554)
6784 #[inline]
6785 #[target_feature(enable = "avx512f")]
6786 #[cfg_attr(test, assert_instr(vpmovzxwd))]
6787 pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
6788 let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
6789 transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
6790 }
6791
6792 /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6793 ///
6794 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu16_epi32&expand=1555)
6795 #[inline]
6796 #[target_feature(enable = "avx512f")]
6797 #[cfg_attr(test, assert_instr(vpmovzxwd))]
6798 pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
6799 let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
6800 let zero = _mm512_setzero_si512().as_i32x16();
6801 transmute(simd_select_bitmask(k, convert, zero))
6802 }
6803
6804 /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
6805 ///
6806 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu16_epi64&expand=1562)
6807 #[inline]
6808 #[target_feature(enable = "avx512f")]
6809 #[cfg_attr(test, assert_instr(vpmovzxwq))]
6810 pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
6811 let a = a.as_u16x8();
6812 transmute::<i64x8, _>(simd_cast(a))
6813 }
6814
6815 /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6816 ///
6817 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu16_epi64&expand=1563)
6818 #[inline]
6819 #[target_feature(enable = "avx512f")]
6820 #[cfg_attr(test, assert_instr(vpmovzxwq))]
6821 pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
6822 let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
6823 transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
6824 }
6825
6826 /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6827 ///
6828 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu16_epi64&expand=1564)
6829 #[inline]
6830 #[target_feature(enable = "avx512f")]
6831 #[cfg_attr(test, assert_instr(vpmovzxwq))]
6832 pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
6833 let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
6834 let zero = _mm512_setzero_si512().as_i64x8();
6835 transmute(simd_select_bitmask(k, convert, zero))
6836 }
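
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// The 16-bit widenings mirror the byte variants: `cvtepi16_*` sign extends while
// `cvtepu16_*` zero extends. The helper name `sketch_cvtepi16_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtepi16_usage() {
    let w256 = _mm256_set1_epi16(-2);
    let w128 = _mm_set1_epi16(-2);
    let signed_dwords = _mm512_cvtepi16_epi32(w256); // sixteen lanes of -2
    let unsigned_dwords = _mm512_cvtepu16_epi32(w256); // sixteen lanes of 65534
    let signed_qwords = _mm512_cvtepi16_epi64(w128); // eight lanes of -2
    let _ = (signed_dwords, unsigned_dwords, signed_qwords);
}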
6837
6838 /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
6839 ///
6840 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_epi64&expand=1428)
6841 #[inline]
6842 #[target_feature(enable = "avx512f")]
6843 #[cfg_attr(test, assert_instr(vpmovsxdq))]
6844 pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
6845 let a = a.as_i32x8();
6846 transmute::<i64x8, _>(simd_cast(a))
6847 }
6848
6849 /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6850 ///
6851 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_epi64&expand=1429)
6852 #[inline]
6853 #[target_feature(enable = "avx512f")]
6854 #[cfg_attr(test, assert_instr(vpmovsxdq))]
6855 pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
6856 let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
6857 transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
6858 }
6859
6860 /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6861 ///
6862 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_epi64&expand=1430)
6863 #[inline]
6864 #[target_feature(enable = "avx512f")]
6865 #[cfg_attr(test, assert_instr(vpmovsxdq))]
6866 pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
6867 let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
6868 let zero = _mm512_setzero_si512().as_i64x8();
6869 transmute(simd_select_bitmask(k, convert, zero))
6870 }
6871
6872 /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
6873 ///
6874 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32_epi64&expand=1571)
6875 #[inline]
6876 #[target_feature(enable = "avx512f")]
6877 #[cfg_attr(test, assert_instr(vpmovzxdq))]
6878 pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
6879 let a = a.as_u32x8();
6880 transmute::<i64x8, _>(simd_cast(a))
6881 }
6882
6883 /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6884 ///
6885 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32_epi64&expand=1572)
6886 #[inline]
6887 #[target_feature(enable = "avx512f")]
6888 #[cfg_attr(test, assert_instr(vpmovzxdq))]
6889 pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
6890 let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
6891 transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
6892 }
6893
6894 /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6895 ///
6896 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu32_epi64&expand=1573)
6897 #[inline]
6898 #[target_feature(enable = "avx512f")]
6899 #[cfg_attr(test, assert_instr(vpmovzxdq))]
6900 pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
6901 let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
6902 let zero = _mm512_setzero_si512().as_i64x8();
6903 transmute(simd_select_bitmask(k, convert, zero))
6904 }
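
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// 32-bit -> 64-bit widening with and without a writemask. The helper name
// `sketch_cvtepi32_epi64_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtepi32_epi64_usage() {
    let a = _mm256_set1_epi32(-7);
    let signed = _mm512_cvtepi32_epi64(a); // eight lanes of -7
    let unsigned = _mm512_cvtepu32_epi64(a); // eight lanes of 0xFFFF_FFF9 (4294967289)
    // Keep the low four widened lanes, copy the upper four from `signed`.
    let mixed = _mm512_mask_cvtepu32_epi64(signed, 0b0000_1111, a);
    let _ = (signed, unsigned, mixed);
}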
6905
6906 /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
6907 ///
6908 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_ps&expand=1455)
6909 #[inline]
6910 #[target_feature(enable = "avx512f")]
6911 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
6912 pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
6913 let a = a.as_i32x16();
6914 transmute::<f32x16, _>(simd_cast(a))
6915 }
6916
6917 /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6918 ///
6919 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_ps&expand=1456)
6920 #[inline]
6921 #[target_feature(enable = "avx512f")]
6922 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
6923 pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
6924 let convert = _mm512_cvtepi32_ps(a).as_f32x16();
6925 transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
6926 }
6927
6928 /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6929 ///
6930 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_ps&expand=1457)
6931 #[inline]
6932 #[target_feature(enable = "avx512f")]
6933 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
6934 pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
6935 let convert = _mm512_cvtepi32_ps(a).as_f32x16();
6936 let zero = _mm512_setzero_ps().as_f32x16();
6937 transmute(simd_select_bitmask(k, convert, zero))
6938 }
6939
6940 /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
6941 ///
6942 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_pd&expand=1446)
6943 #[inline]
6944 #[target_feature(enable = "avx512f")]
6945 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
6946 pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
6947 let a = a.as_i32x8();
6948 transmute::<f64x8, _>(simd_cast(a))
6949 }
6950
6951 /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6952 ///
6953 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_pd&expand=1447)
6954 #[inline]
6955 #[target_feature(enable = "avx512f")]
6956 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
6957 pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
6958 let convert = _mm512_cvtepi32_pd(a).as_f64x8();
6959 transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
6960 }
6961
6962 /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6963 ///
6964 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_pd&expand=1448)
6965 #[inline]
6966 #[target_feature(enable = "avx512f")]
6967 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
6968 pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
6969 let convert = _mm512_cvtepi32_pd(a).as_f64x8();
6970 let zero = _mm512_setzero_pd().as_f64x8();
6971 transmute(simd_select_bitmask(k, convert, zero))
6972 }
6973
6974 /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
6975 ///
6976 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32_ps&expand=1583)
6977 #[inline]
6978 #[target_feature(enable = "avx512f")]
6979 #[cfg_attr(test, assert_instr(vcvtudq2ps))]
6980 pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
6981 let a = a.as_u32x16();
6982 transmute::<f32x16, _>(simd_cast(a))
6983 }
6984
6985 /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6986 ///
6987 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32_ps&expand=1584)
6988 #[inline]
6989 #[target_feature(enable = "avx512f")]
6990 #[cfg_attr(test, assert_instr(vcvtudq2ps))]
6991 pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
6992 let convert = _mm512_cvtepu32_ps(a).as_f32x16();
6993 transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
6994 }
6995
6996 /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6997 ///
6998 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu32_ps&expand=1585)
6999 #[inline]
7000 #[target_feature(enable = "avx512f")]
7001 #[cfg_attr(test, assert_instr(vcvtudq2ps))]
7002 pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
7003 let convert = _mm512_cvtepu32_ps(a).as_f32x16();
7004 let zero = _mm512_setzero_ps().as_f32x16();
7005 transmute(simd_select_bitmask(k, convert, zero))
7006 }
7007
7008 /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
7009 ///
7010 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32_pd&expand=1580)
7011 #[inline]
7012 #[target_feature(enable = "avx512f")]
7013 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
7014 pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
7015 let a = a.as_u32x8();
7016 transmute::<f64x8, _>(simd_cast(a))
7017 }
7018
7019 /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7020 ///
7021 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32_pd&expand=1581)
7022 #[inline]
7023 #[target_feature(enable = "avx512f")]
7024 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
7025 pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
7026 let convert = _mm512_cvtepu32_pd(a).as_f64x8();
7027 transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
7028 }
7029
7030 /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7031 ///
7032 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu32_pd&expand=1582)
7033 #[inline]
7034 #[target_feature(enable = "avx512f")]
7035 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
7036 pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
7037 let convert = _mm512_cvtepu32_pd(a).as_f64x8();
7038 let zero = _mm512_setzero_pd().as_f64x8();
7039 transmute(simd_select_bitmask(k, convert, zero))
7040 }
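
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// Signed vs. unsigned integer-to-float conversion of the same bit pattern. The helper
// name `sketch_cvt_int_to_float_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvt_int_to_float_usage() {
    let a = _mm512_set1_epi32(-1);
    let as_signed = _mm512_cvtepi32_ps(a); // sixteen lanes of -1.0_f32
    let as_unsigned = _mm512_cvtepu32_ps(a); // sixteen lanes of u32::MAX as f32
    // The double-precision forms take a 256-bit integer source.
    let b = _mm256_set1_epi32(3);
    let as_f64 = _mm512_cvtepi32_pd(b); // eight lanes of 3.0_f64
    let _ = (as_signed, as_unsigned, as_f64);
}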
7041
7042 /// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
7043 ///
7044 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32lo_pd&expand=1464)
7045 #[inline]
7046 #[target_feature(enable = "avx512f")]
7047 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
7048 pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
7049 let v2 = v2.as_i32x16();
7050 let v256: i32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
7051 transmute::<f64x8, _>(simd_cast(v256))
7052 }
7053
7054 /// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7055 ///
7056 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32lo_pd&expand=1465)
7057 #[inline]
7058 #[target_feature(enable = "avx512f")]
7059 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
7060 pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
7061 let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
7062 transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
7063 }
7064
7065 /// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
7066 ///
7067 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32lo_pd&expand=1586)
7068 #[inline]
7069 #[target_feature(enable = "avx512f")]
7070 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
7071 pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
7072 let v2 = v2.as_u32x16();
7073 let v256: u32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
7074 transmute::<f64x8, _>(simd_cast(v256))
7075 }
7076
7077 /// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7078 ///
7079 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32lo_pd&expand=1587)
7080 #[inline]
7081 #[target_feature(enable = "avx512f")]
7082 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
7083 pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
7084 let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
7085 transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
7086 }
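
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// The `*lo_pd` forms read only the low eight 32-bit lanes of the 512-bit source. The
// helper name `sketch_cvtepi32lo_pd_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtepi32lo_pd_usage() {
    let v2 = _mm512_set1_epi32(-5);
    let signed = _mm512_cvtepi32lo_pd(v2); // eight lanes of -5.0_f64
    let unsigned = _mm512_cvtepu32lo_pd(v2); // eight lanes of (-5_i32 as u32) as f64
    // Writemask form: keep `signed` wherever the mask bit is clear.
    let merged = _mm512_mask_cvtepi32lo_pd(signed, 0b1111_0000, v2);
    let _ = (signed, unsigned, merged);
}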
7087
7088 /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
7089 ///
7090 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_epi16&expand=1419)
7091 #[inline]
7092 #[target_feature(enable = "avx512f")]
7093 #[cfg_attr(test, assert_instr(vpmovdw))]
7094 pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
7095 let a = a.as_i32x16();
7096 transmute::<i16x16, _>(simd_cast(a))
7097 }
7098
7099 /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7100 ///
7101 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_epi16&expand=1420)
7102 #[inline]
7103 #[target_feature(enable = "avx512f")]
7104 #[cfg_attr(test, assert_instr(vpmovdw))]
7105 pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
7106 let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
7107 transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
7108 }
7109
7110 /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7111 ///
7112 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_epi16&expand=1421)
7113 #[inline]
7114 #[target_feature(enable = "avx512f")]
7115 #[cfg_attr(test, assert_instr(vpmovdw))]
7116 pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
7117 let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
7118 let zero = _mm256_setzero_si256().as_i16x16();
7119 transmute(simd_select_bitmask(k, convert, zero))
7120 }
7121
7122 /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
7123 ///
7124 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_epi8&expand=1437)
7125 #[inline]
7126 #[target_feature(enable = "avx512f")]
7127 #[cfg_attr(test, assert_instr(vpmovdb))]
7128 pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
7129 let a = a.as_i32x16();
7130 transmute::<i8x16, _>(simd_cast(a))
7131 }
7132
7133 /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7134 ///
7135 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_epi8&expand=1438)
7136 #[inline]
7137 #[target_feature(enable = "avx512f")]
7138 #[cfg_attr(test, assert_instr(vpmovdb))]
7139 pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
7140 let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
7141 transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
7142 }
7143
7144 /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7145 ///
7146 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_epi8&expand=1439)
7147 #[inline]
7148 #[target_feature(enable = "avx512f")]
7149 #[cfg_attr(test, assert_instr(vpmovdb))]
7150 pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
7151 let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
7152 let zero = _mm_setzero_si128().as_i8x16();
7153 transmute(simd_select_bitmask(k, convert, zero))
7154 }
7155
7156 /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
7157 ///
7158 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi64_epi32&expand=1481)
7159 #[inline]
7160 #[target_feature(enable = "avx512f")]
7161 #[cfg_attr(test, assert_instr(vpmovqd))]
7162 pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
7163 let a = a.as_i64x8();
7164 transmute::<i32x8, _>(simd_cast(a))
7165 }
7166
7167 /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7168 ///
7169 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi64_epi32&expand=1482)
7170 #[inline]
7171 #[target_feature(enable = "avx512f")]
7172 #[cfg_attr(test, assert_instr(vpmovqd))]
7173 pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
7174 let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
7175 transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
7176 }
7177
7178 /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7179 ///
7180 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi64_epi32&expand=1483)
7181 #[inline]
7182 #[target_feature(enable = "avx512f")]
7183 #[cfg_attr(test, assert_instr(vpmovqd))]
7184 pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
7185 let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
7186 let zero = _mm256_setzero_si256().as_i32x8();
7187 transmute(simd_select_bitmask(k, convert, zero))
7188 }
7189
7190 /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
7191 ///
7192 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi64_epi16&expand=1472)
7193 #[inline]
7194 #[target_feature(enable = "avx512f")]
7195 #[cfg_attr(test, assert_instr(vpmovqw))]
7196 pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
7197 let a = a.as_i64x8();
7198 transmute::<i16x8, _>(simd_cast(a))
7199 }
7200
7201 /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7202 ///
7203 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi64_epi16&expand=1473)
7204 #[inline]
7205 #[target_feature(enable = "avx512f")]
7206 #[cfg_attr(test, assert_instr(vpmovqw))]
7207 pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
7208 let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
7209 transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
7210 }
7211
7212 /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7213 ///
7214 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi64_epi16&expand=1474)
7215 #[inline]
7216 #[target_feature(enable = "avx512f")]
7217 #[cfg_attr(test, assert_instr(vpmovqw))]
7218 pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
7219 let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
7220 let zero = _mm_setzero_si128().as_i16x8();
7221 transmute(simd_select_bitmask(k, convert, zero))
7222 }
7223
7224 /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
7225 ///
7226 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi64_epi8&expand=1490)
7227 #[inline]
7228 #[target_feature(enable = "avx512f")]
7229 #[cfg_attr(test, assert_instr(vpmovqb))]
7230 pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
7231 transmute(vpmovqb(
7232 a.as_i64x8(),
7233 _mm_setzero_si128().as_i8x16(),
7234 0b11111111,
7235 ))
7236 }
7237
7238 /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7239 ///
7240 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi64_epi8&expand=1491)
7241 #[inline]
7242 #[target_feature(enable = "avx512f")]
7243 #[cfg_attr(test, assert_instr(vpmovqb))]
7244 pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
7245 transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k))
7246 }
7247
7248 /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7249 ///
7250 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi64_epi8&expand=1492)
7251 #[inline]
7252 #[target_feature(enable = "avx512f")]
7253 #[cfg_attr(test, assert_instr(vpmovqb))]
7254 pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
7255 transmute(vpmovqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
7256 }
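
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// Truncating down-conversions simply drop the upper bits of each element; compare the
// saturating `cvtsepi*`/`cvtusepi*` families below. The helper name
// `sketch_truncating_downconvert_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_truncating_downconvert_usage() {
    // 0x0001_0203 truncated to 16 bits is 0x0203, and to 8 bits is 0x03.
    let d = _mm512_set1_epi32(0x0001_0203);
    let words = _mm512_cvtepi32_epi16(d); // sixteen lanes of 0x0203
    let bytes = _mm512_cvtepi32_epi8(d); // sixteen lanes of 0x03
    // 64-bit sources narrow the same way.
    let q = _mm512_set1_epi64(0x1_0000_0001);
    let dwords = _mm512_cvtepi64_epi32(q); // eight lanes of 1
    let _ = (words, bytes, dwords);
}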
7257
7258 /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
7259 ///
7260 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi32_epi16&expand=1819)
7261 #[inline]
7262 #[target_feature(enable = "avx512f")]
7263 #[cfg_attr(test, assert_instr(vpmovsdw))]
7264 pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
7265 transmute(vpmovsdw(
7266 a.as_i32x16(),
7267 _mm256_setzero_si256().as_i16x16(),
7268 0b11111111_11111111,
7269 ))
7270 }
7271
7272 /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7273 ///
7274 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi32_epi16&expand=1820)
7275 #[inline]
7276 #[target_feature(enable = "avx512f")]
7277 #[cfg_attr(test, assert_instr(vpmovsdw))]
7278 pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
7279 transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k))
7280 }
7281
7282 /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7283 ///
7284 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi32_epi16&expand=1821)
7285 #[inline]
7286 #[target_feature(enable = "avx512f")]
7287 #[cfg_attr(test, assert_instr(vpmovsdw))]
7288 pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
7289 transmute(vpmovsdw(
7290 a.as_i32x16(),
7291 _mm256_setzero_si256().as_i16x16(),
7292 k,
7293 ))
7294 }
7295
7296 /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
7297 ///
7298 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi32_epi8&expand=1828)
7299 #[inline]
7300 #[target_feature(enable = "avx512f")]
7301 #[cfg_attr(test, assert_instr(vpmovsdb))]
7302 pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
7303 transmute(vpmovsdb(
7304 a.as_i32x16(),
7305 _mm_setzero_si128().as_i8x16(),
7306 0b11111111_11111111,
7307 ))
7308 }
7309
7310 /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7311 ///
7312 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi32_epi8&expand=1829)
7313 #[inline]
7314 #[target_feature(enable = "avx512f")]
7315 #[cfg_attr(test, assert_instr(vpmovsdb))]
7316 pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
7317 transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k))
7318 }
7319
7320 /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7321 ///
7322 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi32_epi8&expand=1830)
7323 #[inline]
7324 #[target_feature(enable = "avx512f")]
7325 #[cfg_attr(test, assert_instr(vpmovsdb))]
7326 pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
7327 transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k))
7328 }
7329
7330 /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
7331 ///
7332 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi64_epi32&expand=1852)
7333 #[inline]
7334 #[target_feature(enable = "avx512f")]
7335 #[cfg_attr(test, assert_instr(vpmovsqd))]
7336 pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
7337 transmute(vpmovsqd(
7338 a.as_i64x8(),
7339 _mm256_setzero_si256().as_i32x8(),
7340 0b11111111,
7341 ))
7342 }
7343
7344 /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7345 ///
7346 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi64_epi32&expand=1853)
7347 #[inline]
7348 #[target_feature(enable = "avx512f")]
7349 #[cfg_attr(test, assert_instr(vpmovsqd))]
7350 pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
7351 transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k))
7352 }
7353
7354 /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7355 ///
7356 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi64_epi32&expand=1854)
7357 #[inline]
7358 #[target_feature(enable = "avx512f")]
7359 #[cfg_attr(test, assert_instr(vpmovsqd))]
7360 pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
7361 transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k))
7362 }
7363
7364 /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
7365 ///
7366 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi64_epi16&expand=1843)
7367 #[inline]
7368 #[target_feature(enable = "avx512f")]
7369 #[cfg_attr(test, assert_instr(vpmovsqw))]
7370 pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
7371 transmute(vpmovsqw(
7372 a.as_i64x8(),
7373 _mm_setzero_si128().as_i16x8(),
7374 0b11111111,
7375 ))
7376 }
7377
7378 /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7379 ///
7380 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi64_epi16&expand=1844)
7381 #[inline]
7382 #[target_feature(enable = "avx512f")]
7383 #[cfg_attr(test, assert_instr(vpmovsqw))]
7384 pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
7385 transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k))
7386 }
7387
7388 /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7389 ///
7390 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi64_epi16&expand=1845)
7391 #[inline]
7392 #[target_feature(enable = "avx512f")]
7393 #[cfg_attr(test, assert_instr(vpmovsqw))]
7394 pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
7395 transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k))
7396 }
7397
7398 /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
7399 ///
7400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi64_epi8&expand=1861)
7401 #[inline]
7402 #[target_feature(enable = "avx512f")]
7403 #[cfg_attr(test, assert_instr(vpmovsqb))]
7404 pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
7405 transmute(vpmovsqb(
7406 a.as_i64x8(),
7407 _mm_setzero_si128().as_i8x16(),
7408 0b11111111,
7409 ))
7410 }
7411
7412 /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7413 ///
7414 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi64_epi8&expand=1862)
7415 #[inline]
7416 #[target_feature(enable = "avx512f")]
7417 #[cfg_attr(test, assert_instr(vpmovsqb))]
7418 pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
7419 transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k))
7420 }
7421
7422 /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7423 ///
7424 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi64_epi8&expand=1863)
7425 #[inline]
7426 #[target_feature(enable = "avx512f")]
7427 #[cfg_attr(test, assert_instr(vpmovsqb))]
7428 pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
7429 transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
7430 }
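
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// Signed saturation clamps out-of-range values to the target type's extremes instead
// of truncating. The helper name `sketch_cvtsepi_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtsepi_usage() {
    let d = _mm512_set1_epi32(100_000); // does not fit in i16 or i8
    let words = _mm512_cvtsepi32_epi16(d); // sixteen lanes of i16::MAX (32767)
    let bytes = _mm512_cvtsepi32_epi8(d); // sixteen lanes of i8::MAX (127)
    let q = _mm512_set1_epi64(i64::MIN);
    let dwords = _mm512_cvtsepi64_epi32(q); // eight lanes of i32::MIN
    let _ = (words, bytes, dwords);
}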
7431
7432 /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
7433 ///
7434 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi32_epi16&expand=2054)
7435 #[inline]
7436 #[target_feature(enable = "avx512f")]
7437 #[cfg_attr(test, assert_instr(vpmovusdw))]
7438 pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
7439 transmute(vpmovusdw(
7440 a.as_u32x16(),
7441 _mm256_setzero_si256().as_u16x16(),
7442 0b11111111_11111111,
7443 ))
7444 }
7445
7446 /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7447 ///
7448 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi32_epi16&expand=2055)
7449 #[inline]
7450 #[target_feature(enable = "avx512f")]
7451 #[cfg_attr(test, assert_instr(vpmovusdw))]
7452 pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
7453 transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k))
7454 }
7455
7456 /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7457 ///
7458 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi32_epi16&expand=2056)
7459 #[inline]
7460 #[target_feature(enable = "avx512f")]
7461 #[cfg_attr(test, assert_instr(vpmovusdw))]
7462 pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
7463 transmute(vpmovusdw(
7464 a.as_u32x16(),
7465 _mm256_setzero_si256().as_u16x16(),
7466 k,
7467 ))
7468 }
7469
7470 /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
7471 ///
7472 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi32_epi8&expand=2063)
7473 #[inline]
7474 #[target_feature(enable = "avx512f")]
7475 #[cfg_attr(test, assert_instr(vpmovusdb))]
7476 pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
7477 transmute(vpmovusdb(
7478 a.as_u32x16(),
7479 _mm_setzero_si128().as_u8x16(),
7480 0b11111111_11111111,
7481 ))
7482 }
7483
7484 /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7485 ///
7486 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi32_epi8&expand=2064)
7487 #[inline]
7488 #[target_feature(enable = "avx512f")]
7489 #[cfg_attr(test, assert_instr(vpmovusdb))]
7490 pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
7491 transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k))
7492 }
7493
7494 /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7495 ///
7496 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi32_epi8&expand=2065)
7497 #[inline]
7498 #[target_feature(enable = "avx512f")]
7499 #[cfg_attr(test, assert_instr(vpmovusdb))]
7500 pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
7501 transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k))
7502 }
7503
7504 /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
7505 ///
7506 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi64_epi32&expand=2087)
7507 #[inline]
7508 #[target_feature(enable = "avx512f")]
7509 #[cfg_attr(test, assert_instr(vpmovusqd))]
7510 pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
7511 transmute(vpmovusqd(
7512 a.as_u64x8(),
7513 _mm256_setzero_si256().as_u32x8(),
7514 0b11111111,
7515 ))
7516 }
7517
7518 /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7519 ///
7520 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi64_epi32&expand=2088)
7521 #[inline]
7522 #[target_feature(enable = "avx512f")]
7523 #[cfg_attr(test, assert_instr(vpmovusqd))]
7524 pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
7525 transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k))
7526 }
7527
7528 /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7529 ///
7530 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi64_epi32&expand=2089)
7531 #[inline]
7532 #[target_feature(enable = "avx512f")]
7533 #[cfg_attr(test, assert_instr(vpmovusqd))]
7534 pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
7535 transmute(vpmovusqd(
7536 a.as_u64x8(),
7537 _mm256_setzero_si256().as_u32x8(),
7538 k,
7539 ))
7540 }
7541
7542 /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
7543 ///
7544 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi64_epi16&expand=2078)
7545 #[inline]
7546 #[target_feature(enable = "avx512f")]
7547 #[cfg_attr(test, assert_instr(vpmovusqw))]
7548 pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
7549 transmute(vpmovusqw(
7550 a.as_u64x8(),
7551 _mm_setzero_si128().as_u16x8(),
7552 0b11111111,
7553 ))
7554 }
7555
7556 /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7557 ///
7558 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi64_epi16&expand=2079)
7559 #[inline]
7560 #[target_feature(enable = "avx512f")]
7561 #[cfg_attr(test, assert_instr(vpmovusqw))]
7562 pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
7563 transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k))
7564 }
7565
7566 /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7567 ///
7568 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi64_epi16&expand=2080)
7569 #[inline]
7570 #[target_feature(enable = "avx512f")]
7571 #[cfg_attr(test, assert_instr(vpmovusqw))]
7572 pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
7573 transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k))
7574 }
7575
7576 /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
7577 ///
7578 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi64_epi8&expand=2096)
7579 #[inline]
7580 #[target_feature(enable = "avx512f")]
7581 #[cfg_attr(test, assert_instr(vpmovusqb))]
7582 pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
7583 transmute(vpmovusqb(
7584 a.as_u64x8(),
7585 _mm_setzero_si128().as_u8x16(),
7586 0b11111111,
7587 ))
7588 }
7589
7590 /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7591 ///
7592 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi64_epi8&expand=2097)
7593 #[inline]
7594 #[target_feature(enable = "avx512f")]
7595 #[cfg_attr(test, assert_instr(vpmovusqb))]
7596 pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
7597 transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k))
7598 }
7599
7600 /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7601 ///
7602 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi64_epi8&expand=2098)
7603 #[inline]
7604 #[target_feature(enable = "avx512f")]
7605 #[cfg_attr(test, assert_instr(vpmovusqb))]
7606 pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
7607 transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k))
7608 }
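
// --- Illustrative usage sketch (not part of upstream stdarch) --------------------
// Unsigned saturation clamps to the unsigned target range, so anything above
// `u8::MAX`/`u16::MAX`/`u32::MAX` becomes that maximum. The helper name
// `sketch_cvtusepi_usage` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtusepi_usage() {
    let d = _mm512_set1_epi32(70_000); // exceeds u16::MAX and u8::MAX
    let words = _mm512_cvtusepi32_epi16(d); // sixteen lanes of u16::MAX (65535)
    let bytes = _mm512_cvtusepi32_epi8(d); // sixteen lanes of u8::MAX (255)
    let q = _mm512_set1_epi64(-1); // all bits set: the largest unsigned value
    let dwords = _mm512_cvtusepi64_epi32(q); // eight lanes of u32::MAX
    let _ = (words, bytes, dwords);
}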
7609
7610 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
7611 ///
7612 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7613 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7614 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7615 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7616 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7617 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7618 ///
7619 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_epi32&expand=1335)
7620 #[inline]
7621 #[target_feature(enable = "avx512f")]
7622 #[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
7623 #[rustc_args_required_const(1)]
7624 pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i {
7625 macro_rules! call {
7626 ($imm4:expr) => {
7627 vcvtps2dq(
7628 a.as_f32x16(),
7629 _mm512_setzero_si512().as_i32x16(),
7630 0b11111111_11111111,
7631 $imm4,
7632 )
7633 };
7634 }
7635 let r = constify_imm4_round!(rounding, call);
7636 transmute(r)
7637 }
7638
7639 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7640 ///
7641 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7642 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7643 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7644 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7645 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7646 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7647 ///
7648 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_epi32&expand=1336)
7649 #[inline]
7650 #[target_feature(enable = "avx512f")]
7651 #[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
7652 #[rustc_args_required_const(3)]
7653 pub unsafe fn _mm512_mask_cvt_roundps_epi32(
7654 src: __m512i,
7655 k: __mmask16,
7656 a: __m512,
7657 rounding: i32,
7658 ) -> __m512i {
7659 macro_rules! call {
7660 ($imm4:expr) => {
7661 vcvtps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
7662 };
7663 }
7664 let r = constify_imm4_round!(rounding, call);
7665 transmute(r)
7666 }
7667
7668 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7669 ///
7670 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7671 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7672 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7673 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7674 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7675 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7676 ///
7677 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_epi32&expand=1337)
7678 #[inline]
7679 #[target_feature(enable = "avx512f")]
7680 #[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
7681 #[rustc_args_required_const(2)]
7682 pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
7683 macro_rules! call {
7684 ($imm4:expr) => {
7685 vcvtps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
7686 };
7687 }
7688 let r = constify_imm4_round!(rounding, call);
7689 transmute(r)
7690 }
7691
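// A minimal sketch of the zeromask form, assuming an AVX-512F-capable CPU; values,
// mask, and helper name are illustrative. Lanes whose bit in `k` is clear are zeroed
// rather than copied from a source vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_cvt_roundps_epi32() {
    let a: __m512 = mem::transmute([7.2_f32; 16]);
    let r = _mm512_maskz_cvt_roundps_epi32(
        0b11111111_00000000,
        a,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let lanes: [i32; 16] = mem::transmute(r);
    // Low eight lanes were masked off (zeroed); high eight hold the truncated value 7.
    assert!(lanes[..8].iter().all(|&x| x == 0) && lanes[8..].iter().all(|&x| x == 7));
}
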
7692 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
7693 ///
7694 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7695 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7696 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7697 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7698 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7699 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7700 ///
7701 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_epu32&expand=1341)
7702 #[inline]
7703 #[target_feature(enable = "avx512f")]
7704 #[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
7705 #[rustc_args_required_const(1)]
7706 pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i {
7707 macro_rules! call {
7708 ($imm4:expr) => {
7709 vcvtps2udq(
7710 a.as_f32x16(),
7711 _mm512_setzero_si512().as_u32x16(),
7712 0b11111111_11111111,
7713 $imm4,
7714 )
7715 };
7716 }
7717 let r = constify_imm4_round!(rounding, call);
7718 transmute(r)
7719 }
7720
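// A minimal sketch, assuming an AVX-512F-capable CPU; the input value and helper name
// are illustrative. The epu32 form produces unsigned results, so magnitudes beyond
// i32::MAX are still representable.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundps_epu32() {
    // 3_000_000_000.0 is exactly representable as an f32 and exceeds i32::MAX.
    let a: __m512 = mem::transmute([3_000_000_000.0_f32; 16]);
    let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let lanes: [u32; 16] = mem::transmute(r);
    assert!(lanes.iter().all(|&x| x == 3_000_000_000));
}
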
7721 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7722 ///
7723 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7724 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7725 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7726 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7727 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7728 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7729 ///
7730 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_epu32&expand=1342)
7731 #[inline]
7732 #[target_feature(enable = "avx512f")]
7733 #[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
7734 #[rustc_args_required_const(3)]
7735 pub unsafe fn _mm512_mask_cvt_roundps_epu32(
7736 src: __m512i,
7737 k: __mmask16,
7738 a: __m512,
7739 rounding: i32,
7740 ) -> __m512i {
7741 macro_rules! call {
7742 ($imm4:expr) => {
7743 vcvtps2udq(a.as_f32x16(), src.as_u32x16(), k, $imm4)
7744 };
7745 }
7746 let r = constify_imm4_round!(rounding, call);
7747 transmute(r)
7748 }
7749
7750 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7751 ///
7752 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7753 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7754 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7755 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7756 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7757 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7758 ///
7759 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_epu32&expand=1343)
7760 #[inline]
7761 #[target_feature(enable = "avx512f")]
7762 #[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
7763 #[rustc_args_required_const(2)]
7764 pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
7765 macro_rules! call {
7766 ($imm4:expr) => {
7767 vcvtps2udq(a.as_f32x16(), _mm512_setzero_si512().as_u32x16(), k, $imm4)
7768 };
7769 }
7770 let r = constify_imm4_round!(rounding, call);
7771 transmute(r)
7772 }
7773
7774 /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
7775 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
7776 ///
7777 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundps_pd&expand=1347)
7778 #[inline]
7779 #[target_feature(enable = "avx512f")]
7780 #[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
7781 #[rustc_args_required_const(1)]
7782 pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d {
7783 macro_rules! call {
7784 ($imm4:expr) => {
7785 vcvtps2pd(
7786 a.as_f32x8(),
7787 _mm512_setzero_pd().as_f64x8(),
7788 0b11111111,
7789 $imm4,
7790 )
7791 };
7792 }
7793 let r = constify_imm4_sae!(sae, call);
7794 transmute(r)
7795 }
7796
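// A minimal sketch, assuming an AVX-512F-capable CPU; lane values and the helper name
// are illustrative. Widening f32 to f64 is exact, so `sae` only controls whether
// exceptions are suppressed, as the doc comment above describes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundps_pd() {
    let a: __m256 = mem::transmute([0.25_f32, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
    let r = _mm512_cvt_roundps_pd(a, _MM_FROUND_NO_EXC);
    let lanes: [f64; 8] = mem::transmute(r);
    assert!(lanes == [0.25, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
}
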
7797 /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7798 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
7799 ///
7800 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_pd&expand=1348)
7801 #[inline]
7802 #[target_feature(enable = "avx512f")]
7803 #[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
7804 #[rustc_args_required_const(3)]
7805 pub unsafe fn _mm512_mask_cvt_roundps_pd(
7806 src: __m512d,
7807 k: __mmask8,
7808 a: __m256,
7809 sae: i32,
7810 ) -> __m512d {
7811 macro_rules! call {
7812 ($imm4:expr) => {
7813 vcvtps2pd(a.as_f32x8(), src.as_f64x8(), k, $imm4)
7814 };
7815 }
7816 let r = constify_imm4_sae!(sae, call);
7817 transmute(r)
7818 }
7819
7820 /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7821 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
7822 ///
7823 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_pd&expand=1349)
7824 #[inline]
7825 #[target_feature(enable = "avx512f")]
7826 #[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
7827 #[rustc_args_required_const(2)]
7828 pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d {
7829 macro_rules! call {
7830 ($imm4:expr) => {
7831 vcvtps2pd(a.as_f32x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
7832 };
7833 }
7834 let r = constify_imm4_sae!(sae, call);
7835 transmute(r)
7836 }
7837
7838 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
7839 ///
7840 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7841 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7842 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7843 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7844 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7845 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7846 ///
7847 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundpd_epi32&expand=1315)
7848 #[inline]
7849 #[target_feature(enable = "avx512f")]
7850 #[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
7851 #[rustc_args_required_const(1)]
7852 pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i {
7853 macro_rules! call {
7854 ($imm4:expr) => {
7855 vcvtpd2dq(
7856 a.as_f64x8(),
7857 _mm256_setzero_si256().as_i32x8(),
7858 0b11111111,
7859 $imm4,
7860 )
7861 };
7862 }
7863 let r = constify_imm4_round!(rounding, call);
7864 transmute(r)
7865 }
7866
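// A minimal sketch, assuming an AVX-512F-capable CPU; lane values and the helper name
// are illustrative. Eight f64 lanes narrow to eight i32 lanes returned in a __m256i,
// and the input is chosen so the rounding mode is visible.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundpd_epi32() {
    let a: __m512d = mem::transmute([-2.5_f64; 8]);
    // Round toward positive infinity: -2.5 becomes -2 in every lane.
    let r = _mm512_cvt_roundpd_epi32(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    let lanes: [i32; 8] = mem::transmute(r);
    assert!(lanes.iter().all(|&x| x == -2));
}
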
7867 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7868 ///
7869 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7871 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7872 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7873 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7874 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7875 ///
7876 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundpd_epi32&expand=1316)
7877 #[inline]
7878 #[target_feature(enable = "avx512f")]
7879 #[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
7880 #[rustc_args_required_const(3)]
7881 pub unsafe fn _mm512_mask_cvt_roundpd_epi32(
7882 src: __m256i,
7883 k: __mmask8,
7884 a: __m512d,
7885 rounding: i32,
7886 ) -> __m256i {
7887 macro_rules! call {
7888 ($imm4:expr) => {
7889 vcvtpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
7890 };
7891 }
7892 let r = constify_imm4_round!(rounding, call);
7893 transmute(r)
7894 }
7895
7896 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7897 ///
7898 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7900 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7901 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7902 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7903 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7904 ///
7905 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundpd_epi32&expand=1317)
7906 #[inline]
7907 #[target_feature(enable = "avx512f")]
7908 #[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
7909 #[rustc_args_required_const(2)]
7910 pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
7911 macro_rules! call {
7912 ($imm4:expr) => {
7913 vcvtpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
7914 };
7915 }
7916 let r = constify_imm4_round!(rounding, call);
7917 transmute(r)
7918 }
7919
7920 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
7921 ///
7922 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7923 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7924 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7925 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7926 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7927 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7928 ///
7929 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundpd_epu32&expand=1321)
7930 #[inline]
7931 #[target_feature(enable = "avx512f")]
7932 #[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
7933 #[rustc_args_required_const(1)]
7934 pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i {
7935 macro_rules! call {
7936 ($imm4:expr) => {
7937 vcvtpd2udq(
7938 a.as_f64x8(),
7939 _mm256_setzero_si256().as_u32x8(),
7940 0b11111111,
7941 $imm4,
7942 )
7943 };
7944 }
7945 let r = constify_imm4_round!(rounding, call);
7946 transmute(r)
7947 }
7948
7949 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7950 ///
7951 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7952 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7953 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7954 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7955 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7956 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7957 ///
7958 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundpd_epu32&expand=1322)
7959 #[inline]
7960 #[target_feature(enable = "avx512f")]
7961 #[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
7962 #[rustc_args_required_const(3)]
7963 pub unsafe fn _mm512_mask_cvt_roundpd_epu32(
7964 src: __m256i,
7965 k: __mmask8,
7966 a: __m512d,
7967 rounding: i32,
7968 ) -> __m256i {
7969 macro_rules! call {
7970 ($imm4:expr) => {
7971 vcvtpd2udq(a.as_f64x8(), src.as_u32x8(), k, $imm4)
7972 };
7973 }
7974 let r = constify_imm4_round!(rounding, call);
7975 transmute(r)
7976 }
7977
7978 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7979 ///
7980 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7981 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
7982 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
7983 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
7984 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
7985 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
7986 ///
7987 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundpd_epu32&expand=1323)
7988 #[inline]
7989 #[target_feature(enable = "avx512f")]
7990 #[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
7991 #[rustc_args_required_const(2)]
7992 pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
7993 macro_rules! call {
7994 ($imm4:expr) => {
7995 vcvtpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_u32x8(), k, $imm4)
7996 };
7997 }
7998 let r = constify_imm4_round!(rounding, call);
7999 transmute(r)
8000 }
8001
8002 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
8003 ///
8004 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8005 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8006 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8007 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8008 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8009 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8010 ///
8011 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundpd_ps&expand=1327)
8012 #[inline]
8013 #[target_feature(enable = "avx512f")]
8014 #[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
8015 #[rustc_args_required_const(1)]
8016 pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 {
8017 macro_rules! call {
8018 ($imm4:expr) => {
8019 vcvtpd2ps(
8020 a.as_f64x8(),
8021 _mm256_setzero_ps().as_f32x8(),
8022 0b11111111,
8023 $imm4,
8024 )
8025 };
8026 }
8027 let r = constify_imm4_round!(rounding, call);
8028 transmute(r)
8029 }
8030
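// A minimal sketch, assuming an AVX-512F-capable CPU; the input value and helper name
// are illustrative. Narrowing f64 to f32 can lose precision, which is where the
// rounding mode matters: 1.0000000001 has no exact f32 representation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundpd_ps() {
    let a: __m512d = mem::transmute([1.000_000_000_1_f64; 8]);
    let up = _mm512_cvt_roundpd_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    let down = _mm512_cvt_roundpd_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let (u, d): ([f32; 8], [f32; 8]) = (mem::transmute(up), mem::transmute(down));
    // Rounding up lands on the f32 just above 1.0; truncating lands on 1.0 itself.
    assert!(u.iter().zip(d.iter()).all(|(&hi, &lo)| hi > 1.0 && lo == 1.0));
}
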
8031 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8032 ///
8033 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8034 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8035 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8036 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8037 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8038 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8039 ///
8040 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundpd_ps&expand=1328)
8041 #[inline]
8042 #[target_feature(enable = "avx512f")]
8043 #[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
8044 #[rustc_args_required_const(3)]
8045 pub unsafe fn _mm512_mask_cvt_roundpd_ps(
8046 src: __m256,
8047 k: __mmask8,
8048 a: __m512d,
8049 rounding: i32,
8050 ) -> __m256 {
8051 macro_rules! call {
8052 ($imm4:expr) => {
8053 vcvtpd2ps(a.as_f64x8(), src.as_f32x8(), k, $imm4)
8054 };
8055 }
8056 let r = constify_imm4_round!(rounding, call);
8057 transmute(r)
8058 }
8059
8060 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8061 ///
8062 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8063 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8064 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8065 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8066 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8067 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8068 ///
8069 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundpd_ps&expand=1329)
8070 #[inline]
8071 #[target_feature(enable = "avx512f")]
8072 #[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
8073 #[rustc_args_required_const(2)]
8074 pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32) -> __m256 {
8075 macro_rules! call {
8076 ($imm4:expr) => {
8077 vcvtpd2ps(a.as_f64x8(), _mm256_setzero_ps().as_f32x8(), k, $imm4)
8078 };
8079 }
8080 let r = constify_imm4_round!(rounding, call);
8081 transmute(r)
8082 }
8083
8084 /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
8085 ///
8086 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8087 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8088 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8089 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8090 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8091 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8092 ///
8093 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundepi32_ps&expand=1294)
8094 #[inline]
8095 #[target_feature(enable = "avx512f")]
8096 #[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
8097 #[rustc_args_required_const(1)]
8098 pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 {
8099 macro_rules! call {
8100 ($imm4:expr) => {
8101 vcvtdq2ps(a.as_i32x16(), $imm4)
8102 };
8103 }
8104 let r = constify_imm4_round!(rounding, call);
8105 transmute(r)
8106 }
8107
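// A minimal sketch, assuming an AVX-512F-capable CPU; the input value and helper name
// are illustrative. i32 -> f32 is exact up to 2^24, so the rounding mode only shows
// for larger magnitudes: 16_777_217 (2^24 + 1) sits exactly between two f32 values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundepi32_ps() {
    let a: __m512i = mem::transmute([16_777_217_i32; 16]);
    let near = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let up = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    let (n, u): ([f32; 16], [f32; 16]) = (mem::transmute(near), mem::transmute(up));
    // Ties-to-even picks 16_777_216.0; rounding up picks 16_777_218.0.
    assert!(n.iter().all(|&x| x == 16_777_216.0) && u.iter().all(|&x| x == 16_777_218.0));
}
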
8108 /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8109 ///
8110 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8111 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8112 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8113 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8114 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8115 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8116 ///
8117 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundepi32_ps&expand=1295)
8118 #[inline]
8119 #[target_feature(enable = "avx512f")]
8120 #[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
8121 #[rustc_args_required_const(3)]
8122 pub unsafe fn _mm512_mask_cvt_roundepi32_ps(
8123 src: __m512,
8124 k: __mmask16,
8125 a: __m512i,
8126 rounding: i32,
8127 ) -> __m512 {
8128 macro_rules! call {
8129 ($imm4:expr) => {
8130 vcvtdq2ps(a.as_i32x16(), $imm4)
8131 };
8132 }
8133 let r: f32x16 = constify_imm4_round!(rounding, call);
8134 transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8135 }
8136
8137 /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8138 ///
8139 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8140 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8141 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8142 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8143 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8144 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8145 ///
8146 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundepi32_ps&expand=1296)
8147 #[inline]
8148 #[target_feature(enable = "avx512f")]
8149 #[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
8150 #[rustc_args_required_const(2)]
8151 pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
8152 macro_rules! call {
8153 ($imm4:expr) => {
8154 vcvtdq2ps(a.as_i32x16(), $imm4)
8155 };
8156 }
8157 let r = constify_imm4_round!(rounding, call);
8158 let zero = _mm512_setzero_ps().as_f32x16();
8159 transmute(simd_select_bitmask(k, r, zero))
8160 }
8161
8162 /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
8163 ///
8164 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8165 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8166 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8167 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8168 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8169 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8170 ///
8171 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundepu32_ps&expand=1303)
8172 #[inline]
8173 #[target_feature(enable = "avx512f")]
8174 #[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
8175 #[rustc_args_required_const(1)]
8176 pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 {
8177 macro_rules! call {
8178 ($imm4:expr) => {
8179 vcvtudq2ps(a.as_u32x16(), $imm4)
8180 };
8181 }
8182 let r = constify_imm4_round!(rounding, call);
8183 transmute(r)
8184 }
8185
8186 /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8187 ///
8188 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8189 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8190 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8191 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8192 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8193 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8194 ///
8195 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundepu32_ps&expand=1304)
8196 #[inline]
8197 #[target_feature(enable = "avx512f")]
8198 #[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
8199 #[rustc_args_required_const(3)]
8200 pub unsafe fn _mm512_mask_cvt_roundepu32_ps(
8201 src: __m512,
8202 k: __mmask16,
8203 a: __m512i,
8204 rounding: i32,
8205 ) -> __m512 {
8206 macro_rules! call {
8207 ($imm4:expr) => {
8208 vcvtudq2ps(a.as_u32x16(), $imm4)
8209 };
8210 }
8211 let r: f32x16 = constify_imm4_round!(rounding, call);
8212 transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8213 }
8214
8215 /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8216 ///
8217 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8218 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
8219 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
8220 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
8221 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
8222 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
8223 ///
8224 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundepu32_ps&expand=1305)
8225 #[inline]
8226 #[target_feature(enable = "avx512f")]
8227 #[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
8228 #[rustc_args_required_const(2)]
8229 pub unsafe fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
8230 macro_rules! call {
8231 ($imm4:expr) => {
8232 vcvtudq2ps(a.as_u32x16(), $imm4)
8233 };
8234 }
8235 let r = constify_imm4_round!(rounding, call);
8236 let zero = _mm512_setzero_ps().as_f32x16();
8237 transmute(simd_select_bitmask(k, r, zero))
8238 }
8239
8240 /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
8241 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8242 ///
8243 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_ph&expand=1354)
8244 #[inline]
8245 #[target_feature(enable = "avx512f")]
8246 #[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
8247 #[rustc_args_required_const(1)]
8248 pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i {
8249 macro_rules! call {
8250 ($imm4:expr) => {
8251 vcvtps2ph(
8252 a.as_f32x16(),
8253 $imm4,
8254 _mm256_setzero_si256().as_i16x16(),
8255 0b11111111_11111111,
8256 )
8257 };
8258 }
8259 let r = constify_imm4_sae!(sae, call);
8260 transmute(r)
8261 }
8262
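// A minimal sketch, assuming an AVX-512F-capable CPU; the input value and helper name
// are illustrative. The result lanes are IEEE 754 binary16 bit patterns packed in a
// __m256i; 1.0f32 encodes as 0x3C00 in half precision.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundps_ph() {
    let a: __m512 = mem::transmute([1.0_f32; 16]);
    let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC);
    let halves: [u16; 16] = mem::transmute(r);
    assert!(halves.iter().all(|&h| h == 0x3C00));
}
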
8263 /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8264 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8265 ///
8266 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_ph&expand=1355)
8267 #[inline]
8268 #[target_feature(enable = "avx512f")]
8269 #[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
8270 #[rustc_args_required_const(3)]
8271 pub unsafe fn _mm512_mask_cvt_roundps_ph(
8272 src: __m256i,
8273 k: __mmask16,
8274 a: __m512,
8275 sae: i32,
8276 ) -> __m256i {
8277 macro_rules! call {
8278 ($imm4:expr) => {
8279 vcvtps2ph(a.as_f32x16(), $imm4, src.as_i16x16(), k)
8280 };
8281 }
8282 let r = constify_imm4_sae!(sae, call);
8283 transmute(r)
8284 }
8285
8286 /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8287 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8288 ///
8289 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_ph&expand=1356)
8290 #[inline]
8291 #[target_feature(enable = "avx512f")]
8292 #[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
8293 #[rustc_args_required_const(2)]
8294 pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
8295 macro_rules! call {
8296 ($imm4:expr) => {
8297 vcvtps2ph(a.as_f32x16(), $imm4, _mm256_setzero_si256().as_i16x16(), k)
8298 };
8299 }
8300 let r = constify_imm4_sae!(sae, call);
8301 transmute(r)
8302 }
8303
8304 /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
8305 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8306 ///
8307 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_ph&expand=1778)
8308 #[inline]
8309 #[target_feature(enable = "avx512f")]
8310 #[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
8311 #[rustc_args_required_const(1)]
8312 pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i {
8313 macro_rules! call {
8314 ($imm4:expr) => {
8315 vcvtps2ph(
8316 a.as_f32x16(),
8317 $imm4,
8318 _mm256_setzero_si256().as_i16x16(),
8319 0b11111111_11111111,
8320 )
8321 };
8322 }
8323 let r = constify_imm4_sae!(sae, call);
8324 transmute(r)
8325 }
8326
8327 /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8328 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8329 ///
8330 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_ph&expand=1779)
8331 #[inline]
8332 #[target_feature(enable = "avx512f")]
8333 #[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
8334 #[rustc_args_required_const(3)]
8335 pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i32) -> __m256i {
8336 macro_rules! call {
8337 ($imm4:expr) => {
8338 vcvtps2ph(a.as_f32x16(), $imm4, src.as_i16x16(), k)
8339 };
8340 }
8341 let r = constify_imm4_sae!(sae, call);
8342 transmute(r)
8343 }
8344
8345 /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8346 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8347 ///
8348 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_ph&expand=1780)
8349 #[inline]
8350 #[target_feature(enable = "avx512f")]
8351 #[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
8352 #[rustc_args_required_const(2)]
8353 pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
8354 macro_rules! call {
8355 ($imm4:expr) => {
8356 vcvtps2ph(a.as_f32x16(), $imm4, _mm256_setzero_si256().as_i16x16(), k)
8357 };
8358 }
8359 let r = constify_imm4_sae!(sae, call);
8360 transmute(r)
8361 }
8362
8363 /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
8364 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8365 ///
8366 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundph_ps&expand=1332)
8367 #[inline]
8368 #[target_feature(enable = "avx512f")]
8369 #[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
8370 #[rustc_args_required_const(1)]
8371 pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 {
8372 macro_rules! call {
8373 ($imm4:expr) => {
8374 vcvtph2ps(
8375 a.as_i16x16(),
8376 _mm512_setzero_ps().as_f32x16(),
8377 0b11111111_11111111,
8378 $imm4,
8379 )
8380 };
8381 }
8382 let r = constify_imm4_sae!(sae, call);
8383 transmute(r)
8384 }
8385
8386 /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8387 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8388 ///
8389 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundph_ps&expand=1333)
8390 #[inline]
8391 #[target_feature(enable = "avx512f")]
8392 #[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
8393 #[rustc_args_required_const(3)]
8394 pub unsafe fn _mm512_mask_cvt_roundph_ps(
8395 src: __m512,
8396 k: __mmask16,
8397 a: __m256i,
8398 sae: i32,
8399 ) -> __m512 {
8400 macro_rules! call {
8401 ($imm4:expr) => {
8402 vcvtph2ps(a.as_i16x16(), src.as_f32x16(), k, $imm4)
8403 };
8404 }
8405 let r = constify_imm4_sae!(sae, call);
8406 transmute(r)
8407 }
8408
8409 /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8410 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8411 ///
8412 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundph_ps&expand=1334)
8413 #[inline]
8414 #[target_feature(enable = "avx512f")]
8415 #[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
8416 #[rustc_args_required_const(2)]
8417 pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i, sae: i32) -> __m512 {
8418 macro_rules! call {
8419 ($imm4:expr) => {
8420 vcvtph2ps(a.as_i16x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
8421 };
8422 }
8423 let r = constify_imm4_sae!(sae, call);
8424 transmute(r)
8425 }
8426
8427 /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
8428 ///
8429 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtph_ps&expand=1723)
8430 #[inline]
8431 #[target_feature(enable = "avx512f")]
8432 #[cfg_attr(test, assert_instr(vcvtph2ps))]
8433 pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
8434 transmute(vcvtph2ps(
8435 a.as_i16x16(),
8436 _mm512_setzero_ps().as_f32x16(),
8437 0b11111111_11111111,
8438 _MM_FROUND_NO_EXC,
8439 ))
8440 }
8441
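// A minimal sketch, assuming an AVX-512F-capable CPU; the input bit pattern and helper
// name are illustrative. The input lanes are binary16 values: 0x3C00 is 1.0 in half
// precision and widens exactly to 1.0f32.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtph_ps() {
    let a: __m256i = mem::transmute([0x3C00_u16; 16]);
    let r = _mm512_cvtph_ps(a);
    let lanes: [f32; 16] = mem::transmute(r);
    assert!(lanes.iter().all(|&x| x == 1.0));
}
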
8442 /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8443 ///
8444 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtph_ps&expand=1724)
8445 #[inline]
8446 #[target_feature(enable = "avx512f")]
8447 #[cfg_attr(test, assert_instr(vcvtph2ps))]
8448 pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
8449 transmute(vcvtph2ps(
8450 a.as_i16x16(),
8451 src.as_f32x16(),
8452 k,
8453 _MM_FROUND_NO_EXC,
8454 ))
8455 }
8456
8457 /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8458 ///
8459 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtph_ps&expand=1725)
8460 #[inline]
8461 #[target_feature(enable = "avx512f")]
8462 #[cfg_attr(test, assert_instr(vcvtph2ps))]
8463 pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
8464 transmute(vcvtph2ps(
8465 a.as_i16x16(),
8466 _mm512_setzero_ps().as_f32x16(),
8467 k,
8468 _MM_FROUND_NO_EXC,
8469 ))
8470 }
8471
8472 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
8473 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8474 ///
8475 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundps_epi32&expand=1916)
8476 #[inline]
8477 #[target_feature(enable = "avx512f")]
8478 #[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
8479 #[rustc_args_required_const(1)]
8480 pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i {
8481 macro_rules! call {
8482 ($imm4:expr) => {
8483 vcvttps2dq(
8484 a.as_f32x16(),
8485 _mm512_setzero_si512().as_i32x16(),
8486 0b11111111_11111111,
8487 $imm4,
8488 )
8489 };
8490 }
8491 let r = constify_imm4_sae!(sae, call);
8492 transmute(r)
8493 }
8494
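// A minimal sketch, assuming an AVX-512F-capable CPU; the input value and helper name
// are illustrative. The `tt` (truncate) forms always round toward zero; `sae` only
// decides whether precision exceptions are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtt_roundps_epi32() {
    let a: __m512 = mem::transmute([-1.9_f32; 16]);
    let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
    let lanes: [i32; 16] = mem::transmute(r);
    // Truncation moves toward zero, so -1.9 becomes -1, not -2.
    assert!(lanes.iter().all(|&x| x == -1));
}
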
8495 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8496 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8497 ///
8498 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundps_epi32&expand=1917)
8499 #[inline]
8500 #[target_feature(enable = "avx512f")]
8501 #[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
8502 #[rustc_args_required_const(3)]
8503 pub unsafe fn _mm512_mask_cvtt_roundps_epi32(
8504 src: __m512i,
8505 k: __mmask16,
8506 a: __m512,
8507 sae: i32,
8508 ) -> __m512i {
8509 macro_rules! call {
8510 ($imm4:expr) => {
8511 vcvttps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
8512 };
8513 }
8514 let r = constify_imm4_sae!(sae, call);
8515 transmute(r)
8516 }
8517
8518 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8519 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8520 ///
8521 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundps_epi32&expand=1918)
8522 #[inline]
8523 #[target_feature(enable = "avx512f")]
8524 #[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
8525 #[rustc_args_required_const(2)]
8526 pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
8527 macro_rules! call {
8528 ($imm4:expr) => {
8529 vcvttps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
8530 };
8531 }
8532 let r = constify_imm4_sae!(sae, call);
8533 transmute(r)
8534 }
8535
8536 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
8537 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8538 ///
8539 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundps_epu32&expand=1922)
8540 #[inline]
8541 #[target_feature(enable = "avx512f")]
8542 #[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
8543 #[rustc_args_required_const(1)]
8544 pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i {
8545 macro_rules! call {
8546 ($imm4:expr) => {
8547 vcvttps2udq(
8548 a.as_f32x16(),
8549 _mm512_setzero_si512().as_i32x16(),
8550 0b11111111_11111111,
8551 $imm4,
8552 )
8553 };
8554 }
8555 let r = constify_imm4_sae!(sae, call);
8556 transmute(r)
8557 }
8558
8559 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8560 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8561 ///
8562 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundps_epu32&expand=1923)
8563 #[inline]
8564 #[target_feature(enable = "avx512f")]
8565 #[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
8566 #[rustc_args_required_const(3)]
8567 pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
8568 src: __m512i,
8569 k: __mmask16,
8570 a: __m512,
8571 sae: i32,
8572 ) -> __m512i {
8573 macro_rules! call {
8574 ($imm4:expr) => {
8575 vcvttps2udq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
8576 };
8577 }
8578 let r = constify_imm4_sae!(sae, call);
8579 transmute(r)
8580 }
8581
8582 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8583 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8584 ///
8585 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundps_epu32&expand=1924)
8586 #[inline]
8587 #[target_feature(enable = "avx512f")]
8588 #[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
8589 #[rustc_args_required_const(2)]
8590 pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
8591 macro_rules! call {
8592 ($imm4:expr) => {
8593 vcvttps2udq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
8594 };
8595 }
8596 let r = constify_imm4_sae!(sae, call);
8597 transmute(r)
8598 }
8599
8600 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
8601 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8602 ///
8603 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundpd_epi32&expand=1904)
8604 #[inline]
8605 #[target_feature(enable = "avx512f")]
8606 #[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
8607 #[rustc_args_required_const(1)]
8608 pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i {
8609 macro_rules! call {
8610 ($imm4:expr) => {
8611 vcvttpd2dq(
8612 a.as_f64x8(),
8613 _mm256_setzero_si256().as_i32x8(),
8614 0b11111111,
8615 $imm4,
8616 )
8617 };
8618 }
8619 let r = constify_imm4_sae!(sae, call);
8620 transmute(r)
8621 }
8622
8623 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8624 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8625 ///
8626 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundpd_epi32&expand=1905)
8627 #[inline]
8628 #[target_feature(enable = "avx512f")]
8629 #[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
8630 #[rustc_args_required_const(3)]
8631 pub unsafe fn _mm512_mask_cvtt_roundpd_epi32(
8632 src: __m256i,
8633 k: __mmask8,
8634 a: __m512d,
8635 sae: i32,
8636 ) -> __m256i {
8637 macro_rules! call {
8638 ($imm4:expr) => {
8639 vcvttpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
8640 };
8641 }
8642 let r = constify_imm4_sae!(sae, call);
8643 transmute(r)
8644 }
8645
8646 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8647 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8648 ///
8649 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundpd_epi32&expand=1906)
8650 #[inline]
8651 #[target_feature(enable = "avx512f")]
8652 #[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
8653 #[rustc_args_required_const(2)]
8654 pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
8655 macro_rules! call {
8656 ($imm4:expr) => {
8657 vcvttpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
8658 };
8659 }
8660 let r = constify_imm4_sae!(sae, call);
8661 transmute(r)
8662 }
8663
8664 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
8665 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8666 ///
8667 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundpd_epu32&expand=1910)
8668 #[inline]
8669 #[target_feature(enable = "avx512f")]
8670 #[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
8671 #[rustc_args_required_const(1)]
8672 pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i {
8673 macro_rules! call {
8674 ($imm4:expr) => {
8675 vcvttpd2udq(
8676 a.as_f64x8(),
8677 _mm256_setzero_si256().as_i32x8(),
8678 0b11111111,
8679 $imm4,
8680 )
8681 };
8682 }
8683 let r = constify_imm4_sae!(sae, call);
8684 transmute(r)
8685 }
8686
8687 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8688 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8689 ///
8690 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundpd_epu32&expand=1911)
8691 #[inline]
8692 #[target_feature(enable = "avx512f")]
8693 #[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
8694 #[rustc_args_required_const(3)]
8695 pub unsafe fn _mm512_mask_cvtt_roundpd_epu32(
8696 src: __m256i,
8697 k: __mmask8,
8698 a: __m512d,
8699 sae: i32,
8700 ) -> __m256i {
8701 macro_rules! call {
8702 ($imm4:expr) => {
8703 vcvttpd2udq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
8704 };
8705 }
8706 let r = constify_imm4_sae!(sae, call);
8707 transmute(r)
8708 }
8709
8710 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
8711 ///
8712 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttps_epi32&expand=1984)
8713 #[inline]
8714 #[target_feature(enable = "avx512f")]
8715 #[cfg_attr(test, assert_instr(vcvttps2dq))]
8716 pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
8717 transmute(vcvttps2dq(
8718 a.as_f32x16(),
8719 _mm512_setzero_si512().as_i32x16(),
8720 0b11111111_11111111,
8721 _MM_FROUND_CUR_DIRECTION,
8722 ))
8723 }
8724
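// A minimal sketch, assuming an AVX-512F-capable CPU; the input value and helper name
// are illustrative. The plain form needs no immediate: truncation is implied and the
// current MXCSR state governs exception reporting.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttps_epi32() {
    let a: __m512 = mem::transmute([3.99_f32; 16]);
    let lanes: [i32; 16] = mem::transmute(_mm512_cvttps_epi32(a));
    assert!(lanes.iter().all(|&x| x == 3));
}
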
8725 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8726 ///
8727 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttps_epi32&expand=1985)
8728 #[inline]
8729 #[target_feature(enable = "avx512f")]
8730 #[cfg_attr(test, assert_instr(vcvttps2dq))]
8731 pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
8732 transmute(vcvttps2dq(
8733 a.as_f32x16(),
8734 src.as_i32x16(),
8735 k,
8736 _MM_FROUND_CUR_DIRECTION,
8737 ))
8738 }
8739
8740 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8741 ///
8742 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttps_epi32&expand=1986)
8743 #[inline]
8744 #[target_feature(enable = "avx512f")]
8745 #[cfg_attr(test, assert_instr(vcvttps2dq))]
8746 pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
8747 transmute(vcvttps2dq(
8748 a.as_f32x16(),
8749 _mm512_setzero_si512().as_i32x16(),
8750 k,
8751 _MM_FROUND_CUR_DIRECTION,
8752 ))
8753 }
8754
8755 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
8756 ///
8757 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttps_epu32&expand=2002)
8758 #[inline]
8759 #[target_feature(enable = "avx512f")]
8760 #[cfg_attr(test, assert_instr(vcvttps2udq))]
8761 pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
8762 transmute(vcvttps2udq(
8763 a.as_f32x16(),
8764 _mm512_setzero_si512().as_i32x16(),
8765 0b11111111_11111111,
8766 _MM_FROUND_CUR_DIRECTION,
8767 ))
8768 }
8769
8770 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8771 ///
8772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttps_epu32&expand=2003)
8773 #[inline]
8774 #[target_feature(enable = "avx512f")]
8775 #[cfg_attr(test, assert_instr(vcvttps2udq))]
8776 pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
8777 transmute(vcvttps2udq(
8778 a.as_f32x16(),
8779 src.as_i32x16(),
8780 k,
8781 _MM_FROUND_CUR_DIRECTION,
8782 ))
8783 }
8784
8785 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8786 ///
8787 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttps_epu32&expand=2004)
8788 #[inline]
8789 #[target_feature(enable = "avx512f")]
8790 #[cfg_attr(test, assert_instr(vcvttps2udq))]
8791 pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
8792 transmute(vcvttps2udq(
8793 a.as_f32x16(),
8794 _mm512_setzero_si512().as_i32x16(),
8795 k,
8796 _MM_FROUND_CUR_DIRECTION,
8797 ))
8798 }
8799
8800 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8801 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8802 ///
8803 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundpd_epu32&expand=1912)
8804 #[inline]
8805 #[target_feature(enable = "avx512f")]
8806 #[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
8807 #[rustc_args_required_const(2)]
8808 pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
8809 macro_rules! call {
8810 ($imm4:expr) => {
8811 vcvttpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
8812 };
8813 }
8814 let r = constify_imm4_sae!(sae, call);
8815 transmute(r)
8816 }
8817
8818 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
8819 ///
8820 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttpd_epi32&expand=1947)
8821 #[inline]
8822 #[target_feature(enable = "avx512f")]
8823 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
8824 pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
8825 transmute(vcvttpd2dq(
8826 a.as_f64x8(),
8827 _mm256_setzero_si256().as_i32x8(),
8828 0b11111111,
8829 _MM_FROUND_CUR_DIRECTION,
8830 ))
8831 }
8832
8833 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8834 ///
8835 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttpd_epi32&expand=1948)
8836 #[inline]
8837 #[target_feature(enable = "avx512f")]
8838 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
8839 pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
8840 transmute(vcvttpd2dq(
8841 a.as_f64x8(),
8842 src.as_i32x8(),
8843 k,
8844 _MM_FROUND_CUR_DIRECTION,
8845 ))
8846 }
8847
8848 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8849 ///
8850 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttpd_epi32&expand=1949)
8851 #[inline]
8852 #[target_feature(enable = "avx512f")]
8853 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
8854 pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
8855 transmute(vcvttpd2dq(
8856 a.as_f64x8(),
8857 _mm256_setzero_si256().as_i32x8(),
8858 k,
8859 _MM_FROUND_CUR_DIRECTION,
8860 ))
8861 }
8862
8863 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
8864 ///
8865 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttpd_epu32&expand=1965)
8866 #[inline]
8867 #[target_feature(enable = "avx512f")]
8868 #[cfg_attr(test, assert_instr(vcvttpd2udq))]
8869 pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
8870 transmute(vcvttpd2udq(
8871 a.as_f64x8(),
8872 _mm256_setzero_si256().as_i32x8(),
8873 0b11111111,
8874 _MM_FROUND_CUR_DIRECTION,
8875 ))
8876 }
8877
8878 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8879 ///
8880 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttpd_epu32&expand=1966)
8881 #[inline]
8882 #[target_feature(enable = "avx512f")]
8883 #[cfg_attr(test, assert_instr(vcvttpd2udq))]
8884 pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
8885 transmute(vcvttpd2udq(
8886 a.as_f64x8(),
8887 src.as_i32x8(),
8888 k,
8889 _MM_FROUND_CUR_DIRECTION,
8890 ))
8891 }
8892
8893 /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8894 ///
8895 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttpd_epu32&expand=1967)
8896 #[inline]
8897 #[target_feature(enable = "avx512f")]
8898 #[cfg_attr(test, assert_instr(vcvttpd2udq))]
8899 pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
8900 transmute(vcvttpd2udq(
8901 a.as_f64x8(),
8902 _mm256_setzero_si256().as_i32x8(),
8903 k,
8904 _MM_FROUND_CUR_DIRECTION,
8905 ))
8906 }
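
// Illustrative sketch (not part of the upstream source): unsigned truncating
// conversion. The inputs are deliberately non-negative and in range for u32;
// the helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttpd_epu32() -> __m256i {
    let a = _mm512_set_pd(7.9, 6.5, 5.1, 4.0, 3.9, 2.5, 1.1, 0.0);
    // Truncates toward zero, giving the unsigned values 0, 1, 2, 3, 4, 5, 6, 7 in lane order.
    _mm512_cvttpd_epu32(a)
}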
8907
8908 /// Returns vector of type `__m512d` with all elements set to zero.
8909 ///
8910 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
8911 #[inline]
8912 #[target_feature(enable = "avx512f")]
8913 #[cfg_attr(test, assert_instr(vxorps))]
8914 pub unsafe fn _mm512_setzero_pd() -> __m512d {
8915 // All-0 is a properly initialized __m512d
8916 mem::zeroed()
8917 }
8918
8919 /// Returns vector of type `__m512` with all elements set to zero.
8920 ///
8921 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_ps)
8922 #[inline]
8923 #[target_feature(enable = "avx512f")]
8924 #[cfg_attr(test, assert_instr(vxorps))]
8925 pub unsafe fn _mm512_setzero_ps() -> __m512 {
8926 // All-0 is a properly initialized __m512
8927 mem::zeroed()
8928 }
8929
8930 /// Returns vector of type `__m512` with all elements set to zero.
8931 ///
8932 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setzero&expand=5014)
8933 #[inline]
8934 #[target_feature(enable = "avx512f")]
8935 #[cfg_attr(test, assert_instr(vxorps))]
8936 pub unsafe fn _mm512_setzero() -> __m512 {
8937 // All-0 is a properly initialized __m512
8938 mem::zeroed()
8939 }
8940
8941 /// Returns vector of type `__m512i` with all elements set to zero.
8942 ///
8943 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
8944 #[inline]
8945 #[target_feature(enable = "avx512f")]
8946 #[cfg_attr(test, assert_instr(vxorps))]
8947 pub unsafe fn _mm512_setzero_si512() -> __m512i {
8948 // All-0 is a properly initialized __m512i
8949 mem::zeroed()
8950 }
8951
8952 /// Returns vector of type `__m512i` with all elements set to zero.
8953 ///
8954 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setzero_epi32&expand=5015)
8955 #[inline]
8956 #[target_feature(enable = "avx512f")]
8957 #[cfg_attr(test, assert_instr(vxorps))]
8958 pub unsafe fn _mm512_setzero_epi32() -> __m512i {
8959 // All-0 is a properly initialized __m512i
8960 mem::zeroed()
8961 }
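
// Illustrative sketch (not part of the upstream source): the zero constructors
// above only differ in the nominal element type; each yields the same all-zero
// 512-bit register. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_zero_init() -> (__m512d, __m512, __m512i) {
    (_mm512_setzero_pd(), _mm512_setzero_ps(), _mm512_setzero_si512())
}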
8962
8963 /// Sets packed 32-bit integers in `dst` with the supplied values in reverse
8964 /// order.
8965 #[inline]
8966 #[target_feature(enable = "avx512f")]
8967 pub unsafe fn _mm512_setr_epi32(
8968 e15: i32,
8969 e14: i32,
8970 e13: i32,
8971 e12: i32,
8972 e11: i32,
8973 e10: i32,
8974 e9: i32,
8975 e8: i32,
8976 e7: i32,
8977 e6: i32,
8978 e5: i32,
8979 e4: i32,
8980 e3: i32,
8981 e2: i32,
8982 e1: i32,
8983 e0: i32,
8984 ) -> __m512i {
8985 let r = i32x16(
8986 e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
8987 );
8988 transmute(r)
8989 }
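
// Illustrative sketch (not part of the upstream source): with the "r"
// (reversed) constructor the first argument lands in lane 0, so the vector
// below holds 0, 1, ..., 15 in lane order. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_setr_epi32_identity() -> __m512i {
    _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
}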
8990
8991 /// Set packed 8-bit integers in dst with the supplied values.
8992 ///
8993 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set_epi8&expand=4915)
8994 #[inline]
8995 #[target_feature(enable = "avx512f")]
8996 pub unsafe fn _mm512_set_epi8(
8997 e63: i8,
8998 e62: i8,
8999 e61: i8,
9000 e60: i8,
9001 e59: i8,
9002 e58: i8,
9003 e57: i8,
9004 e56: i8,
9005 e55: i8,
9006 e54: i8,
9007 e53: i8,
9008 e52: i8,
9009 e51: i8,
9010 e50: i8,
9011 e49: i8,
9012 e48: i8,
9013 e47: i8,
9014 e46: i8,
9015 e45: i8,
9016 e44: i8,
9017 e43: i8,
9018 e42: i8,
9019 e41: i8,
9020 e40: i8,
9021 e39: i8,
9022 e38: i8,
9023 e37: i8,
9024 e36: i8,
9025 e35: i8,
9026 e34: i8,
9027 e33: i8,
9028 e32: i8,
9029 e31: i8,
9030 e30: i8,
9031 e29: i8,
9032 e28: i8,
9033 e27: i8,
9034 e26: i8,
9035 e25: i8,
9036 e24: i8,
9037 e23: i8,
9038 e22: i8,
9039 e21: i8,
9040 e20: i8,
9041 e19: i8,
9042 e18: i8,
9043 e17: i8,
9044 e16: i8,
9045 e15: i8,
9046 e14: i8,
9047 e13: i8,
9048 e12: i8,
9049 e11: i8,
9050 e10: i8,
9051 e9: i8,
9052 e8: i8,
9053 e7: i8,
9054 e6: i8,
9055 e5: i8,
9056 e4: i8,
9057 e3: i8,
9058 e2: i8,
9059 e1: i8,
9060 e0: i8,
9061 ) -> __m512i {
9062 let r = i8x64(
9063 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
9064 e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37,
9065 e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55,
9066 e56, e57, e58, e59, e60, e61, e62, e63,
9067 );
9068 transmute(r)
9069 }
9070
9071 /// Set packed 16-bit integers in dst with the supplied values.
9072 ///
9073 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set_epi16&expand=4905)
9074 #[inline]
9075 #[target_feature(enable = "avx512f")]
9076 pub unsafe fn _mm512_set_epi16(
9077 e31: i16,
9078 e30: i16,
9079 e29: i16,
9080 e28: i16,
9081 e27: i16,
9082 e26: i16,
9083 e25: i16,
9084 e24: i16,
9085 e23: i16,
9086 e22: i16,
9087 e21: i16,
9088 e20: i16,
9089 e19: i16,
9090 e18: i16,
9091 e17: i16,
9092 e16: i16,
9093 e15: i16,
9094 e14: i16,
9095 e13: i16,
9096 e12: i16,
9097 e11: i16,
9098 e10: i16,
9099 e9: i16,
9100 e8: i16,
9101 e7: i16,
9102 e6: i16,
9103 e5: i16,
9104 e4: i16,
9105 e3: i16,
9106 e2: i16,
9107 e1: i16,
9108 e0: i16,
9109 ) -> __m512i {
9110 let r = i16x32(
9111 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
9112 e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
9113 );
9114 transmute(r)
9115 }
9116
9117 /// Set packed 32-bit integers in dst with the repeated 4 element sequence.
9118 ///
9119 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_epi32&expand=4982)
9120 #[inline]
9121 #[target_feature(enable = "avx512f")]
9122 pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
9123 _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
9124 }
9125
9126 /// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
9127 ///
9128 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_ps&expand=4985)
9129 #[inline]
9130 #[target_feature(enable = "avx512f")]
9131 pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
9132 _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
9133 }
9134
9135 /// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
9136 ///
9137 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_pd&expand=4984)
9138 #[inline]
9139 #[target_feature(enable = "avx512f")]
9140 pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
9141 _mm512_set_pd(d, c, b, a, d, c, b, a)
9142 }
9143
9144 /// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
9145 ///
9146 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_epi32&expand=5009)
9147 #[inline]
9148 #[target_feature(enable = "avx512f")]
9149 pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
9150 _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
9151 }
9152
9153 /// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
9154 ///
9155 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_ps&expand=5012)
9156 #[inline]
9157 #[target_feature(enable = "avx512f")]
9158 pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
9159 _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
9160 }
9161
9162 /// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
9163 ///
9164 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_pd&expand=5011)
9165 #[inline]
9166 #[target_feature(enable = "avx512f")]
9167 pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
9168 _mm512_set_pd(a, b, c, d, a, b, c, d)
9169 }
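
// Illustrative sketch (not part of the upstream source): `_mm512_set4_epi32`
// places its last argument in lane 0, while the "r" variant places its first
// argument in lane 0; both tile the 4-element pattern across all 16 lanes.
// The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set4_patterns() -> (__m512i, __m512i) {
    let hi_to_lo = _mm512_set4_epi32(3, 2, 1, 0); // lanes: 0, 1, 2, 3, 0, 1, 2, 3, ...
    let lo_to_hi = _mm512_setr4_epi32(3, 2, 1, 0); // lanes: 3, 2, 1, 0, 3, 2, 1, 0, ...
    (hi_to_lo, lo_to_hi)
}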
9170
9171 /// Set packed 64-bit integers in dst with the supplied values.
9172 ///
9173 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set_epi64&expand=4910)
9174 #[inline]
9175 #[target_feature(enable = "avx512f")]
9176 pub unsafe fn _mm512_set_epi64(
9177 e0: i64,
9178 e1: i64,
9179 e2: i64,
9180 e3: i64,
9181 e4: i64,
9182 e5: i64,
9183 e6: i64,
9184 e7: i64,
9185 ) -> __m512i {
9186 _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
9187 }
9188
9189 /// Set packed 64-bit integers in dst with the supplied values in reverse order.
9190 ///
9191 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr_epi64&expand=4993)
9192 #[inline]
9193 #[target_feature(enable = "avx512f")]
9194 pub unsafe fn _mm512_setr_epi64(
9195 e0: i64,
9196 e1: i64,
9197 e2: i64,
9198 e3: i64,
9199 e4: i64,
9200 e5: i64,
9201 e6: i64,
9202 e7: i64,
9203 ) -> __m512i {
9204 let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
9205 transmute(r)
9206 }
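
// Illustrative sketch (not part of the upstream source): `_mm512_set_epi64`
// takes the highest lane first, so the call below places 0 in lane 0 and 7 in
// lane 7; `_mm512_setr_epi64` takes the same values in plain lane order. The
// helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_epi64_orderings() -> (__m512i, __m512i) {
    let descending_args = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    let ascending_args = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // Both vectors hold 0, 1, ..., 7 in lane order.
    (descending_args, ascending_args)
}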
9207
9208 /// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9209 ///
9210 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_pd&expand=3002)
9211 #[inline]
9212 #[target_feature(enable = "avx512f")]
9213 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
9214 #[rustc_args_required_const(2)]
9215 pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d {
9216 let zero = _mm512_setzero_pd().as_f64x8();
9217 let neg_one = -1;
9218 let slice = slice as *const i8;
9219 let offsets = offsets.as_i32x8();
9220 macro_rules! call {
9221 ($imm8:expr) => {
9222 vgatherdpd(zero, slice, offsets, neg_one, $imm8)
9223 };
9224 }
9225 let r = constify_imm8_gather!(scale, call);
9226 transmute(r)
9227 }
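
// Illustrative sketch (not part of the upstream source): gathers eight f64
// values out of a flat array using 32-bit element indices and a scale of 8
// (the element size in bytes). The table shape and helper name are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32gather_pd(table: &[f64; 16]) -> __m512d {
    // Pick every other element: table[0], table[2], ..., table[14].
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32gather_pd(idx, table.as_ptr() as *const u8, 8)
}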
9228
9229 /// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9230 ///
9231 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_pd&expand=3003)
9232 #[inline]
9233 #[target_feature(enable = "avx512f")]
9234 #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
9235 #[rustc_args_required_const(4)]
9236 pub unsafe fn _mm512_mask_i32gather_pd(
9237 src: __m512d,
9238 mask: __mmask8,
9239 offsets: __m256i,
9240 slice: *const u8,
9241 scale: i32,
9242 ) -> __m512d {
9243 let src = src.as_f64x8();
9244 let slice = slice as *const i8;
9245 let offsets = offsets.as_i32x8();
9246 macro_rules! call {
9247 ($imm8:expr) => {
9248 vgatherdpd(src, slice, offsets, mask as i8, $imm8)
9249 };
9250 }
9251 let r = constify_imm8_gather!(scale, call);
9252 transmute(r)
9253 }
9254
9255 /// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9256 ///
9257 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_pd&expand=3092)
9258 #[inline]
9259 #[target_feature(enable = "avx512f")]
9260 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
9261 #[rustc_args_required_const(2)]
9262 pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d {
9263 let zero = _mm512_setzero_pd().as_f64x8();
9264 let neg_one = -1;
9265 let slice = slice as *const i8;
9266 let offsets = offsets.as_i64x8();
9267 macro_rules! call {
9268 ($imm8:expr) => {
9269 vgatherqpd(zero, slice, offsets, neg_one, $imm8)
9270 };
9271 }
9272 let r = constify_imm8_gather!(scale, call);
9273 transmute(r)
9274 }
9275
9276 /// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9277 ///
9278 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_pd&expand=3093)
9279 #[inline]
9280 #[target_feature(enable = "avx512f")]
9281 #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
9282 #[rustc_args_required_const(4)]
9283 pub unsafe fn _mm512_mask_i64gather_pd(
9284 src: __m512d,
9285 mask: __mmask8,
9286 offsets: __m512i,
9287 slice: *const u8,
9288 scale: i32,
9289 ) -> __m512d {
9290 let src = src.as_f64x8();
9291 let slice = slice as *const i8;
9292 let offsets = offsets.as_i64x8();
9293 macro_rules! call {
9294 ($imm8:expr) => {
9295 vgatherqpd(src, slice, offsets, mask as i8, $imm8)
9296 };
9297 }
9298 let r = constify_imm8_gather!(scale, call);
9299 transmute(r)
9300 }
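
// Illustrative sketch (not part of the upstream source): masked gather with
// 64-bit indices. Lanes whose mask bit is clear keep the value from `src`
// instead of touching memory. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i64gather_pd(table: &[f64; 8], src: __m512d) -> __m512d {
    let idx = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // Only the low four lanes are loaded from `table`; the rest pass through `src`.
    _mm512_mask_i64gather_pd(src, 0b00001111, idx, table.as_ptr() as *const u8, 8)
}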
9301
9302 /// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9303 ///
9304 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_ps&expand=3100)
9305 #[inline]
9306 #[target_feature(enable = "avx512f")]
9307 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
9308 #[rustc_args_required_const(2)]
9309 pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 {
9310 let zero = _mm256_setzero_ps().as_f32x8();
9311 let neg_one = -1;
9312 let slice = slice as *const i8;
9313 let offsets = offsets.as_i64x8();
9314 macro_rules! call {
9315 ($imm8:expr) => {
9316 vgatherqps(zero, slice, offsets, neg_one, $imm8)
9317 };
9318 }
9319 let r = constify_imm8_gather!(scale, call);
9320 transmute(r)
9321 }
9322
9323 /// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9324 ///
9325 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_ps&expand=3101)
9326 #[inline]
9327 #[target_feature(enable = "avx512f")]
9328 #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
9329 #[rustc_args_required_const(4)]
9330 pub unsafe fn _mm512_mask_i64gather_ps(
9331 src: __m256,
9332 mask: __mmask8,
9333 offsets: __m512i,
9334 slice: *const u8,
9335 scale: i32,
9336 ) -> __m256 {
9337 let src = src.as_f32x8();
9338 let slice = slice as *const i8;
9339 let offsets = offsets.as_i64x8();
9340 macro_rules! call {
9341 ($imm8:expr) => {
9342 vgatherqps(src, slice, offsets, mask as i8, $imm8)
9343 };
9344 }
9345 let r = constify_imm8_gather!(scale, call);
9346 transmute(r)
9347 }
9348
9349 /// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9350 ///
9351 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_ps&expand=3010)
9352 #[inline]
9353 #[target_feature(enable = "avx512f")]
9354 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
9355 #[rustc_args_required_const(2)]
9356 pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 {
9357 let zero = _mm512_setzero_ps().as_f32x16();
9358 let neg_one = -1;
9359 let slice = slice as *const i8;
9360 let offsets = offsets.as_i32x16();
9361 macro_rules! call {
9362 ($imm8:expr) => {
9363 vgatherdps(zero, slice, offsets, neg_one, $imm8)
9364 };
9365 }
9366 let r = constify_imm8_gather!(scale, call);
9367 transmute(r)
9368 }
9369
9370 /// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9371 ///
9372 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_ps&expand=3011)
9373 #[inline]
9374 #[target_feature(enable = "avx512f")]
9375 #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
9376 #[rustc_args_required_const(4)]
9377 pub unsafe fn _mm512_mask_i32gather_ps(
9378 src: __m512,
9379 mask: __mmask16,
9380 offsets: __m512i,
9381 slice: *const u8,
9382 scale: i32,
9383 ) -> __m512 {
9384 let src = src.as_f32x16();
9385 let slice = slice as *const i8;
9386 let offsets = offsets.as_i32x16();
9387 macro_rules! call {
9388 ($imm8:expr) => {
9389 vgatherdps(src, slice, offsets, mask as i16, $imm8)
9390 };
9391 }
9392 let r = constify_imm8_gather!(scale, call);
9393 transmute(r)
9394 }
9395
9396 /// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9397 ///
9398 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_epi32&expand=2986)
9399 #[inline]
9400 #[target_feature(enable = "avx512f")]
9401 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
9402 #[rustc_args_required_const(2)]
9403 pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
9404 let zero = _mm512_setzero_si512().as_i32x16();
9405 let neg_one = -1;
9406 let slice = slice as *const i8;
9407 let offsets = offsets.as_i32x16();
9408 macro_rules! call {
9409 ($imm8:expr) => {
9410 vpgatherdd(zero, slice, offsets, neg_one, $imm8)
9411 };
9412 }
9413 let r = constify_imm8_gather!(scale, call);
9414 transmute(r)
9415 }
9416
9417 /// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9418 ///
9419 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_epi32&expand=2987)
9420 #[inline]
9421 #[target_feature(enable = "avx512f")]
9422 #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
9423 #[rustc_args_required_const(4)]
9424 pub unsafe fn _mm512_mask_i32gather_epi32(
9425 src: __m512i,
9426 mask: __mmask16,
9427 offsets: __m512i,
9428 slice: *const u8,
9429 scale: i32,
9430 ) -> __m512i {
9431 let src = src.as_i32x16();
9432 let mask = mask as i16;
9433 let slice = slice as *const i8;
9434 let offsets = offsets.as_i32x16();
9435 macro_rules! call {
9436 ($imm8:expr) => {
9437 vpgatherdd(src, slice, offsets, mask, $imm8)
9438 };
9439 }
9440 let r = constify_imm8_gather!(scale, call);
9441 transmute(r)
9442 }
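
// Illustrative sketch (not part of the upstream source): masked 32-bit integer
// gather; with scale 4 each 32-bit index is a plain element index into the
// table. The table shape and helper name are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i32gather_epi32(table: &[i32; 32], src: __m512i) -> __m512i {
    let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
    // Lanes with their mask bit set load from `table`; the odd lanes keep `src`.
    _mm512_mask_i32gather_epi32(src, 0b0101_0101_0101_0101, idx, table.as_ptr() as *const u8, 4)
}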
9443
9444 /// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9445 ///
9446 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_epi64&expand=2994)
9447 #[inline]
9448 #[target_feature(enable = "avx512f")]
9449 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
9450 #[rustc_args_required_const(2)]
9451 pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i {
9452 let zero = _mm512_setzero_si512().as_i64x8();
9453 let neg_one = -1;
9454 let slice = slice as *const i8;
9455 let offsets = offsets.as_i32x8();
9456 macro_rules! call {
9457 ($imm8:expr) => {
9458 vpgatherdq(zero, slice, offsets, neg_one, $imm8)
9459 };
9460 }
9461 let r = constify_imm8_gather!(scale, call);
9462 transmute(r)
9463 }
9464
9465 /// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9466 ///
9467 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_epi64&expand=2995)
9468 #[inline]
9469 #[target_feature(enable = "avx512f")]
9470 #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
9471 #[rustc_args_required_const(4)]
9472 pub unsafe fn _mm512_mask_i32gather_epi64(
9473 src: __m512i,
9474 mask: __mmask8,
9475 offsets: __m256i,
9476 slice: *const u8,
9477 scale: i32,
9478 ) -> __m512i {
9479 let src = src.as_i64x8();
9480 let mask = mask as i8;
9481 let slice = slice as *const i8;
9482 let offsets = offsets.as_i32x8();
9483 macro_rules! call {
9484 ($imm8:expr) => {
9485 vpgatherdq(src, slice, offsets, mask, $imm8)
9486 };
9487 }
9488 let r = constify_imm8_gather!(scale, call);
9489 transmute(r)
9490 }
9491
9492 /// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9493 ///
9494 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_epi64&expand=3084)
9495 #[inline]
9496 #[target_feature(enable = "avx512f")]
9497 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
9498 #[rustc_args_required_const(2)]
9499 pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
9500 let zero = _mm512_setzero_si512().as_i64x8();
9501 let neg_one = -1;
9502 let slice = slice as *const i8;
9503 let offsets = offsets.as_i64x8();
9504 macro_rules! call {
9505 ($imm8:expr) => {
9506 vpgatherqq(zero, slice, offsets, neg_one, $imm8)
9507 };
9508 }
9509 let r = constify_imm8_gather!(scale, call);
9510 transmute(r)
9511 }
9512
9513 /// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9514 ///
9515 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_epi64&expand=3085)
9516 #[inline]
9517 #[target_feature(enable = "avx512f")]
9518 #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
9519 #[rustc_args_required_const(4)]
9520 pub unsafe fn _mm512_mask_i64gather_epi64(
9521 src: __m512i,
9522 mask: __mmask8,
9523 offsets: __m512i,
9524 slice: *const u8,
9525 scale: i32,
9526 ) -> __m512i {
9527 let src = src.as_i64x8();
9528 let mask = mask as i8;
9529 let slice = slice as *const i8;
9530 let offsets = offsets.as_i64x8();
9531 macro_rules! call {
9532 ($imm8:expr) => {
9533 vpgatherqq(src, slice, offsets, mask, $imm8)
9534 };
9535 }
9536 let r = constify_imm8_gather!(scale, call);
9537 transmute(r)
9538 }
9539
9540 /// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
9541 ///
9542 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_epi32&expand=3074)
9543 #[inline]
9544 #[target_feature(enable = "avx512f")]
9545 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
9546 #[rustc_args_required_const(2)]
9547 pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i {
9548 let zeros = _mm256_setzero_si256().as_i32x8();
9549 let neg_one = -1;
9550 let slice = slice as *const i8;
9551 let offsets = offsets.as_i64x8();
9552 macro_rules! call {
9553 ($imm8:expr) => {
9554 vpgatherqd(zeros, slice, offsets, neg_one, $imm8)
9555 };
9556 }
9557 let r = constify_imm8_gather!(scale, call);
9558 transmute(r)
9559 }
9560
9561 /// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9562 ///
9563 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_epi32&expand=3075)
9564 #[inline]
9565 #[target_feature(enable = "avx512f")]
9566 #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
9567 #[rustc_args_required_const(4)]
9568 pub unsafe fn _mm512_mask_i64gather_epi32(
9569 src: __m256i,
9570 mask: __mmask8,
9571 offsets: __m512i,
9572 slice: *const u8,
9573 scale: i32,
9574 ) -> __m256i {
9575 let src = src.as_i32x8();
9576 let mask = mask as i8;
9577 let slice = slice as *const i8;
9578 let offsets = offsets.as_i64x8();
9579 macro_rules! call {
9580 ($imm8:expr) => {
9581 vpgatherqd(src, slice, offsets, mask, $imm8)
9582 };
9583 }
9584 let r = constify_imm8_gather!(scale, call);
9585 transmute(r)
9586 }
9587
9588 /// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9589 ///
9590 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_pd&expand=3044)
9591 #[inline]
9592 #[target_feature(enable = "avx512f")]
9593 #[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
9594 #[rustc_args_required_const(3)]
9595 pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
9596 let src = src.as_f64x8();
9597 let neg_one = -1;
9598 let slice = slice as *mut i8;
9599 let offsets = offsets.as_i32x8();
9600 macro_rules! call {
9601 ($imm8:expr) => {
9602 vscatterdpd(slice, neg_one, offsets, src, $imm8)
9603 };
9604 }
9605 constify_imm8_gather!(scale, call);
9606 }
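
// Illustrative sketch (not part of the upstream source): scatters the eight
// f64 lanes of `src` into a flat array at 32-bit element indices, with a scale
// of 8 bytes per element. The buffer shape and helper name are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32scatter_pd(out: &mut [f64; 16], src: __m512d) {
    // Lane i of `src` is written to out[2 * i].
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32scatter_pd(out.as_mut_ptr() as *mut u8, idx, src, 8);
}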
9607
9608 /// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9609 ///
9610 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_pd&expand=3045)
9611 #[inline]
9612 #[target_feature(enable = "avx512f")]
9613 #[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
9614 #[rustc_args_required_const(4)]
9615 pub unsafe fn _mm512_mask_i32scatter_pd(
9616 slice: *mut u8,
9617 mask: __mmask8,
9618 offsets: __m256i,
9619 src: __m512d,
9620 scale: i32,
9621 ) {
9622 let src = src.as_f64x8();
9623 let slice = slice as *mut i8;
9624 let offsets = offsets.as_i32x8();
9625 macro_rules! call {
9626 ($imm8:expr) => {
9627 vscatterdpd(slice, mask as i8, offsets, src, $imm8)
9628 };
9629 }
9630 constify_imm8_gather!(scale, call);
9631 }
9632
9633 /// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9634 ///
9635 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_pd&expand=3122)
9636 #[inline]
9637 #[target_feature(enable = "avx512f")]
9638 #[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
9639 #[rustc_args_required_const(3)]
9640 pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) {
9641 let src = src.as_f64x8();
9642 let neg_one = -1;
9643 let slice = slice as *mut i8;
9644 let offsets = offsets.as_i64x8();
9645 macro_rules! call {
9646 ($imm8:expr) => {
9647 vscatterqpd(slice, neg_one, offsets, src, $imm8)
9648 };
9649 }
9650 constify_imm8_gather!(scale, call);
9651 }
9652
9653 /// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9654 ///
9655 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_pd&expand=3123)
9656 #[inline]
9657 #[target_feature(enable = "avx512f")]
9658 #[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
9659 #[rustc_args_required_const(4)]
9660 pub unsafe fn _mm512_mask_i64scatter_pd(
9661 slice: *mut u8,
9662 mask: __mmask8,
9663 offsets: __m512i,
9664 src: __m512d,
9665 scale: i32,
9666 ) {
9667 let src = src.as_f64x8();
9668 let slice = slice as *mut i8;
9669 let offsets = offsets.as_i64x8();
9670 macro_rules! call {
9671 ($imm8:expr) => {
9672 vscatterqpd(slice, mask as i8, offsets, src, $imm8)
9673 };
9674 }
9675 constify_imm8_gather!(scale, call);
9676 }
9677
9678 /// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9679 ///
9680 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_ps&expand=3050)
9681 #[inline]
9682 #[target_feature(enable = "avx512f")]
9683 #[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
9684 #[rustc_args_required_const(3)]
9685 pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) {
9686 let src = src.as_f32x16();
9687 let neg_one = -1;
9688 let slice = slice as *mut i8;
9689 let offsets = offsets.as_i32x16();
9690 macro_rules! call {
9691 ($imm8:expr) => {
9692 vscatterdps(slice, neg_one, offsets, src, $imm8)
9693 };
9694 }
9695 constify_imm8_gather!(scale, call);
9696 }
9697
9698 /// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9699 ///
9700 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_ps&expand=3051)
9701 #[inline]
9702 #[target_feature(enable = "avx512f")]
9703 #[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
9704 #[rustc_args_required_const(4)]
9705 pub unsafe fn _mm512_mask_i32scatter_ps(
9706 slice: *mut u8,
9707 mask: __mmask16,
9708 offsets: __m512i,
9709 src: __m512,
9710 scale: i32,
9711 ) {
9712 let src = src.as_f32x16();
9713 let slice = slice as *mut i8;
9714 let offsets = offsets.as_i32x16();
9715 macro_rules! call {
9716 ($imm8:expr) => {
9717 vscatterdps(slice, mask as i16, offsets, src, $imm8)
9718 };
9719 }
9720 constify_imm8_gather!(scale, call);
9721 }
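
// Illustrative sketch (not part of the upstream source): masked scatter of
// single-precision lanes; lanes whose mask bit is clear leave memory untouched.
// The buffer shape and helper name are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i32scatter_ps(out: &mut [f32; 16], src: __m512) {
    let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // Only the low eight lanes are stored; out[8..] is left as it was.
    _mm512_mask_i32scatter_ps(out.as_mut_ptr() as *mut u8, 0b0000_0000_1111_1111, idx, src, 4);
}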
9722
9723 /// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9724 ///
9725 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_ps&expand=3128)
9726 #[inline]
9727 #[target_feature(enable = "avx512f")]
9728 #[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
9729 #[rustc_args_required_const(3)]
9730 pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) {
9731 let src = src.as_f32x8();
9732 let neg_one = -1;
9733 let slice = slice as *mut i8;
9734 let offsets = offsets.as_i64x8();
9735 macro_rules! call {
9736 ($imm8:expr) => {
9737 vscatterqps(slice, neg_one, offsets, src, $imm8)
9738 };
9739 }
9740 constify_imm8_gather!(scale, call);
9741 }
9742
9743 /// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9744 ///
9745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_ps&expand=3129)
9746 #[inline]
9747 #[target_feature(enable = "avx512f")]
9748 #[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
9749 #[rustc_args_required_const(4)]
9750 pub unsafe fn _mm512_mask_i64scatter_ps(
9751 slice: *mut u8,
9752 mask: __mmask8,
9753 offsets: __m512i,
9754 src: __m256,
9755 scale: i32,
9756 ) {
9757 let src = src.as_f32x8();
9758 let slice = slice as *mut i8;
9759 let offsets = offsets.as_i64x8();
9760 macro_rules! call {
9761 ($imm8:expr) => {
9762 vscatterqps(slice, mask as i8, offsets, src, $imm8)
9763 };
9764 }
9765 constify_imm8_gather!(scale, call);
9766 }
9767
9768 /// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9769 ///
9770 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_epi64&expand=3038)
9771 #[inline]
9772 #[target_feature(enable = "avx512f")]
9773 #[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
9774 #[rustc_args_required_const(3)]
9775 pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) {
9776 let src = src.as_i64x8();
9777 let neg_one = -1;
9778 let slice = slice as *mut i8;
9779 let offsets = offsets.as_i32x8();
9780 macro_rules! call {
9781 ($imm8:expr) => {
9782 vpscatterdq(slice, neg_one, offsets, src, $imm8)
9783 };
9784 }
9785 constify_imm8_gather!(scale, call);
9786 }
9787
9788 /// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9789 ///
9790 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_epi64&expand=3039)
9791 #[inline]
9792 #[target_feature(enable = "avx512f")]
9793 #[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
9794 #[rustc_args_required_const(4)]
9795 pub unsafe fn _mm512_mask_i32scatter_epi64(
9796 slice: *mut u8,
9797 mask: __mmask8,
9798 offsets: __m256i,
9799 src: __m512i,
9800 scale: i32,
9801 ) {
9802 let src = src.as_i64x8();
9803 let mask = mask as i8;
9804 let slice = slice as *mut i8;
9805 let offsets = offsets.as_i32x8();
9806 macro_rules! call {
9807 ($imm8:expr) => {
9808 vpscatterdq(slice, mask, offsets, src, $imm8)
9809 };
9810 }
9811 constify_imm8_gather!(scale, call);
9812 }
9813
9814 /// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9815 ///
9816 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_epi64&expand=3116)
9817 #[inline]
9818 #[target_feature(enable = "avx512f")]
9819 #[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
9820 #[rustc_args_required_const(3)]
9821 pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
9822 let src = src.as_i64x8();
9823 let neg_one = -1;
9824 let slice = slice as *mut i8;
9825 let offsets = offsets.as_i64x8();
9826 macro_rules! call {
9827 ($imm8:expr) => {
9828 vpscatterqq(slice, neg_one, offsets, src, $imm8)
9829 };
9830 }
9831 constify_imm8_gather!(scale, call);
9832 }
9833
9834 /// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9835 ///
9836 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_epi64&expand=3117)
9837 #[inline]
9838 #[target_feature(enable = "avx512f")]
9839 #[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
9840 #[rustc_args_required_const(4)]
9841 pub unsafe fn _mm512_mask_i64scatter_epi64(
9842 slice: *mut u8,
9843 mask: __mmask8,
9844 offsets: __m512i,
9845 src: __m512i,
9846 scale: i32,
9847 ) {
9848 let src = src.as_i64x8();
9849 let mask = mask as i8;
9850 let slice = slice as *mut i8;
9851 let offsets = offsets.as_i64x8();
9852 macro_rules! call {
9853 ($imm8:expr) => {
9854 vpscatterqq(slice, mask, offsets, src, $imm8)
9855 };
9856 }
9857 constify_imm8_gather!(scale, call);
9858 }
9859
9860 /// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9861 ///
9862 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_epi32&expand=3032)
9863 #[inline]
9864 #[target_feature(enable = "avx512f")]
9865 #[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
9866 #[rustc_args_required_const(3)]
9867 pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
9868 let src = src.as_i32x16();
9869 let neg_one = -1;
9870 let slice = slice as *mut i8;
9871 let offsets = offsets.as_i32x16();
9872 macro_rules! call {
9873 ($imm8:expr) => {
9874 vpscatterdd(slice, neg_one, offsets, src, $imm8)
9875 };
9876 }
9877 constify_imm8_gather!(scale, call);
9878 }
9879
9880 /// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9881 ///
9882 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_epi32&expand=3033)
9883 #[inline]
9884 #[target_feature(enable = "avx512f")]
9885 #[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
9886 #[rustc_args_required_const(4)]
9887 pub unsafe fn _mm512_mask_i32scatter_epi32(
9888 slice: *mut u8,
9889 mask: __mmask16,
9890 offsets: __m512i,
9891 src: __m512i,
9892 scale: i32,
9893 ) {
9894 let src = src.as_i32x16();
9895 let mask = mask as i16;
9896 let slice = slice as *mut i8;
9897 let offsets = offsets.as_i32x16();
9898 macro_rules! call {
9899 ($imm8:expr) => {
9900 vpscatterdd(slice, mask, offsets, src, $imm8)
9901 };
9902 }
9903 constify_imm8_gather!(scale, call);
9904 }
9905
9906 /// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
9907 ///
9908 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_epi32&expand=3108)
9909 #[inline]
9910 #[target_feature(enable = "avx512f")]
9911 #[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
9912 #[rustc_args_required_const(3)]
9913 pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) {
9914 let src = src.as_i32x8();
9915 let neg_one = -1;
9916 let slice = slice as *mut i8;
9917 let offsets = offsets.as_i64x8();
9918 macro_rules! call {
9919 ($imm8:expr) => {
9920 vpscatterqd(slice, neg_one, offsets, src, $imm8)
9921 };
9922 }
9923 constify_imm8_gather!(scale, call);
9924 }
9925
9926 /// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
9927 ///
9928 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_epi32&expand=3109)
9929 #[inline]
9930 #[target_feature(enable = "avx512f")]
9931 #[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
9932 #[rustc_args_required_const(4)]
9933 pub unsafe fn _mm512_mask_i64scatter_epi32(
9934 slice: *mut u8,
9935 mask: __mmask8,
9936 offsets: __m512i,
9937 src: __m256i,
9938 scale: i32,
9939 ) {
9940 let src = src.as_i32x8();
9941 let mask = mask as i8;
9942 let slice = slice as *mut i8;
9943 let offsets = offsets.as_i64x8();
9944 macro_rules! call {
9945 ($imm8:expr) => {
9946 vpscatterqd(slice, mask, offsets, src, $imm8)
9947 };
9948 }
9949 constify_imm8_gather!(scale, call);
9950 }
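
// Illustrative sketch (not part of the upstream source): 64-bit indices select
// where each of the eight 32-bit lanes of `src` is stored, and clear mask bits
// skip the store entirely. The buffer shape and helper name are hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i64scatter_epi32(out: &mut [i32; 8], src: __m256i) {
    let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    // Stores the low four lanes of `src` to out[7], out[6], out[5], out[4]; the upper lanes are masked off.
    _mm512_mask_i64scatter_epi32(out.as_mut_ptr() as *mut u8, 0b0000_1111, idx, src, 4);
}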
9951
9952 /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
9953 ///
9954 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_epi32&expand=1198)
9955 #[inline]
9956 #[target_feature(enable = "avx512f")]
9957 #[cfg_attr(test, assert_instr(vpcompressd))]
9958 pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
9959 transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k))
9960 }
9961
9962 /// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
9963 ///
9964 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_epi32&expand=1199)
9965 #[inline]
9966 #[target_feature(enable = "avx512f")]
9967 #[cfg_attr(test, assert_instr(vpcompressd))]
9968 pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
9969 transmute(vpcompressd(
9970 a.as_i32x16(),
9971 _mm512_setzero_si512().as_i32x16(),
9972 k,
9973 ))
9974 }
9975
9976 /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
9977 ///
9978 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_epi64&expand=1204)
9979 #[inline]
9980 #[target_feature(enable = "avx512f")]
9981 #[cfg_attr(test, assert_instr(vpcompressq))]
9982 pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
9983 transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k))
9984 }
9985
9986 /// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
9987 ///
9988 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_epi64&expand=1205)
9989 #[inline]
9990 #[target_feature(enable = "avx512f")]
9991 #[cfg_attr(test, assert_instr(vpcompressq))]
9992 pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
9993 transmute(vpcompressq(
9994 a.as_i64x8(),
9995 _mm512_setzero_si512().as_i64x8(),
9996 k,
9997 ))
9998 }
9999
10000 /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
10001 ///
10002 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_ps&expand=1222)
10003 #[inline]
10004 #[target_feature(enable = "avx512f")]
10005 #[cfg_attr(test, assert_instr(vcompressps))]
10006 pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10007 transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k))
10008 }
10009
10010 /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
10011 ///
10012 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_ps&expand=1223)
10013 #[inline]
10014 #[target_feature(enable = "avx512f")]
10015 #[cfg_attr(test, assert_instr(vcompressps))]
10016 pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
10017 transmute(vcompressps(
10018 a.as_f32x16(),
10019 _mm512_setzero_ps().as_f32x16(),
10020 k,
10021 ))
10022 }
10023
10024 /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
10025 ///
10026 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_pd&expand=1216)
10027 #[inline]
10028 #[target_feature(enable = "avx512f")]
10029 #[cfg_attr(test, assert_instr(vcompresspd))]
10030 pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
10031 transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k))
10032 }
10033
10034 /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
10035 ///
10036 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_pd&expand=1217)
10037 #[inline]
10038 #[target_feature(enable = "avx512f")]
10039 #[cfg_attr(test, assert_instr(vcompresspd))]
10040 pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
10041 transmute(vcompresspd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
10042 }
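
// Illustrative sketch (not part of the upstream source): compress packs the
// lanes selected by the mask into the low end of the result; with the zeroing
// form the remaining lanes become zero. The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_compress_epi32(a: __m512i) -> __m512i {
    // Keeps the even-indexed lanes of `a`, packed into lanes 0..8; lanes 8..16 are zero.
    _mm512_maskz_compress_epi32(0b0101_0101_0101_0101, a)
}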
10043
10044 /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10045 ///
10046 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_epi32&expand=2316)
10047 #[inline]
10048 #[target_feature(enable = "avx512f")]
10049 #[cfg_attr(test, assert_instr(vpexpandd))]
10050 pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
10051 transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k))
10052 }
10053
10054 /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10055 ///
10056 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_epi32&expand=2317)
10057 #[inline]
10058 #[target_feature(enable = "avx512f")]
10059 #[cfg_attr(test, assert_instr(vpexpandd))]
10060 pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
10061 transmute(vpexpandd(
10062 a.as_i32x16(),
10063 _mm512_setzero_si512().as_i32x16(),
10064 k,
10065 ))
10066 }
10067
10068 /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10069 ///
10070 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_epi64&expand=2322)
10071 #[inline]
10072 #[target_feature(enable = "avx512f")]
10073 #[cfg_attr(test, assert_instr(vpexpandq))]
10074 pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
10075 transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k))
10076 }
10077
10078 /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10079 ///
10080 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_epi64&expand=2323)
10081 #[inline]
10082 #[target_feature(enable = "avx512f")]
10083 #[cfg_attr(test, assert_instr(vpexpandq))]
10084 pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
10085 transmute(vpexpandq(
10086 a.as_i64x8(),
10087 _mm512_setzero_si512().as_i64x8(),
10088 k,
10089 ))
10090 }
10091
10092 /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10093 ///
10094 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_ps&expand=2340)
10095 #[inline]
10096 #[target_feature(enable = "avx512f")]
10097 #[cfg_attr(test, assert_instr(vexpandps))]
10098 pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10099 transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k))
10100 }
10101
10102 /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10103 ///
10104 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_ps&expand=2341)
10105 #[inline]
10106 #[target_feature(enable = "avx512f")]
10107 #[cfg_attr(test, assert_instr(vexpandps))]
10108 pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
10109 transmute(vexpandps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
10110 }
10111
10112 /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10113 ///
10114 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_pd&expand=2334)
10115 #[inline]
10116 #[target_feature(enable = "avx512f")]
10117 #[cfg_attr(test, assert_instr(vexpandpd))]
10118 pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
10119 transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k))
10120 }
10121
10122 /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10123 ///
10124 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_pd&expand=2335)
10125 #[inline]
10126 #[target_feature(enable = "avx512f")]
10127 #[cfg_attr(test, assert_instr(vexpandpd))]
10128 pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
10129 transmute(vexpandpd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
10130 }
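
// Illustrative sketch (not part of the upstream source): expand is the inverse
// of compress; consecutive low lanes of the source are spread out to the
// positions whose mask bit is set, and the zeroing form clears everything else.
// The helper name is hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_expand_epi32(packed: __m512i) -> __m512i {
    // Lanes 0..8 of `packed` are routed to the even result lanes; the odd lanes are zeroed.
    _mm512_maskz_expand_epi32(0b0101_0101_0101_0101, packed)
}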
10131
10132 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
10133 ///
10134 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
10135 #[inline]
10136 #[target_feature(enable = "avx512f")]
10137 #[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
10138 #[rustc_args_required_const(1)]
10139 pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
10140 let a = a.as_i32x16();
10141 macro_rules! call {
10142 ($imm8:expr) => {
10143 vprold(a, $imm8)
10144 };
10145 }
10146 let r = constify_imm8_sae!(imm8, call);
10147 transmute(r)
10148 }
10149
10150 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10151 ///
10152 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
10153 #[inline]
10154 #[target_feature(enable = "avx512f")]
10155 #[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
10156 #[rustc_args_required_const(3)]
10157 pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
10158 let a = a.as_i32x16();
10159 macro_rules! call {
10160 ($imm8:expr) => {
10161 vprold(a, $imm8)
10162 };
10163 }
10164 let rol = constify_imm8_sae!(imm8, call);
10165 transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
10166 }
10167
10168 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10169 ///
10170 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
10171 #[inline]
10172 #[target_feature(enable = "avx512f")]
10173 #[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
10174 #[rustc_args_required_const(2)]
10175 pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
10176 let a = a.as_i32x16();
10177 macro_rules! call {
10178 ($imm8:expr) => {
10179 vprold(a, $imm8)
10180 };
10181 }
10182 let rol = constify_imm8_sae!(imm8, call);
10183 let zero = _mm512_setzero_si512().as_i32x16();
10184 transmute(simd_select_bitmask(k, rol, zero))
10185 }
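// Usage sketch (illustrative, not part of the upstream source): `imm8` is the
// rotate count and must be a compile-time constant; bits rotated out on the
// left re-enter on the right. Assuming a CPU with AVX-512F:
//
//     let a = _mm512_set1_epi32(1 << 31);
//     let rotated = unsafe { _mm512_rol_epi32(a, 1) };               // every lane is 1
//     let merged = unsafe { _mm512_mask_rol_epi32(a, 0x00ff, a, 1) };
//     // low 8 lanes are rotated to 1, high 8 lanes are copied from `src`.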
10186
10187 /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
10188 ///
10189 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
10190 #[inline]
10191 #[target_feature(enable = "avx512f")]
10192 #[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
10193 #[rustc_args_required_const(1)]
10194 pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
10195 let a = a.as_i32x16();
10196 macro_rules! call {
10197 ($imm8:expr) => {
10198 vprord(a, $imm8)
10199 };
10200 }
10201 let r = constify_imm8_sae!(imm8, call);
10202 transmute(r)
10203 }
10204
10205 /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10206 ///
10207 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
10208 #[inline]
10209 #[target_feature(enable = "avx512f")]
10210 #[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
10211 #[rustc_args_required_const(3)]
10212 pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
10213 let a = a.as_i32x16();
10214 macro_rules! call {
10215 ($imm8:expr) => {
10216 vprord(a, $imm8)
10217 };
10218 }
10219 let ror = constify_imm8_sae!(imm8, call);
10220 transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
10221 }
10222
10223 /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10224 ///
10225 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
10226 #[inline]
10227 #[target_feature(enable = "avx512f")]
10228 #[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
10229 #[rustc_args_required_const(2)]
10230 pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
10231 let a = a.as_i32x16();
10232 macro_rules! call {
10233 ($imm8:expr) => {
10234 vprord(a, $imm8)
10235 };
10236 }
10237 let ror = constify_imm8_sae!(imm8, call);
10238 let zero = _mm512_setzero_si512().as_i32x16();
10239 transmute(simd_select_bitmask(k, ror, zero))
10240 }
10241
10242 /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
10243 ///
10244 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
10245 #[inline]
10246 #[target_feature(enable = "avx512f")]
10247 #[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
10248 #[rustc_args_required_const(1)]
10249 pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
10250 let a = a.as_i64x8();
10251 macro_rules! call {
10252 ($imm8:expr) => {
10253 vprolq(a, $imm8)
10254 };
10255 }
10256 let r = constify_imm8_sae!(imm8, call);
10257 transmute(r)
10258 }
10259
10260 /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10261 ///
10262 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
10263 #[inline]
10264 #[target_feature(enable = "avx512f")]
10265 #[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
10266 #[rustc_args_required_const(3)]
10267 pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
10268 let a = a.as_i64x8();
10269 macro_rules! call {
10270 ($imm8:expr) => {
10271 vprolq(a, $imm8)
10272 };
10273 }
10274 let rol = constify_imm8_sae!(imm8, call);
10275 transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
10276 }
10277
10278 /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10279 ///
10280 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
10281 #[inline]
10282 #[target_feature(enable = "avx512f")]
10283 #[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
10284 #[rustc_args_required_const(2)]
10285 pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
10286 let a = a.as_i64x8();
10287 macro_rules! call {
10288 ($imm8:expr) => {
10289 vprolq(a, $imm8)
10290 };
10291 }
10292 let rol = constify_imm8_sae!(imm8, call);
10293 let zero = _mm512_setzero_si512().as_i64x8();
10294 transmute(simd_select_bitmask(k, rol, zero))
10295 }
10296
10297 /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
10298 ///
10299 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
10300 #[inline]
10301 #[target_feature(enable = "avx512f")]
10302 #[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
10303 #[rustc_args_required_const(1)]
10304 pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
10305 let a = a.as_i64x8();
10306 macro_rules! call {
10307 ($imm8:expr) => {
10308 vprorq(a, $imm8)
10309 };
10310 }
10311 let r = constify_imm8_sae!(imm8, call);
10312 transmute(r)
10313 }
10314
10315 /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10316 ///
10317 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
10318 #[inline]
10319 #[target_feature(enable = "avx512f")]
10320 #[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
10321 #[rustc_args_required_const(3)]
10322 pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
10323 let a = a.as_i64x8();
10324 macro_rules! call {
10325 ($imm8:expr) => {
10326 vprorq(a, $imm8)
10327 };
10328 }
10329 let ror = constify_imm8_sae!(imm8, call);
10330 transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
10331 }
10332
10333 /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10334 ///
10335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
10336 #[inline]
10337 #[target_feature(enable = "avx512f")]
10338 #[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
10339 #[rustc_args_required_const(2)]
10340 pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
10341 let a = a.as_i64x8();
10342 macro_rules! call {
10343 ($imm8:expr) => {
10344 vprorq(a, $imm8)
10345 };
10346 }
10347 let ror = constify_imm8_sae!(imm8, call);
10348 let zero = _mm512_setzero_si512().as_i64x8();
10349 transmute(simd_select_bitmask(k, ror, zero))
10350 }
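// Note (illustrative, not part of the upstream source): a right rotation by n
// bits equals a left rotation by `width - n` bits, which is presumably why the
// test assertions for the `ror` intrinsics above expect `vprold`/`vprolq`.
//
//     let a = _mm512_set1_epi64(1);
//     let r = unsafe { _mm512_ror_epi64(a, 1) };
//     // every lane is 1 rotated right once, i.e. only bit 63 is set.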
10351
10352 /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
10353 ///
10354 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
10355 #[inline]
10356 #[target_feature(enable = "avx512f")]
10357 #[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
10358 #[rustc_args_required_const(1)]
10359 pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
10360 let a = a.as_i32x16();
10361 macro_rules! call {
10362 ($imm8:expr) => {
10363 vpsllid(a, $imm8)
10364 };
10365 }
10366 let r = constify_imm8_sae!(imm8, call);
10367 transmute(r)
10368 }
10369
10370 /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10371 ///
10372 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
10373 #[inline]
10374 #[target_feature(enable = "avx512f")]
10375 #[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
10376 #[rustc_args_required_const(3)]
10377 pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
10378 let a = a.as_i32x16();
10379 macro_rules! call {
10380 ($imm8:expr) => {
10381 vpsllid(a, $imm8)
10382 };
10383 }
10384 let shf = constify_imm8_sae!(imm8, call);
10385 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10386 }
10387
10388 /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10389 ///
10390 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
10391 #[inline]
10392 #[target_feature(enable = "avx512f")]
10393 #[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
10394 #[rustc_args_required_const(2)]
10395 pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
10396 let a = a.as_i32x16();
10397 macro_rules! call {
10398 ($imm8:expr) => {
10399 vpsllid(a, $imm8)
10400 };
10401 }
10402 let shf = constify_imm8_sae!(imm8, call);
10403 let zero = _mm512_setzero_si512().as_i32x16();
10404 transmute(simd_select_bitmask(k, shf, zero))
10405 }
10406
10407 /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
10408 ///
10409 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
10410 #[inline]
10411 #[target_feature(enable = "avx512f")]
10412 #[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
10413 #[rustc_args_required_const(1)]
10414 pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
10415 let a = a.as_i32x16();
10416 macro_rules! call {
10417 ($imm8:expr) => {
10418 vpsrlid(a, $imm8)
10419 };
10420 }
10421 let r = constify_imm8_sae!(imm8, call);
10422 transmute(r)
10423 }
10424
10425 /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10426 ///
10427 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
10428 #[inline]
10429 #[target_feature(enable = "avx512f")]
10430 #[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
10431 #[rustc_args_required_const(3)]
10432 pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
10433 let a = a.as_i32x16();
10434 macro_rules! call {
10435 ($imm8:expr) => {
10436 vpsrlid(a, $imm8)
10437 };
10438 }
10439 let shf = constify_imm8_sae!(imm8, call);
10440 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10441 }
10442
10443 /// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10444 ///
10445 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
10446 #[inline]
10447 #[target_feature(enable = "avx512f")]
10448 #[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
10449 #[rustc_args_required_const(2)]
10450 pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
10451 let a = a.as_i32x16();
10452 macro_rules! call {
10453 ($imm8:expr) => {
10454 vpsrlid(a, $imm8)
10455 };
10456 }
10457 let shf = constify_imm8_sae!(imm8, call);
10458 let zero = _mm512_setzero_si512().as_i32x16();
10459 transmute(simd_select_bitmask(k, shf, zero))
10460 }
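// Usage sketch (illustrative, not part of the upstream source): `slli`/`srli`
// are logical shifts by an immediate; per Intel's description, a count of 32
// or more clears the lane. Assuming a CPU with AVX-512F:
//
//     let a = _mm512_set1_epi32(-1);
//     let l = unsafe { _mm512_slli_epi32(a, 4) };  // every lane: 0xffff_fff0
//     let r = unsafe { _mm512_srli_epi32(a, 4) };  // every lane: 0x0fff_ffff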
10461
10462 /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
10463 ///
10464 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
10465 #[inline]
10466 #[target_feature(enable = "avx512f")]
10467 #[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
10468 #[rustc_args_required_const(1)]
10469 pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
10470 let a = a.as_i64x8();
10471 macro_rules! call {
10472 ($imm8:expr) => {
10473 vpslliq(a, $imm8)
10474 };
10475 }
10476 let r = constify_imm8_sae!(imm8, call);
10477 transmute(r)
10478 }
10479
10480 /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10481 ///
10482 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
10483 #[inline]
10484 #[target_feature(enable = "avx512f")]
10485 #[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
10486 #[rustc_args_required_const(3)]
10487 pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
10488 let a = a.as_i64x8();
10489 macro_rules! call {
10490 ($imm8:expr) => {
10491 vpslliq(a, $imm8)
10492 };
10493 }
10494 let shf = constify_imm8_sae!(imm8, call);
10495 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10496 }
10497
10498 /// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10499 ///
10500 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
10501 #[inline]
10502 #[target_feature(enable = "avx512f")]
10503 #[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
10504 #[rustc_args_required_const(2)]
10505 pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
10506 let a = a.as_i64x8();
10507 macro_rules! call {
10508 ($imm8:expr) => {
10509 vpslliq(a, $imm8)
10510 };
10511 }
10512 let shf = constify_imm8_sae!(imm8, call);
10513 let zero = _mm512_setzero_si512().as_i64x8();
10514 transmute(simd_select_bitmask(k, shf, zero))
10515 }
10516
10517 /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
10518 ///
10519 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
10520 #[inline]
10521 #[target_feature(enable = "avx512f")]
10522 #[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
10523 #[rustc_args_required_const(1)]
10524 pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
10525 let a = a.as_i64x8();
10526 macro_rules! call {
10527 ($imm8:expr) => {
10528 vpsrliq(a, $imm8)
10529 };
10530 }
10531 let r = constify_imm8_sae!(imm8, call);
10532 transmute(r)
10533 }
10534
10535 /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10536 ///
10537 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
10538 #[inline]
10539 #[target_feature(enable = "avx512f")]
10540 #[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
10541 #[rustc_args_required_const(3)]
10542 pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
10543 let a = a.as_i64x8();
10544 macro_rules! call {
10545 ($imm8:expr) => {
10546 vpsrliq(a, $imm8)
10547 };
10548 }
10549 let shf = constify_imm8_sae!(imm8, call);
10550 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10551 }
10552
10553 /// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10554 ///
10555 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
10556 #[inline]
10557 #[target_feature(enable = "avx512f")]
10558 #[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
10559 #[rustc_args_required_const(2)]
10560 pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
10561 let a = a.as_i64x8();
10562 macro_rules! call {
10563 ($imm8:expr) => {
10564 vpsrliq(a, $imm8)
10565 };
10566 }
10567 let shf = constify_imm8_sae!(imm8, call);
10568 let zero = _mm512_setzero_si512().as_i64x8();
10569 transmute(simd_select_bitmask(k, shf, zero))
10570 }
10571
10572 /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
10573 ///
10574 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
10575 #[inline]
10576 #[target_feature(enable = "avx512f")]
10577 #[cfg_attr(test, assert_instr(vpslld))]
10578 pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
10579 transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
10580 }
10581
10582 /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10583 ///
10584 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
10585 #[inline]
10586 #[target_feature(enable = "avx512f")]
10587 #[cfg_attr(test, assert_instr(vpslld))]
10588 pub unsafe fn _mm512_mask_sll_epi32(
10589 src: __m512i,
10590 k: __mmask16,
10591 a: __m512i,
10592 count: __m128i,
10593 ) -> __m512i {
10594 let shf = _mm512_sll_epi32(a, count).as_i32x16();
10595 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10596 }
10597
10598 /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10599 ///
10600 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
10601 #[inline]
10602 #[target_feature(enable = "avx512f")]
10603 #[cfg_attr(test, assert_instr(vpslld))]
10604 pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
10605 let shf = _mm512_sll_epi32(a, count).as_i32x16();
10606 let zero = _mm512_setzero_si512().as_i32x16();
10607 transmute(simd_select_bitmask(k, shf, zero))
10608 }
10609
10610 /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
10611 ///
10612 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
10613 #[inline]
10614 #[target_feature(enable = "avx512f")]
10615 #[cfg_attr(test, assert_instr(vpsrld))]
10616 pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
10617 transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
10618 }
10619
10620 /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10621 ///
10622 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
10623 #[inline]
10624 #[target_feature(enable = "avx512f")]
10625 #[cfg_attr(test, assert_instr(vpsrld))]
10626 pub unsafe fn _mm512_mask_srl_epi32(
10627 src: __m512i,
10628 k: __mmask16,
10629 a: __m512i,
10630 count: __m128i,
10631 ) -> __m512i {
10632 let shf = _mm512_srl_epi32(a, count).as_i32x16();
10633 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10634 }
10635
10636 /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10637 ///
10638 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
10639 #[inline]
10640 #[target_feature(enable = "avx512f")]
10641 #[cfg_attr(test, assert_instr(vpsrld))]
10642 pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
10643 let shf = _mm512_srl_epi32(a, count).as_i32x16();
10644 let zero = _mm512_setzero_si512().as_i32x16();
10645 transmute(simd_select_bitmask(k, shf, zero))
10646 }
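// Usage sketch (illustrative, not part of the upstream source): the `sll`/`srl`
// forms take the shift count from the low 64 bits of a 128-bit vector, and the
// same count applies to every lane. Assuming a CPU with AVX-512F and using the
// crate's `_mm_set_epi64x` helper:
//
//     let count = _mm_set_epi64x(0, 4);                  // shift by 4
//     let a = _mm512_set1_epi32(1);
//     let l = unsafe { _mm512_sll_epi32(a, count) };     // every lane: 16
//     let r = unsafe { _mm512_srl_epi32(l, count) };     // back to 1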
10647
10648 /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
10649 ///
10650 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
10651 #[inline]
10652 #[target_feature(enable = "avx512f")]
10653 #[cfg_attr(test, assert_instr(vpsllq))]
10654 pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
10655 transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
10656 }
10657
10658 /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10659 ///
10660 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
10661 #[inline]
10662 #[target_feature(enable = "avx512f")]
10663 #[cfg_attr(test, assert_instr(vpsllq))]
10664 pub unsafe fn _mm512_mask_sll_epi64(
10665 src: __m512i,
10666 k: __mmask8,
10667 a: __m512i,
10668 count: __m128i,
10669 ) -> __m512i {
10670 let shf = _mm512_sll_epi64(a, count).as_i64x8();
10671 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10672 }
10673
10674 /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10675 ///
10676 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
10677 #[inline]
10678 #[target_feature(enable = "avx512f")]
10679 #[cfg_attr(test, assert_instr(vpsllq))]
10680 pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
10681 let shf = _mm512_sll_epi64(a, count).as_i64x8();
10682 let zero = _mm512_setzero_si512().as_i64x8();
10683 transmute(simd_select_bitmask(k, shf, zero))
10684 }
10685
10686 /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
10687 ///
10688 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
10689 #[inline]
10690 #[target_feature(enable = "avx512f")]
10691 #[cfg_attr(test, assert_instr(vpsrlq))]
10692 pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
10693 transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
10694 }
10695
10696 /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10697 ///
10698 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
10699 #[inline]
10700 #[target_feature(enable = "avx512f")]
10701 #[cfg_attr(test, assert_instr(vpsrlq))]
10702 pub unsafe fn _mm512_mask_srl_epi64(
10703 src: __m512i,
10704 k: __mmask8,
10705 a: __m512i,
10706 count: __m128i,
10707 ) -> __m512i {
10708 let shf = _mm512_srl_epi64(a, count).as_i64x8();
10709 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10710 }
10711
10712 /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10713 ///
10714 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500)
10715 #[inline]
10716 #[target_feature(enable = "avx512f")]
10717 #[cfg_attr(test, assert_instr(vpsrlq))]
10718 pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
10719 let shf = _mm512_srl_epi64(a, count).as_i64x8();
10720 let zero = _mm512_setzero_si512().as_i64x8();
10721 transmute(simd_select_bitmask(k, shf, zero))
10722 }
10723
10724 /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
10725 ///
10726 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
10727 #[inline]
10728 #[target_feature(enable = "avx512f")]
10729 #[cfg_attr(test, assert_instr(vpsrad))]
10730 pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
10731 transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
10732 }
10733
10734 /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10735 ///
10736 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
10737 #[inline]
10738 #[target_feature(enable = "avx512f")]
10739 #[cfg_attr(test, assert_instr(vpsrad))]
10740 pub unsafe fn _mm512_mask_sra_epi32(
10741 src: __m512i,
10742 k: __mmask16,
10743 a: __m512i,
10744 count: __m128i,
10745 ) -> __m512i {
10746 let shf = _mm512_sra_epi32(a, count).as_i32x16();
10747 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10748 }
10749
10750 /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10751 ///
10752 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
10753 #[inline]
10754 #[target_feature(enable = "avx512f")]
10755 #[cfg_attr(test, assert_instr(vpsrad))]
10756 pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
10757 let shf = _mm512_sra_epi32(a, count).as_i32x16();
10758 let zero = _mm512_setzero_si512().as_i32x16();
10759 transmute(simd_select_bitmask(k, shf, zero))
10760 }
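// Usage sketch (illustrative, not part of the upstream source): the `sra`
// forms shift right arithmetically, replicating the sign bit into the vacated
// positions. Assuming a CPU with AVX-512F:
//
//     let a = _mm512_set1_epi32(-16);
//     let count = _mm_set_epi64x(0, 2);
//     let r = unsafe { _mm512_sra_epi32(a, count) };     // every lane: -4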
10761
10762 /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
10763 ///
10764 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
10765 #[inline]
10766 #[target_feature(enable = "avx512f")]
10767 #[cfg_attr(test, assert_instr(vpsraq))]
10768 pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
10769 transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
10770 }
10771
10772 /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10773 ///
10774 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
10775 #[inline]
10776 #[target_feature(enable = "avx512f")]
10777 #[cfg_attr(test, assert_instr(vpsraq))]
10778 pub unsafe fn _mm512_mask_sra_epi64(
10779 src: __m512i,
10780 k: __mmask8,
10781 a: __m512i,
10782 count: __m128i,
10783 ) -> __m512i {
10784 let shf = _mm512_sra_epi64(a, count).as_i64x8();
10785 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10786 }
10787
10788 /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10789 ///
10790 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415)
10791 #[inline]
10792 #[target_feature(enable = "avx512f")]
10793 #[cfg_attr(test, assert_instr(vpsraq))]
10794 pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
10795 let shf = _mm512_sra_epi64(a, count).as_i64x8();
10796 let zero = _mm512_setzero_si512().as_i64x8();
10797 transmute(simd_select_bitmask(k, shf, zero))
10798 }
10799
10800 /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
10801 ///
10802 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
10803 #[inline]
10804 #[target_feature(enable = "avx512f")]
10805 #[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
10806 #[rustc_args_required_const(1)]
10807 pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
10808 let a = a.as_i32x16();
10809 macro_rules! call {
10810 ($imm8:expr) => {
10811 vpsraid(a, $imm8)
10812 };
10813 }
10814 let r = constify_imm8_sae!(imm8, call);
10815 transmute(r)
10816 }
10817
10818 /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10819 ///
10820 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
10821 #[inline]
10822 #[target_feature(enable = "avx512f")]
10823 #[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
10824 #[rustc_args_required_const(3)]
10825 pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
10826 let a = a.as_i32x16();
10827 macro_rules! call {
10828 ($imm8:expr) => {
10829 vpsraid(a, $imm8)
10830 };
10831 }
10832 let shf = constify_imm8_sae!(imm8, call);
10833 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10834 }
10835
10836 /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10837 ///
10838 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
10839 #[inline]
10840 #[target_feature(enable = "avx512f")]
10841 #[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
10842 #[rustc_args_required_const(2)]
10843 pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
10844 let a = a.as_i32x16();
10845 macro_rules! call {
10846 ($imm8:expr) => {
10847 vpsraid(a, $imm8)
10848 };
10849 }
10850 let shf = constify_imm8_sae!(imm8, call);
10851 let zero = _mm512_setzero_si512().as_i32x16();
10852 transmute(simd_select_bitmask(k, shf, zero))
10853 }
10854
10855 /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
10856 ///
10857 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
10858 #[inline]
10859 #[target_feature(enable = "avx512f")]
10860 #[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
10861 #[rustc_args_required_const(1)]
10862 pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
10863 let a = a.as_i64x8();
10864 macro_rules! call {
10865 ($imm8:expr) => {
10866 vpsraiq(a, $imm8)
10867 };
10868 }
10869 let r = constify_imm8_sae!(imm8, call);
10870 transmute(r)
10871 }
10872
10873 /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10874 ///
10875 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
10876 #[inline]
10877 #[target_feature(enable = "avx512f")]
10878 #[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
10879 #[rustc_args_required_const(3)]
10880 pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
10881 let a = a.as_i64x8();
10882 macro_rules! call {
10883 ($imm8:expr) => {
10884 vpsraiq(a, $imm8)
10885 };
10886 }
10887 let shf = constify_imm8_sae!(imm8, call);
10888 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10889 }
10890
10891 /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10892 ///
10893 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
10894 #[inline]
10895 #[target_feature(enable = "avx512f")]
10896 #[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
10897 #[rustc_args_required_const(2)]
10898 pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
10899 let a = a.as_i64x8();
10900 macro_rules! call {
10901 ($imm8:expr) => {
10902 vpsraiq(a, $imm8)
10903 };
10904 }
10905 let shf = constify_imm8_sae!(imm8, call);
10906 let zero = _mm512_setzero_si512().as_i64x8();
10907 transmute(simd_select_bitmask(k, shf, zero))
10908 }
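// Note (illustrative, not part of the upstream source): a packed 64-bit
// arithmetic right shift (`vpsraq`) has no SSE/AVX2 counterpart; it is new
// with AVX-512F. Sign bits are replicated, so shifting a negative value all
// the way down yields -1:
//
//     let a = _mm512_set1_epi64(i64::MIN);
//     let r = unsafe { _mm512_srai_epi64(a, 63) };       // every lane: -1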
10909
10910 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
10911 ///
10912 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465)
10913 #[inline]
10914 #[target_feature(enable = "avx512f")]
10915 #[cfg_attr(test, assert_instr(vpsravd))]
10916 pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
10917 transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
10918 }
10919
10920 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10921 ///
10922 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463)
10923 #[inline]
10924 #[target_feature(enable = "avx512f")]
10925 #[cfg_attr(test, assert_instr(vpsravd))]
10926 pub unsafe fn _mm512_mask_srav_epi32(
10927 src: __m512i,
10928 k: __mmask16,
10929 a: __m512i,
10930 count: __m512i,
10931 ) -> __m512i {
10932 let shf = _mm512_srav_epi32(a, count).as_i32x16();
10933 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
10934 }
10935
10936 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10937 ///
10938 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464)
10939 #[inline]
10940 #[target_feature(enable = "avx512f")]
10941 #[cfg_attr(test, assert_instr(vpsravd))]
10942 pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
10943 let shf = _mm512_srav_epi32(a, count).as_i32x16();
10944 let zero = _mm512_setzero_si512().as_i32x16();
10945 transmute(simd_select_bitmask(k, shf, zero))
10946 }
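// Usage sketch (illustrative, not part of the upstream source): the `srav`
// forms take an independent shift count per lane, here built with the crate's
// `_mm512_set_epi32` helper. Assuming a CPU with AVX-512F:
//
//     let a = _mm512_set1_epi32(-64);
//     let counts = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = unsafe { _mm512_srav_epi32(a, counts) };
//     // lane 0 keeps -64, lane 1 is -32, ... lanes 6..=15 settle at -1.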
10947
10948 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
10949 ///
10950 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474)
10951 #[inline]
10952 #[target_feature(enable = "avx512f")]
10953 #[cfg_attr(test, assert_instr(vpsravq))]
10954 pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
10955 transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
10956 }
10957
10958 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10959 ///
10960 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472)
10961 #[inline]
10962 #[target_feature(enable = "avx512f")]
10963 #[cfg_attr(test, assert_instr(vpsravq))]
10964 pub unsafe fn _mm512_mask_srav_epi64(
10965 src: __m512i,
10966 k: __mmask8,
10967 a: __m512i,
10968 count: __m512i,
10969 ) -> __m512i {
10970 let shf = _mm512_srav_epi64(a, count).as_i64x8();
10971 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
10972 }
10973
10974 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10975 ///
10976 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473)
10977 #[inline]
10978 #[target_feature(enable = "avx512f")]
10979 #[cfg_attr(test, assert_instr(vpsravq))]
10980 pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
10981 let shf = _mm512_srav_epi64(a, count).as_i64x8();
10982 let zero = _mm512_setzero_si512().as_i64x8();
10983 transmute(simd_select_bitmask(k, shf, zero))
10984 }
10985
10986 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
10987 ///
10988 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703)
10989 #[inline]
10990 #[target_feature(enable = "avx512f")]
10991 #[cfg_attr(test, assert_instr(vprolvd))]
10992 pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
10993 transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
10994 }
10995
10996 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10997 ///
10998 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701)
10999 #[inline]
11000 #[target_feature(enable = "avx512f")]
11001 #[cfg_attr(test, assert_instr(vprolvd))]
11002 pub unsafe fn _mm512_mask_rolv_epi32(
11003 src: __m512i,
11004 k: __mmask16,
11005 a: __m512i,
11006 b: __m512i,
11007 ) -> __m512i {
11008 let rol = _mm512_rolv_epi32(a, b).as_i32x16();
11009 transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
11010 }
11011
11012 /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11013 ///
11014 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702)
11015 #[inline]
11016 #[target_feature(enable = "avx512f")]
11017 #[cfg_attr(test, assert_instr(vprolvd))]
11018 pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
11019 let rol = _mm512_rolv_epi32(a, b).as_i32x16();
11020 let zero = _mm512_setzero_si512().as_i32x16();
11021 transmute(simd_select_bitmask(k, rol, zero))
11022 }
11023
11024 /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
11025 ///
11026 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739)
11027 #[inline]
11028 #[target_feature(enable = "avx512f")]
11029 #[cfg_attr(test, assert_instr(vprorvd))]
11030 pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
11031 transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
11032 }
11033
11034 /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11035 ///
11036 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737)
11037 #[inline]
11038 #[target_feature(enable = "avx512f")]
11039 #[cfg_attr(test, assert_instr(vprorvd))]
11040 pub unsafe fn _mm512_mask_rorv_epi32(
11041 src: __m512i,
11042 k: __mmask16,
11043 a: __m512i,
11044 b: __m512i,
11045 ) -> __m512i {
11046 let ror = _mm512_rorv_epi32(a, b).as_i32x16();
11047 transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
11048 }
11049
11050 /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11051 ///
11052 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738)
11053 #[inline]
11054 #[target_feature(enable = "avx512f")]
11055 #[cfg_attr(test, assert_instr(vprorvd))]
11056 pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
11057 let ror = _mm512_rorv_epi32(a, b).as_i32x16();
11058 let zero = _mm512_setzero_si512().as_i32x16();
11059 transmute(simd_select_bitmask(k, ror, zero))
11060 }
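// Usage sketch (illustrative, not part of the upstream source): `rolv`/`rorv`
// rotate each lane by the count held in the corresponding lane of `b`.
// Assuming a CPU with AVX-512F:
//
//     let a = _mm512_set1_epi32(1);
//     let counts = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = unsafe { _mm512_rolv_epi32(a, counts) };
//     // lane i holds 1 rotated left by i bits, i.e. 1 << i.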
11061
11062 /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
11063 ///
11064 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712)
11065 #[inline]
11066 #[target_feature(enable = "avx512f")]
11067 #[cfg_attr(test, assert_instr(vprolvq))]
11068 pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
11069 transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
11070 }
11071
11072 /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11073 ///
11074 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710)
11075 #[inline]
11076 #[target_feature(enable = "avx512f")]
11077 #[cfg_attr(test, assert_instr(vprolvq))]
11078 pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
11079 let rol = _mm512_rolv_epi64(a, b).as_i64x8();
11080 transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
11081 }
11082
11083 /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11084 ///
11085 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711)
11086 #[inline]
11087 #[target_feature(enable = "avx512f")]
11088 #[cfg_attr(test, assert_instr(vprolvq))]
11089 pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
11090 let rol = _mm512_rolv_epi64(a, b).as_i64x8();
11091 let zero = _mm512_setzero_si512().as_i64x8();
11092 transmute(simd_select_bitmask(k, rol, zero))
11093 }
11094
11095 /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
11096 ///
11097 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748)
11098 #[inline]
11099 #[target_feature(enable = "avx512f")]
11100 #[cfg_attr(test, assert_instr(vprorvq))]
11101 pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
11102 transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
11103 }
11104
11105 /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11106 ///
11107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746)
11108 #[inline]
11109 #[target_feature(enable = "avx512f")]
11110 #[cfg_attr(test, assert_instr(vprorvq))]
11111 pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
11112 let ror = _mm512_rorv_epi64(a, b).as_i64x8();
11113 transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
11114 }
11115
11116 /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11117 ///
11118 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747)
11119 #[inline]
11120 #[target_feature(enable = "avx512f")]
11121 #[cfg_attr(test, assert_instr(vprorvq))]
11122 pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
11123 let ror = _mm512_rorv_epi64(a, b).as_i64x8();
11124 let zero = _mm512_setzero_si512().as_i64x8();
11125 transmute(simd_select_bitmask(k, ror, zero))
11126 }
11127
11128 /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
11129 ///
11130 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342)
11131 #[inline]
11132 #[target_feature(enable = "avx512f")]
11133 #[cfg_attr(test, assert_instr(vpsllvd))]
11134 pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
11135 transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
11136 }
11137
11138 /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11139 ///
11140 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340)
11141 #[inline]
11142 #[target_feature(enable = "avx512f")]
11143 #[cfg_attr(test, assert_instr(vpsllvd))]
11144 pub unsafe fn _mm512_mask_sllv_epi32(
11145 src: __m512i,
11146 k: __mmask16,
11147 a: __m512i,
11148 count: __m512i,
11149 ) -> __m512i {
11150 let shf = _mm512_sllv_epi32(a, count).as_i32x16();
11151 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
11152 }
11153
11154 /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11155 ///
11156 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
11157 #[inline]
11158 #[target_feature(enable = "avx512f")]
11159 #[cfg_attr(test, assert_instr(vpsllvd))]
11160 pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
11161 let shf = _mm512_sllv_epi32(a, count).as_i32x16();
11162 let zero = _mm512_setzero_si512().as_i32x16();
11163 transmute(simd_select_bitmask(k, shf, zero))
11164 }
11165
11166 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
11167 ///
11168 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
11169 #[inline]
11170 #[target_feature(enable = "avx512f")]
11171 #[cfg_attr(test, assert_instr(vpsrlvd))]
11172 pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
11173 transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
11174 }
11175
11176 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11177 ///
11178 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
11179 #[inline]
11180 #[target_feature(enable = "avx512f")]
11181 #[cfg_attr(test, assert_instr(vpsrlvd))]
11182 pub unsafe fn _mm512_mask_srlv_epi32(
11183 src: __m512i,
11184 k: __mmask16,
11185 a: __m512i,
11186 count: __m512i,
11187 ) -> __m512i {
11188 let shf = _mm512_srlv_epi32(a, count).as_i32x16();
11189 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
11190 }
11191
11192 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11193 ///
11194 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
11195 #[inline]
11196 #[target_feature(enable = "avx512f")]
11197 #[cfg_attr(test, assert_instr(vpsrlvd))]
11198 pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
11199 let shf = _mm512_srlv_epi32(a, count).as_i32x16();
11200 let zero = _mm512_setzero_si512().as_i32x16();
11201 transmute(simd_select_bitmask(k, shf, zero))
11202 }
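// Usage sketch (illustrative, not part of the upstream source): `sllv`/`srlv`
// shift each lane by the count in the corresponding lane of `count`; a count
// of 32 or more clears the lane. Assuming a CPU with AVX-512F:
//
//     let a = _mm512_set1_epi32(1);
//     let counts = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 2, 1);
//     let r = unsafe { _mm512_sllv_epi32(a, counts) };
//     // lane 0: 2, lane 1: 4, lane 2: 0 (count >= 32), remaining lanes: 1.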
11203
11204 /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
11205 ///
11206 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
11207 #[inline]
11208 #[target_feature(enable = "avx512f")]
11209 #[cfg_attr(test, assert_instr(vpsllvq))]
11210 pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
11211 transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
11212 }
11213
11214 /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11215 ///
11216 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
11217 #[inline]
11218 #[target_feature(enable = "avx512f")]
11219 #[cfg_attr(test, assert_instr(vpsllvq))]
11220 pub unsafe fn _mm512_mask_sllv_epi64(
11221 src: __m512i,
11222 k: __mmask8,
11223 a: __m512i,
11224 count: __m512i,
11225 ) -> __m512i {
11226 let shf = _mm512_sllv_epi64(a, count).as_i64x8();
11227 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
11228 }
11229
11230 /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11231 ///
11232 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
11233 #[inline]
11234 #[target_feature(enable = "avx512f")]
11235 #[cfg_attr(test, assert_instr(vpsllvq))]
11236 pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
11237 let shf = _mm512_sllv_epi64(a, count).as_i64x8();
11238 let zero = _mm512_setzero_si512().as_i64x8();
11239 transmute(simd_select_bitmask(k, shf, zero))
11240 }
11241
11242 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
11243 ///
11244 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563)
11245 #[inline]
11246 #[target_feature(enable = "avx512f")]
11247 #[cfg_attr(test, assert_instr(vpsrlvq))]
11248 pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
11249 transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
11250 }
11251
11252 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11253 ///
11254 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi64&expand=5561)
11255 #[inline]
11256 #[target_feature(enable = "avx512f")]
11257 #[cfg_attr(test, assert_instr(vpsrlvq))]
11258 pub unsafe fn _mm512_mask_srlv_epi64(
11259 src: __m512i,
11260 k: __mmask8,
11261 a: __m512i,
11262 count: __m512i,
11263 ) -> __m512i {
11264 let shf = _mm512_srlv_epi64(a, count).as_i64x8();
11265 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
11266 }
11267
11268 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11269 ///
11270 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562)
11271 #[inline]
11272 #[target_feature(enable = "avx512f")]
11273 #[cfg_attr(test, assert_instr(vpsrlvq))]
11274 pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
11275 let shf = _mm512_srlv_epi64(a, count).as_i64x8();
11276 let zero = _mm512_setzero_si512().as_i64x8();
11277 transmute(simd_select_bitmask(k, shf, zero))
11278 }
11279
11280 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
11281 ///
11282 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permute_ps&expand=4170)
11283 #[inline]
11284 #[target_feature(enable = "avx512f")]
11285 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
11286 #[rustc_args_required_const(1)]
11287 pub unsafe fn _mm512_permute_ps(a: __m512, imm8: i32) -> __m512 {
11288 let a = a.as_f32x16();
11289 macro_rules! call {
11290 ($imm8:expr) => {
11291 vpermilps(a, _mm512_set1_epi32($imm8).as_i32x16())
11292 };
11293 }
11294 let r = constify_imm8_sae!(imm8, call);
11295 transmute(r)
11296 }
11297
11298 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11299 ///
11300 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permute_ps&expand=4168)
11301 #[inline]
11302 #[target_feature(enable = "avx512f")]
11303 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
11304 #[rustc_args_required_const(3)]
11305 pub unsafe fn _mm512_mask_permute_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
11306 let a = a.as_f32x16();
11307 macro_rules! call {
11308 ($imm8:expr) => {
11309 vpermilps(a, _mm512_set1_epi32($imm8).as_i32x16())
11310 };
11311 }
11312 let permute = constify_imm8_sae!(imm8, call);
11313 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
11314 }
11315
11316 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11317 ///
11318 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permute_ps&expand=4169)
11319 #[inline]
11320 #[target_feature(enable = "avx512f")]
11321 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
11322 #[rustc_args_required_const(2)]
11323 pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
11324 let a = a.as_f32x16();
11325 macro_rules! call {
11326 ($imm8:expr) => {
11327 vpermilps(a, _mm512_set1_epi32($imm8).as_i32x16())
11328 };
11329 }
11330 let permute = constify_imm8_sae!(imm8, call);
11331 let zero = _mm512_setzero_ps().as_f32x16();
11332 transmute(simd_select_bitmask(k, permute, zero))
11333 }
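
// Illustrative sketch (test-only helper, not part of the public API): imm8 applies the
// same four 2-bit selectors to every 128-bit lane. The pattern 0b10_10_10_10 makes every
// selector equal to 2, i.e. it broadcasts element 2 of each lane; the input values are
// arbitrary example values chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_permute_ps() {
    let a: __m512 = transmute([
        0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    ]);
    let _r: [f32; 16] = transmute(_mm512_permute_ps(a, 0b10_10_10_10));
    // _r == [2.0, 2.0, 2.0, 2.0, 6.0, 6.0, 6.0, 6.0,
    //        10.0, 10.0, 10.0, 10.0, 14.0, 14.0, 14.0, 14.0]
}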
11334
11335 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
11336 ///
11337 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permute_pd&expand=4161)
11338 #[inline]
11339 #[target_feature(enable = "avx512f")]
11340 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
11341 #[rustc_args_required_const(1)]
11342 pub unsafe fn _mm512_permute_pd(a: __m512d, imm8: i32) -> __m512d {
11343 let a = a.as_f64x8();
11344 macro_rules! call {
11345 ($imm8:expr) => {
11346 vpermilpd(a, _mm512_set1_epi64($imm8).as_i64x8())
11347 };
11348 }
11349 let r = constify_imm8_sae!(imm8, call);
11350 transmute(r)
11351 }
11352
11353 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11354 ///
11355 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permute_pd&expand=4159)
11356 #[inline]
11357 #[target_feature(enable = "avx512f")]
11358 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
11359 #[rustc_args_required_const(3)]
11360 pub unsafe fn _mm512_mask_permute_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
11361 let a = a.as_f64x8();
11362 macro_rules! call {
11363 ($imm8:expr) => {
11364 vpermilpd(a, _mm512_set1_epi64($imm8).as_i64x8())
11365 };
11366 }
11367 let permute = constify_imm8_sae!(imm8, call);
11368 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
11369 }
11370
11371 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11372 ///
11373 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permute_pd&expand=4160)
11374 #[inline]
11375 #[target_feature(enable = "avx512f")]
11376 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
11377 #[rustc_args_required_const(2)]
11378 pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
11379 let a = a.as_f64x8();
11380 macro_rules! call {
11381 ($imm8:expr) => {
11382 vpermilpd(a, _mm512_set1_epi64($imm8).as_i64x8())
11383 };
11384 }
11385 let permute = constify_imm8_sae!(imm8, call);
11386 let zero = _mm512_setzero_pd().as_f64x8();
11387 transmute(simd_select_bitmask(k, permute, zero))
11388 }
11389
11390 /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
11391 ///
11392 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex_epi64&expand=4208)
11393 #[inline]
11394 #[target_feature(enable = "avx512f")]
11395 #[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))]
11396 //should be vpermq, but generates vpermpd (it generates vpermq with a mask); changed to vbroadcast because of CI on Windows
11397 #[rustc_args_required_const(1)]
11398 pub unsafe fn _mm512_permutex_epi64(a: __m512i, imm8: i32) -> __m512i {
11399 let a = a.as_i64x8();
11400 macro_rules! call {
11401 ($imm8:expr) => {
11402 vpermq(a, _mm512_set1_epi64($imm8).as_i64x8())
11403 };
11404 }
11405 let r = constify_imm8_sae!(imm8, call);
11406 transmute(r)
11407 }
11408
11409 /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11410 ///
11411 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex_epi64&expand=4206)
11412 #[inline]
11413 #[target_feature(enable = "avx512f")]
11414 #[cfg_attr(test, assert_instr(vpbroadcast, imm8 = 0b11111111))] //should be vpermq; changed to vpbroadcast because of CI on Windows
11415 #[rustc_args_required_const(3)]
11416 pub unsafe fn _mm512_mask_permutex_epi64(
11417 src: __m512i,
11418 k: __mmask8,
11419 a: __m512i,
11420 imm8: i32,
11421 ) -> __m512i {
11422 let a = a.as_i64x8();
11423 macro_rules! call {
11424 ($imm8:expr) => {
11425 vpermq(a, _mm512_set1_epi64($imm8).as_i64x8())
11426 };
11427 }
11428 let permute = constify_imm8_sae!(imm8, call);
11429 transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
11430 }
11431
11432 /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11433 ///
11434 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex_epi64&expand=4207)
11435 #[inline]
11436 #[target_feature(enable = "avx512f")]
11437 #[cfg_attr(test, assert_instr(vpbroadcast, imm8 = 0b11111111))] //should be vpermq; changed to vpbroadcast because of CI on Windows
11438 #[rustc_args_required_const(2)]
11439 pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
11440 let a = a.as_i64x8();
11441 macro_rules! call {
11442 ($imm8:expr) => {
11443 vpermq(a, _mm512_set1_epi64($imm8).as_i64x8())
11444 };
11445 }
11446 let permute = constify_imm8_sae!(imm8, call);
11447 let zero = _mm512_setzero_si512().as_i64x8();
11448 transmute(simd_select_bitmask(k, permute, zero))
11449 }
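
// Illustrative sketch (test-only helper, not part of the public API): each 2-bit field of
// imm8 selects a 64-bit element within a 256-bit half. The input repeats the same values
// in both halves purely to keep the expected output easy to read; all values are arbitrary
// example values chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_permutex_epi64() {
    let a: __m512i = transmute([10i64, 11, 12, 13, 10, 11, 12, 13]);
    // Every 2-bit field is 0b10, so element 2 of each half is broadcast.
    let _r: [i64; 8] = transmute(_mm512_permutex_epi64(a, 0b10_10_10_10));
    // _r == [12, 12, 12, 12, 12, 12, 12, 12]
}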
11450
11451 /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
11452 ///
11453 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex_pd&expand=4214)
11454 #[inline]
11455 #[target_feature(enable = "avx512f")]
11456 #[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; changed to vbroadcast because of CI on Windows
11457 #[rustc_args_required_const(1)]
11458 pub unsafe fn _mm512_permutex_pd(a: __m512d, imm8: i32) -> __m512d {
11459 let a = a.as_f64x8();
11460 macro_rules! call {
11461 ($imm8:expr) => {
11462 vpermpd(a, _mm512_set1_epi64($imm8).as_i64x8())
11463 };
11464 }
11465 let r = constify_imm8_sae!(imm8, call);
11466 transmute(r)
11467 }
11468
11469 /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11470 ///
11471 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex_pd&expand=4212)
11472 #[inline]
11473 #[target_feature(enable = "avx512f")]
11474 #[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; changed to vbroadcast because of CI on Windows
11475 #[rustc_args_required_const(3)]
11476 pub unsafe fn _mm512_mask_permutex_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
11477 let a = a.as_f64x8();
11478 macro_rules! call {
11479 ($imm8:expr) => {
11480 vpermpd(a, _mm512_set1_epi64($imm8).as_i64x8())
11481 };
11482 }
11483 let permute = constify_imm8_sae!(imm8, call);
11484 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
11485 }
11486
11487 /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11488 ///
11489 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex_pd&expand=4213)
11490 #[inline]
11491 #[target_feature(enable = "avx512f")]
11492 #[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; changed to vbroadcast because of CI on Windows
11493 #[rustc_args_required_const(2)]
11494 pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
11495 let a = a.as_f64x8();
11496 macro_rules! call {
11497 ($imm8:expr) => {
11498 vpermpd(a, _mm512_set1_epi64($imm8).as_i64x8())
11499 };
11500 }
11501 let permute = constify_imm8_sae!(imm8, call);
11502 let zero = _mm512_setzero_pd().as_f64x8();
11503 transmute(simd_select_bitmask(k, permute, zero))
11504 }
11505
11506 /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
11507 ///
11508 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_epi32&expand=4182)
11509 #[inline]
11510 #[target_feature(enable = "avx512f")]
11511 #[cfg_attr(test, assert_instr(vperm))] //should be vpermd, but generates vpermps (it generates vpermd with a mask)
11512 pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
11513 transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
11514 }
11515
11516 /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
11517 ///
11518 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_epi32&expand=4181)
11519 #[inline]
11520 #[target_feature(enable = "avx512f")]
11521 #[cfg_attr(test, assert_instr(vpermd))]
11522 pub unsafe fn _mm512_mask_permutevar_epi32(
11523 src: __m512i,
11524 k: __mmask16,
11525 idx: __m512i,
11526 a: __m512i,
11527 ) -> __m512i {
11528 let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
11529 transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
11530 }
11531
11532 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
11533 ///
11534 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_ps&expand=4200)
11535 #[inline]
11536 #[target_feature(enable = "avx512f")]
11537 #[cfg_attr(test, assert_instr(vpermilps))]
11538 pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
11539 transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
11540 }
11541
11542 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11543 ///
11544 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_ps&expand=4198)
11545 #[inline]
11546 #[target_feature(enable = "avx512f")]
11547 #[cfg_attr(test, assert_instr(vpermilps))]
11548 pub unsafe fn _mm512_mask_permutevar_ps(
11549 src: __m512,
11550 k: __mmask16,
11551 a: __m512,
11552 b: __m512i,
11553 ) -> __m512 {
11554 let permute = _mm512_permutevar_ps(a, b).as_f32x16();
11555 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
11556 }
11557
11558 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11559 ///
11560 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutevar_ps&expand=4199)
11561 #[inline]
11562 #[target_feature(enable = "avx512f")]
11563 #[cfg_attr(test, assert_instr(vpermilps))]
11564 pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
11565 let permute = _mm512_permutevar_ps(a, b).as_f32x16();
11566 let zero = _mm512_setzero_ps().as_f32x16();
11567 transmute(simd_select_bitmask(k, permute, zero))
11568 }
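
// Illustrative sketch (test-only helper, not part of the public API): unlike the imm8 forms
// above, `_mm512_permutevar_ps` takes a per-element control vector; only the low two bits of
// each 32-bit control element are used, and selection stays within each 128-bit lane. Inputs
// are arbitrary example values chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_permutevar_ps() {
    let a: __m512 = transmute([
        0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    ]);
    // The control 3, 2, 1, 0 in every lane reverses the four floats of that lane.
    let b: __m512i = transmute([3i32, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0]);
    let _r: [f32; 16] = transmute(_mm512_permutevar_ps(a, b));
    // _r == [3.0, 2.0, 1.0, 0.0, 7.0, 6.0, 5.0, 4.0,
    //        11.0, 10.0, 9.0, 8.0, 15.0, 14.0, 13.0, 12.0]
}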
11569
11570 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
11571 ///
11572 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_pd&expand=4191)
11573 #[inline]
11574 #[target_feature(enable = "avx512f")]
11575 #[cfg_attr(test, assert_instr(vpermilpd))]
11576 pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
11577 transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
11578 }
11579
11580 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11581 ///
11582 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_pd&expand=4189)
11583 #[inline]
11584 #[target_feature(enable = "avx512f")]
11585 #[cfg_attr(test, assert_instr(vpermilpd))]
11586 pub unsafe fn _mm512_mask_permutevar_pd(
11587 src: __m512d,
11588 k: __mmask8,
11589 a: __m512d,
11590 b: __m512i,
11591 ) -> __m512d {
11592 let permute = _mm512_permutevar_pd(a, b).as_f64x8();
11593 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
11594 }
11595
11596 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597 ///
11598 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutevar_pd&expand=4190)
11599 #[inline]
11600 #[target_feature(enable = "avx512f")]
11601 #[cfg_attr(test, assert_instr(vpermilpd))]
11602 pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
11603 let permute = _mm512_permutevar_pd(a, b).as_f64x8();
11604 let zero = _mm512_setzero_pd().as_f64x8();
11605 transmute(simd_select_bitmask(k, permute, zero))
11606 }
11607
11608 /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
11609 ///
11610 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_epi32&expand=4301)
11611 #[inline]
11612 #[target_feature(enable = "avx512f")]
11613 #[cfg_attr(test, assert_instr(vperm))] //should be vpermd, but generates vpermps (it generates vpermd with a mask)
11614 pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
11615 transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
11616 }
11617
11618 /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11619 ///
11620 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_epi32&expand=4299)
11621 #[inline]
11622 #[target_feature(enable = "avx512f")]
11623 #[cfg_attr(test, assert_instr(vpermd))]
11624 pub unsafe fn _mm512_mask_permutexvar_epi32(
11625 src: __m512i,
11626 k: __mmask16,
11627 idx: __m512i,
11628 a: __m512i,
11629 ) -> __m512i {
11630 let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
11631 transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
11632 }
11633
11634 /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11635 ///
11636 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_epi32&expand=4300)
11637 #[inline]
11638 #[target_feature(enable = "avx512f")]
11639 #[cfg_attr(test, assert_instr(vpermd))]
11640 pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
11641 let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
11642 let zero = _mm512_setzero_si512().as_i32x16();
11643 transmute(simd_select_bitmask(k, permute, zero))
11644 }
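
// Illustrative sketch (test-only helper, not part of the public API): `permutexvar` selects
// across the whole 512-bit register, so an index vector of 15..0 reverses all sixteen
// elements. Inputs are arbitrary example values chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_permutexvar_epi32() {
    let a: __m512i = transmute([0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let idx: __m512i = transmute([15i32, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
    let _r: [i32; 16] = transmute(_mm512_permutexvar_epi32(idx, a));
    // _r == [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
}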
11645
11646 /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
11647 ///
11648 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_epi64&expand=4307)
11649 #[inline]
11650 #[target_feature(enable = "avx512f")]
11651 #[cfg_attr(test, assert_instr(vperm))] //should be vpermq, but generates vpermpd (it generates vpermq with a mask)
11652 pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
11653 transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
11654 }
11655
11656 /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11657 ///
11658 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_epi64&expand=4305)
11659 #[inline]
11660 #[target_feature(enable = "avx512f")]
11661 #[cfg_attr(test, assert_instr(vpermq))]
11662 pub unsafe fn _mm512_mask_permutexvar_epi64(
11663 src: __m512i,
11664 k: __mmask8,
11665 idx: __m512i,
11666 a: __m512i,
11667 ) -> __m512i {
11668 let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
11669 transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
11670 }
11671
11672 /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11673 ///
11674 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_epi64&expand=4306)
11675 #[inline]
11676 #[target_feature(enable = "avx512f")]
11677 #[cfg_attr(test, assert_instr(vpermq))]
11678 pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
11679 let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
11680 let zero = _mm512_setzero_si512().as_i64x8();
11681 transmute(simd_select_bitmask(k, permute, zero))
11682 }
11683
11684 /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
11685 ///
11686 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_ps&expand=4200)
11687 #[inline]
11688 #[target_feature(enable = "avx512f")]
11689 #[cfg_attr(test, assert_instr(vpermps))]
11690 pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
11691 transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
11692 }
11693
11694 /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11695 ///
11696 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_ps&expand=4326)
11697 #[inline]
11698 #[target_feature(enable = "avx512f")]
11699 #[cfg_attr(test, assert_instr(vpermps))]
11700 pub unsafe fn _mm512_mask_permutexvar_ps(
11701 src: __m512,
11702 k: __mmask16,
11703 idx: __m512i,
11704 a: __m512,
11705 ) -> __m512 {
11706 let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
11707 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
11708 }
11709
11710 /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11711 ///
11712 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_ps&expand=4327)
11713 #[inline]
11714 #[target_feature(enable = "avx512f")]
11715 #[cfg_attr(test, assert_instr(vpermps))]
11716 pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
11717 let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
11718 let zero = _mm512_setzero_ps().as_f32x16();
11719 transmute(simd_select_bitmask(k, permute, zero))
11720 }
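
// Illustrative sketch (test-only helper, not part of the public API): the floating-point
// form works the same way as the integer one above, here combined with a zeromask so that
// only the even result lanes are kept. Inputs are arbitrary example values chosen for this
// sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_permutexvar_ps() {
    let a: __m512 = transmute([
        0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    ]);
    // Broadcast element 15 everywhere, then zero the odd lanes via the mask.
    let idx = _mm512_set1_epi32(15);
    let _r: [f32; 16] = transmute(_mm512_maskz_permutexvar_ps(0b0101_0101_0101_0101, idx, a));
    // _r == [15.0, 0.0, 15.0, 0.0, ...] repeated for all eight pairs
}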
11721
11722 /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
11723 ///
11724 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_pd&expand=4322)
11725 #[inline]
11726 #[target_feature(enable = "avx512f")]
11727 #[cfg_attr(test, assert_instr(vpermpd))]
11728 pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
11729 transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
11730 }
11731
11732 /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11733 ///
11734 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_pd&expand=4320)
11735 #[inline]
11736 #[target_feature(enable = "avx512f")]
11737 #[cfg_attr(test, assert_instr(vpermpd))]
11738 pub unsafe fn _mm512_mask_permutexvar_pd(
11739 src: __m512d,
11740 k: __mmask8,
11741 idx: __m512i,
11742 a: __m512d,
11743 ) -> __m512d {
11744 let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
11745 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
11746 }
11747
11748 /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11749 ///
11750 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_pd&expand=4321)
11751 #[inline]
11752 #[target_feature(enable = "avx512f")]
11753 #[cfg_attr(test, assert_instr(vpermpd))]
11754 pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
11755 let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
11756 let zero = _mm512_setzero_pd().as_f64x8();
11757 transmute(simd_select_bitmask(k, permute, zero))
11758 }
11759
11760 /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
11761 ///
11762 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_epi32&expand=4238)
11763 #[inline]
11764 #[target_feature(enable = "avx512f")]
11765 #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
11766 pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
11767 transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
11768 }
11769
11770 /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
11771 ///
11772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_epi32&expand=4235)
11773 #[inline]
11774 #[target_feature(enable = "avx512f")]
11775 #[cfg_attr(test, assert_instr(vpermt2d))]
11776 pub unsafe fn _mm512_mask_permutex2var_epi32(
11777 a: __m512i,
11778 k: __mmask16,
11779 idx: __m512i,
11780 b: __m512i,
11781 ) -> __m512i {
11782 let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
11783 transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
11784 }
11785
11786 /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11787 ///
11788 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_epi32&expand=4237)
11789 #[inline]
11790 #[target_feature(enable = "avx512f")]
11791 #[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
11792 pub unsafe fn _mm512_maskz_permutex2var_epi32(
11793 k: __mmask16,
11794 a: __m512i,
11795 idx: __m512i,
11796 b: __m512i,
11797 ) -> __m512i {
11798 let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
11799 let zero = _mm512_setzero_si512().as_i32x16();
11800 transmute(simd_select_bitmask(k, permute, zero))
11801 }
11802
11803 /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
11804 ///
11805 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_epi32&expand=4236)
11806 #[inline]
11807 #[target_feature(enable = "avx512f")]
11808 #[cfg_attr(test, assert_instr(vpermi2d))]
11809 pub unsafe fn _mm512_mask2_permutex2var_epi32(
11810 a: __m512i,
11811 idx: __m512i,
11812 k: __mmask16,
11813 b: __m512i,
11814 ) -> __m512i {
11815 let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
11816 transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
11817 }
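
// Illustrative sketch (test-only helper, not part of the public API): in the two-source
// permutes the low four index bits pick an element and index bit 4 picks the source
// (0 = a, 1 = b), so indices 0..15 read from `a` and 16..31 read from `b`. Inputs are
// arbitrary example values chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_permutex2var_epi32() {
    let a: __m512i = transmute([0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let b: __m512i = transmute([
        100i32, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
    ]);
    // Interleave the low halves of a and b.
    let idx: __m512i = transmute([0i32, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]);
    let _r: [i32; 16] = transmute(_mm512_permutex2var_epi32(a, idx, b));
    // _r == [0, 100, 1, 101, 2, 102, 3, 103, 4, 104, 5, 105, 6, 106, 7, 107]
}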
11818
11819 /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
11820 ///
11821 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_epi64&expand=4250)
11822 #[inline]
11823 #[target_feature(enable = "avx512f")]
11824 #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
11825 pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
11826 transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
11827 }
11828
11829 /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
11830 ///
11831 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_epi64&expand=4247)
11832 #[inline]
11833 #[target_feature(enable = "avx512f")]
11834 #[cfg_attr(test, assert_instr(vpermt2q))]
11835 pub unsafe fn _mm512_mask_permutex2var_epi64(
11836 a: __m512i,
11837 k: __mmask8,
11838 idx: __m512i,
11839 b: __m512i,
11840 ) -> __m512i {
11841 let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
11842 transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
11843 }
11844
11845 /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11846 ///
11847 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_epi64&expand=4249)
11848 #[inline]
11849 #[target_feature(enable = "avx512f")]
11850 #[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
11851 pub unsafe fn _mm512_maskz_permutex2var_epi64(
11852 k: __mmask8,
11853 a: __m512i,
11854 idx: __m512i,
11855 b: __m512i,
11856 ) -> __m512i {
11857 let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
11858 let zero = _mm512_setzero_si512().as_i64x8();
11859 transmute(simd_select_bitmask(k, permute, zero))
11860 }
11861
11862 /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
11863 ///
11864 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_epi64&expand=4248)
11865 #[inline]
11866 #[target_feature(enable = "avx512f")]
11867 #[cfg_attr(test, assert_instr(vpermi2q))]
11868 pub unsafe fn _mm512_mask2_permutex2var_epi64(
11869 a: __m512i,
11870 idx: __m512i,
11871 k: __mmask8,
11872 b: __m512i,
11873 ) -> __m512i {
11874 let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
11875 transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
11876 }
11877
11878 /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
11879 ///
11880 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_ps&expand=4286)
11881 #[inline]
11882 #[target_feature(enable = "avx512f")]
11883 #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
11884 pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
11885 transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
11886 }
11887
11888 /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
11889 ///
11890 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_ps&expand=4283)
11891 #[inline]
11892 #[target_feature(enable = "avx512f")]
11893 #[cfg_attr(test, assert_instr(vpermt2ps))]
11894 pub unsafe fn _mm512_mask_permutex2var_ps(
11895 a: __m512,
11896 k: __mmask16,
11897 idx: __m512i,
11898 b: __m512,
11899 ) -> __m512 {
11900 let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
11901 transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
11902 }
11903
11904 /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11905 ///
11906 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_ps&expand=4285)
11907 #[inline]
11908 #[target_feature(enable = "avx512f")]
11909 #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
11910 pub unsafe fn _mm512_maskz_permutex2var_ps(
11911 k: __mmask16,
11912 a: __m512,
11913 idx: __m512i,
11914 b: __m512,
11915 ) -> __m512 {
11916 let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
11917 let zero = _mm512_setzero_ps().as_f32x16();
11918 transmute(simd_select_bitmask(k, permute, zero))
11919 }
11920
11921 /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
11922 ///
11923 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_ps&expand=4284)
11924 #[inline]
11925 #[target_feature(enable = "avx512f")]
11926 #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
11927 pub unsafe fn _mm512_mask2_permutex2var_ps(
11928 a: __m512,
11929 idx: __m512i,
11930 k: __mmask16,
11931 b: __m512,
11932 ) -> __m512 {
11933 let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
11934 let idx = transmute::<_, f32x16>(idx); // per the documentation above, unselected elements are copied from `idx`, not zeroed
11935 transmute(simd_select_bitmask(k, permute, idx))
11936 }
11937
11938 /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
11939 ///
11940 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_pd&expand=4274)
11941 #[inline]
11942 #[target_feature(enable = "avx512f")]
11943 #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
11944 pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
11945 transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
11946 }
11947
11948 /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
11949 ///
11950 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_pd&expand=4271)
11951 #[inline]
11952 #[target_feature(enable = "avx512f")]
11953 #[cfg_attr(test, assert_instr(vpermt2pd))]
11954 pub unsafe fn _mm512_mask_permutex2var_pd(
11955 a: __m512d,
11956 k: __mmask8,
11957 idx: __m512i,
11958 b: __m512d,
11959 ) -> __m512d {
11960 let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
11961 transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
11962 }
11963
11964 /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11965 ///
11966 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_pd&expand=4273)
11967 #[inline]
11968 #[target_feature(enable = "avx512f")]
11969 #[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
11970 pub unsafe fn _mm512_maskz_permutex2var_pd(
11971 k: __mmask8,
11972 a: __m512d,
11973 idx: __m512i,
11974 b: __m512d,
11975 ) -> __m512d {
11976 let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
11977 let zero = _mm512_setzero_pd().as_f64x8();
11978 transmute(simd_select_bitmask(k, permute, zero))
11979 }
11980
11981 /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
11982 ///
11983 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_pd&expand=4272)
11984 #[inline]
11985 #[target_feature(enable = "avx512f")]
11986 #[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
11987 pub unsafe fn _mm512_mask2_permutex2var_pd(
11988 a: __m512d,
11989 idx: __m512i,
11990 k: __mmask8,
11991 b: __m512d,
11992 ) -> __m512d {
11993 let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
11994 let idx = transmute::<_, f64x8>(idx); // per the documentation above, unselected elements are copied from `idx`, not zeroed
11995 transmute(simd_select_bitmask(k, permute, idx))
11996 }
11997
11998 /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
11999 ///
12000 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_epi32&expand=5150)
12001 #[inline]
12002 #[target_feature(enable = "avx512f")]
12003 #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] //should be vpshufd, but generates vpermilps
12004 #[rustc_args_required_const(1)]
12005 pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
12006 let imm8 = (imm8 & 0xFF) as u8;
12007
12008 let a = a.as_i32x16();
12009 macro_rules! shuffle4 {
12010 (
12011 $a:expr,
12012 $b:expr,
12013 $c:expr,
12014 $d:expr,
12015 $e:expr,
12016 $f:expr,
12017 $g:expr,
12018 $h:expr,
12019 $i:expr,
12020 $j:expr,
12021 $k:expr,
12022 $l:expr,
12023 $m:expr,
12024 $n:expr,
12025 $o:expr,
12026 $p:expr
12027 ) => {
12028 simd_shuffle16(
12029 a,
12030 a,
12031 [
12032 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12033 ],
12034 )
12035 };
12036 }
12037 macro_rules! shuffle3 {
12038 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12039 match (imm8 >> 6) & 0x3 {
12040 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
12041 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
12042 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
12043 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
12044 }
12045 };
12046 }
12047 macro_rules! shuffle2 {
12048 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12049 match (imm8 >> 4) & 0x3 {
12050 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
12051 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
12052 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
12053 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
12054 }
12055 };
12056 }
12057 macro_rules! shuffle1 {
12058 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12059 match (imm8 >> 2) & 0x3 {
12060 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
12061 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
12062 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
12063 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
12064 }
12065 };
12066 }
12067 let r: i32x16 = match imm8 & 0x3 {
12068 0 => shuffle1!(0, 4, 8, 12),
12069 1 => shuffle1!(1, 5, 9, 13),
12070 2 => shuffle1!(2, 6, 10, 14),
12071 _ => shuffle1!(3, 7, 11, 15),
12072 };
12073 transmute(r)
12074 }
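
// Illustrative sketch (test-only helper, not part of the public API): `_MM_PERM_ENUM` is
// treated here as a plain 8-bit control, like the other imm8 parameters in this module, so
// a binary literal is passed directly; that, and the input values, are assumptions for this
// sketch. Each 2-bit field selects one of the four 32-bit elements of every 128-bit lane,
// so 0b00_01_10_11 reverses each lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_shuffle_epi32() {
    let a: __m512i = transmute([0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let _r: [i32; 16] = transmute(_mm512_shuffle_epi32(a, 0b00_01_10_11));
    // _r == [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
}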
12075
12076 /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12077 ///
12078 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_epi32&expand=5148)
12079 #[inline]
12080 #[target_feature(enable = "avx512f")]
12081 #[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] //should be vpshufd; the unmasked form generates vpermilps
12082 #[rustc_args_required_const(3)]
12083 pub unsafe fn _mm512_mask_shuffle_epi32(
12084 src: __m512i,
12085 k: __mmask16,
12086 a: __m512i,
12087 imm8: _MM_PERM_ENUM,
12088 ) -> __m512i {
12089 let imm8 = (imm8 & 0xFF) as u8;
12090
12091 let a = a.as_i32x16();
12092 macro_rules! shuffle4 {
12093 (
12094 $a:expr,
12095 $b:expr,
12096 $c:expr,
12097 $d:expr,
12098 $e:expr,
12099 $f:expr,
12100 $g:expr,
12101 $h:expr,
12102 $i:expr,
12103 $j:expr,
12104 $k:expr,
12105 $l:expr,
12106 $m:expr,
12107 $n:expr,
12108 $o:expr,
12109 $p:expr
12110 ) => {
12111 simd_shuffle16(
12112 a,
12113 a,
12114 [
12115 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12116 ],
12117 )
12118 };
12119 }
12120 macro_rules! shuffle3 {
12121 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12122 match (imm8 >> 6) & 0x3 {
12123 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
12124 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
12125 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
12126 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
12127 }
12128 };
12129 }
12130 macro_rules! shuffle2 {
12131 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12132 match (imm8 >> 4) & 0x3 {
12133 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
12134 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
12135 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
12136 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
12137 }
12138 };
12139 }
12140 macro_rules! shuffle1 {
12141 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12142 match (imm8 >> 2) & 0x3 {
12143 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
12144 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
12145 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
12146 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
12147 }
12148 };
12149 }
12150 let shuffle: i32x16 = match imm8 & 0x3 {
12151 0 => shuffle1!(0, 4, 8, 12),
12152 1 => shuffle1!(1, 5, 9, 13),
12153 2 => shuffle1!(2, 6, 10, 14),
12154 _ => shuffle1!(3, 7, 11, 15),
12155 };
12156 transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
12157 }
12158
12159 /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12160 ///
12161 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_epi32&expand=5149)
12162 #[inline]
12163 #[target_feature(enable = "avx512f")]
12164 #[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] //should be vpshufd; the unmasked form generates vpermilps
12165 #[rustc_args_required_const(2)]
12166 pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
12167 let imm8 = (imm8 & 0xFF) as u8;
12168
12169 let a = a.as_i32x16();
12170 macro_rules! shuffle4 {
12171 (
12172 $a:expr,
12173 $b:expr,
12174 $c:expr,
12175 $d:expr,
12176 $e:expr,
12177 $f:expr,
12178 $g:expr,
12179 $h:expr,
12180 $i:expr,
12181 $j:expr,
12182 $k:expr,
12183 $l:expr,
12184 $m:expr,
12185 $n:expr,
12186 $o:expr,
12187 $p:expr
12188 ) => {
12189 simd_shuffle16(
12190 a,
12191 a,
12192 [
12193 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12194 ],
12195 )
12196 };
12197 }
12198 macro_rules! shuffle3 {
12199 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12200 match (imm8 >> 6) & 0x3 {
12201 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
12202 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
12203 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
12204 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
12205 }
12206 };
12207 }
12208 macro_rules! shuffle2 {
12209 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12210 match (imm8 >> 4) & 0x3 {
12211 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
12212 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
12213 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
12214 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
12215 }
12216 };
12217 }
12218 macro_rules! shuffle1 {
12219 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12220 match (imm8 >> 2) & 0x3 {
12221 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
12222 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
12223 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
12224 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
12225 }
12226 };
12227 }
12228 let shuffle: i32x16 = match imm8 & 0x3 {
12229 0 => shuffle1!(0, 4, 8, 12),
12230 1 => shuffle1!(1, 5, 9, 13),
12231 2 => shuffle1!(2, 6, 10, 14),
12232 _ => shuffle1!(3, 7, 11, 15),
12233 };
12234 let zero = _mm512_setzero_si512().as_i32x16();
12235 transmute(simd_select_bitmask(k, shuffle, zero))
12236 }
12237
12238 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
12239 ///
12240 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_ps&expand=5203)
12241 #[inline]
12242 #[target_feature(enable = "avx512f")]
12243 #[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
12244 #[rustc_args_required_const(2)]
12245 pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 {
12246 assert!(imm8 >= 0 && imm8 <= 255);
12247 let imm8 = (imm8 & 0xFF) as u8;
12248 macro_rules! shuffle4 {
12249 (
12250 $a:expr,
12251 $b:expr,
12252 $c:expr,
12253 $d:expr,
12254 $e:expr,
12255 $f:expr,
12256 $g:expr,
12257 $h:expr,
12258 $i:expr,
12259 $j:expr,
12260 $k:expr,
12261 $l:expr,
12262 $m:expr,
12263 $n:expr,
12264 $o:expr,
12265 $p:expr
12266 ) => {
12267 simd_shuffle16(
12268 a,
12269 b,
12270 [
12271 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12272 ],
12273 )
12274 };
12275 }
12276 macro_rules! shuffle3 {
12277 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12278 match (imm8 >> 6) & 0x3 {
12279 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
12280 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
12281 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
12282 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
12283 }
12284 };
12285 }
12286 macro_rules! shuffle2 {
12287 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12288 match (imm8 >> 4) & 0x3 {
12289 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
12290 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
12291 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
12292 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
12293 }
12294 };
12295 }
12296 macro_rules! shuffle1 {
12297 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12298 match (imm8 >> 2) & 0x3 {
12299 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
12300 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
12301 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
12302 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
12303 }
12304 };
12305 }
12306 match imm8 & 0x3 {
12307 0 => shuffle1!(0, 4, 8, 12),
12308 1 => shuffle1!(1, 5, 9, 13),
12309 2 => shuffle1!(2, 6, 10, 14),
12310 _ => shuffle1!(3, 7, 11, 15),
12311 }
12312 }
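
// Illustrative sketch (test-only helper, not part of the public API): per 128-bit lane, the
// two low 2-bit fields of imm8 pick the lane's first two outputs from `a` and the two high
// fields pick the last two outputs from `b`. With 0b11_10_01_00 each lane becomes
// [a0, a1, b2, b3]. Inputs are arbitrary example values chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_shuffle_ps() {
    let a: __m512 = transmute([
        0.0f32, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    ]);
    let b: __m512 = transmute([
        100.0f32, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0, 111.0,
        112.0, 113.0, 114.0, 115.0,
    ]);
    let _r: [f32; 16] = transmute(_mm512_shuffle_ps(a, b, 0b11_10_01_00));
    // _r == [0.0, 1.0, 102.0, 103.0, 4.0, 5.0, 106.0, 107.0,
    //        8.0, 9.0, 110.0, 111.0, 12.0, 13.0, 114.0, 115.0]
}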
12313
12314 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12315 ///
12316 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_ps&expand=5201)
12317 #[inline]
12318 #[target_feature(enable = "avx512f")]
12319 #[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
12320 #[rustc_args_required_const(4)]
12321 pub unsafe fn _mm512_mask_shuffle_ps(
12322 src: __m512,
12323 k: __mmask16,
12324 a: __m512,
12325 b: __m512,
12326 imm8: i32,
12327 ) -> __m512 {
12328 assert!(imm8 >= 0 && imm8 <= 255);
12329 let imm8 = (imm8 & 0xFF) as u8;
12330 macro_rules! shuffle4 {
12331 (
12332 $a:expr,
12333 $b:expr,
12334 $c:expr,
12335 $d:expr,
12336 $e:expr,
12337 $f:expr,
12338 $g:expr,
12339 $h:expr,
12340 $i:expr,
12341 $j:expr,
12342 $k:expr,
12343 $l:expr,
12344 $m:expr,
12345 $n:expr,
12346 $o:expr,
12347 $p:expr
12348 ) => {
12349 simd_shuffle16(
12350 a,
12351 b,
12352 [
12353 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12354 ],
12355 )
12356 };
12357 }
12358 macro_rules! shuffle3 {
12359 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12360 match (imm8 >> 6) & 0x3 {
12361 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
12362 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
12363 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
12364 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
12365 }
12366 };
12367 }
12368 macro_rules! shuffle2 {
12369 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12370 match (imm8 >> 4) & 0x3 {
12371 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
12372 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
12373 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
12374 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
12375 }
12376 };
12377 }
12378 macro_rules! shuffle1 {
12379 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12380 match (imm8 >> 2) & 0x3 {
12381 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
12382 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
12383 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
12384 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
12385 }
12386 };
12387 }
12388 let shuffle = match imm8 & 0x3 {
12389 0 => shuffle1!(0, 4, 8, 12),
12390 1 => shuffle1!(1, 5, 9, 13),
12391 2 => shuffle1!(2, 6, 10, 14),
12392 _ => shuffle1!(3, 7, 11, 15),
12393 };
12394
12395 transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
12396 }
12397
12398 /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12399 ///
12400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_ps&expand=5202)
12401 #[inline]
12402 #[target_feature(enable = "avx512f")]
12403 #[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
12404 #[rustc_args_required_const(3)]
12405 pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
12406 assert!(imm8 >= 0 && imm8 <= 255);
12407 let imm8 = (imm8 & 0xFF) as u8;
12408 macro_rules! shuffle4 {
12409 (
12410 $a:expr,
12411 $b:expr,
12412 $c:expr,
12413 $d:expr,
12414 $e:expr,
12415 $f:expr,
12416 $g:expr,
12417 $h:expr,
12418 $i:expr,
12419 $j:expr,
12420 $k:expr,
12421 $l:expr,
12422 $m:expr,
12423 $n:expr,
12424 $o:expr,
12425 $p:expr
12426 ) => {
12427 simd_shuffle16(
12428 a,
12429 b,
12430 [
12431 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12432 ],
12433 )
12434 };
12435 }
12436 macro_rules! shuffle3 {
12437 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12438 match (imm8 >> 6) & 0x3 {
12439 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
12440 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
12441 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
12442 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
12443 }
12444 };
12445 }
12446 macro_rules! shuffle2 {
12447 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12448 match (imm8 >> 4) & 0x3 {
12449 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
12450 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
12451 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
12452 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
12453 }
12454 };
12455 }
12456 macro_rules! shuffle1 {
12457 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12458 match (imm8 >> 2) & 0x3 {
12459 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
12460 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
12461 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
12462 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
12463 }
12464 };
12465 }
12466 let shuffle = match imm8 & 0x3 {
12467 0 => shuffle1!(0, 4, 8, 12),
12468 1 => shuffle1!(1, 5, 9, 13),
12469 2 => shuffle1!(2, 6, 10, 14),
12470 _ => shuffle1!(3, 7, 11, 15),
12471 };
12472
12473 let zero = _mm512_setzero_ps().as_f32x16();
12474 transmute(simd_select_bitmask(k, shuffle, zero))
12475 }
12476
12477 /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
12478 ///
12479 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_pd&expand=5192)
12480 #[inline]
12481 #[target_feature(enable = "avx512f")]
12482 #[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
12483 #[rustc_args_required_const(2)]
12484 pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
12485 assert!(imm8 >= 0 && imm8 <= 255);
12486 let imm8 = (imm8 & 0xFF) as u8;
12487 macro_rules! shuffle8 {
12488 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
12489 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
12490 };
12491 }
12492 macro_rules! shuffle7 {
12493 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
12494 match (imm8 >> 7) & 0x1 {
12495 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
12496 _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
12497 }
12498 };
12499 }
12500 macro_rules! shuffle6 {
12501 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
12502 match (imm8 >> 6) & 0x1 {
12503 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
12504 _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
12505 }
12506 };
12507 }
12508 macro_rules! shuffle5 {
12509 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
12510 match (imm8 >> 5) & 0x1 {
12511 0 => shuffle6!($a, $b, $c, $d, $e, 12),
12512 _ => shuffle6!($a, $b, $c, $d, $e, 13),
12513 }
12514 };
12515 }
12516 macro_rules! shuffle4 {
12517 ($a:expr, $b:expr, $c:expr, $d:expr) => {
12518 match (imm8 >> 4) & 0x1 {
12519 0 => shuffle5!($a, $b, $c, $d, 4),
12520 _ => shuffle5!($a, $b, $c, $d, 5),
12521 }
12522 };
12523 }
12524 macro_rules! shuffle3 {
12525 ($a:expr, $b:expr, $c:expr) => {
12526 match (imm8 >> 3) & 0x1 {
12527 0 => shuffle4!($a, $b, $c, 10),
12528 _ => shuffle4!($a, $b, $c, 11),
12529 }
12530 };
12531 }
12532 macro_rules! shuffle2 {
12533 ($a:expr, $b:expr) => {
12534 match (imm8 >> 2) & 0x1 {
12535 0 => shuffle3!($a, $b, 2),
12536 _ => shuffle3!($a, $b, 3),
12537 }
12538 };
12539 }
12540 macro_rules! shuffle1 {
12541 ($a:expr) => {
12542 match (imm8 >> 1) & 0x1 {
12543 0 => shuffle2!($a, 8),
12544 _ => shuffle2!($a, 9),
12545 }
12546 };
12547 }
12548 match imm8 & 0x1 {
12549 0 => shuffle1!(0),
12550 _ => shuffle1!(1),
12551 }
12552 }
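
// Illustrative sketch (test-only helper, not part of the public API): bit j of imm8 controls
// output element j; even outputs are taken from `a`, odd outputs from `b`, and the bit picks
// the low or high element of the corresponding 128-bit pair. With imm8 = 0b0000_0001 only
// output element 0 takes the high element of its pair. Inputs are arbitrary example values
// chosen for this sketch.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_shuffle_pd() {
    let a: __m512d = transmute([0.0f64, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
    let b: __m512d = transmute([10.0f64, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0]);
    let _r: [f64; 8] = transmute(_mm512_shuffle_pd(a, b, 0b0000_0001));
    // _r == [1.0, 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0]
}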
12553
12554 /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12555 ///
12556 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_pd&expand=5190)
12557 #[inline]
12558 #[target_feature(enable = "avx512f")]
12559 #[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
12560 #[rustc_args_required_const(4)]
12561 pub unsafe fn _mm512_mask_shuffle_pd(
12562 src: __m512d,
12563 k: __mmask8,
12564 a: __m512d,
12565 b: __m512d,
12566 imm8: i32,
12567 ) -> __m512d {
12568 assert!(imm8 >= 0 && imm8 <= 255);
12569 let imm8 = (imm8 & 0xFF) as u8;
12570 macro_rules! shuffle8 {
12571 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
12572 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
12573 };
12574 }
12575 macro_rules! shuffle7 {
12576 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
12577 match (imm8 >> 7) & 0x1 {
12578 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
12579 _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
12580 }
12581 };
12582 }
12583 macro_rules! shuffle6 {
12584 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
12585 match (imm8 >> 6) & 0x1 {
12586 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
12587 _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
12588 }
12589 };
12590 }
12591 macro_rules! shuffle5 {
12592 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
12593 match (imm8 >> 5) & 0x1 {
12594 0 => shuffle6!($a, $b, $c, $d, $e, 12),
12595 _ => shuffle6!($a, $b, $c, $d, $e, 13),
12596 }
12597 };
12598 }
12599 macro_rules! shuffle4 {
12600 ($a:expr, $b:expr, $c:expr, $d:expr) => {
12601 match (imm8 >> 4) & 0x1 {
12602 0 => shuffle5!($a, $b, $c, $d, 4),
12603 _ => shuffle5!($a, $b, $c, $d, 5),
12604 }
12605 };
12606 }
12607 macro_rules! shuffle3 {
12608 ($a:expr, $b:expr, $c:expr) => {
12609 match (imm8 >> 3) & 0x1 {
12610 0 => shuffle4!($a, $b, $c, 10),
12611 _ => shuffle4!($a, $b, $c, 11),
12612 }
12613 };
12614 }
12615 macro_rules! shuffle2 {
12616 ($a:expr, $b:expr) => {
12617 match (imm8 >> 2) & 0x1 {
12618 0 => shuffle3!($a, $b, 2),
12619 _ => shuffle3!($a, $b, 3),
12620 }
12621 };
12622 }
12623 macro_rules! shuffle1 {
12624 ($a:expr) => {
12625 match (imm8 >> 1) & 0x1 {
12626 0 => shuffle2!($a, 8),
12627 _ => shuffle2!($a, 9),
12628 }
12629 };
12630 }
12631 let shuffle = match imm8 & 0x1 {
12632 0 => shuffle1!(0),
12633 _ => shuffle1!(1),
12634 };
12635
12636 transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
12637 }
12638
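// Illustrative writemask sketch (hypothetical helper, not upstream test code): only
// results whose mask bit is set come from the shuffle; the other elements are copied
// from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_mask_shuffle_pd() {
    let src = _mm512_set1_pd(-1.);
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
    let r = _mm512_mask_shuffle_pd(src, 0b0000_1111, a, b, 0b1111_1111);
    let e: [f64; 8] = [1., 9., 3., 11., -1., -1., -1., -1.];
    assert_eq!(mem::transmute::<__m512d, [f64; 8]>(r), e);
}
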
12639 /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12640 ///
12641 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_pd&expand=5191)
12642 #[inline]
12643 #[target_feature(enable = "avx512f")]
12644 #[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
12645 #[rustc_args_required_const(3)]
12646 pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d {
12647 assert!(imm8 >= 0 && imm8 <= 255);
12648 let imm8 = (imm8 & 0xFF) as u8;
12649 macro_rules! shuffle8 {
12650 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
12651 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
12652 };
12653 }
12654 macro_rules! shuffle7 {
12655 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
12656 match (imm8 >> 7) & 0x1 {
12657 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
12658 _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
12659 }
12660 };
12661 }
12662 macro_rules! shuffle6 {
12663 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
12664 match (imm8 >> 6) & 0x1 {
12665 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
12666 _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
12667 }
12668 };
12669 }
12670 macro_rules! shuffle5 {
12671 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
12672 match (imm8 >> 5) & 0x1 {
12673 0 => shuffle6!($a, $b, $c, $d, $e, 12),
12674 _ => shuffle6!($a, $b, $c, $d, $e, 13),
12675 }
12676 };
12677 }
12678 macro_rules! shuffle4 {
12679 ($a:expr, $b:expr, $c:expr, $d:expr) => {
12680 match (imm8 >> 4) & 0x1 {
12681 0 => shuffle5!($a, $b, $c, $d, 4),
12682 _ => shuffle5!($a, $b, $c, $d, 5),
12683 }
12684 };
12685 }
12686 macro_rules! shuffle3 {
12687 ($a:expr, $b:expr, $c:expr) => {
12688 match (imm8 >> 3) & 0x1 {
12689 0 => shuffle4!($a, $b, $c, 10),
12690 _ => shuffle4!($a, $b, $c, 11),
12691 }
12692 };
12693 }
12694 macro_rules! shuffle2 {
12695 ($a:expr, $b:expr) => {
12696 match (imm8 >> 2) & 0x1 {
12697 0 => shuffle3!($a, $b, 2),
12698 _ => shuffle3!($a, $b, 3),
12699 }
12700 };
12701 }
12702 macro_rules! shuffle1 {
12703 ($a:expr) => {
12704 match (imm8 >> 1) & 0x1 {
12705 0 => shuffle2!($a, 8),
12706 _ => shuffle2!($a, 9),
12707 }
12708 };
12709 }
12710 let shuffle = match imm8 & 0x1 {
12711 0 => shuffle1!(0),
12712 _ => shuffle1!(1),
12713 };
12714
12715 let zero = _mm512_setzero_pd().as_f64x8();
12716 transmute(simd_select_bitmask(k, shuffle, zero))
12717 }
12718
12719 /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
12720 ///
12721 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_i32&expand=5177)
12722 #[inline]
12723 #[target_feature(enable = "avx512f")]
12724 #[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] // should be vshufi32x4, but vshufi64x2 is generated
12725 #[rustc_args_required_const(2)]
12726 pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
12727 assert!(imm8 >= 0 && imm8 <= 255);
12728 let imm8 = (imm8 & 0xFF) as u8;
12729 let a = a.as_i32x16();
12730 let b = b.as_i32x16();
12731 macro_rules! shuffle4 {
12732 (
12733 $a:expr,
12734 $b:expr,
12735 $c:expr,
12736 $d:expr,
12737 $e:expr,
12738 $f:expr,
12739 $g:expr,
12740 $h:expr,
12741 $i:expr,
12742 $j:expr,
12743 $k:expr,
12744 $l:expr,
12745 $m:expr,
12746 $n:expr,
12747 $o:expr,
12748 $p:expr
12749 ) => {
12750 simd_shuffle16(
12751 a,
12752 b,
12753 [
12754 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12755 ],
12756 )
12757 };
12758 }
12759 macro_rules! shuffle3 {
12760 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12761 match (imm8 >> 6) & 0x3 {
12762 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
12763 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
12764 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
12765 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
12766 }
12767 };
12768 }
12769 macro_rules! shuffle2 {
12770 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12771 match (imm8 >> 4) & 0x3 {
12772 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
12773 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
12774 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
12775 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
12776 }
12777 };
12778 }
12779 macro_rules! shuffle1 {
12780 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12781 match (imm8 >> 2) & 0x3 {
12782 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
12783 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
12784 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
12785 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
12786 }
12787 };
12788 }
12789 let r: i32x16 = match imm8 & 0x3 {
12790 0 => shuffle1!(0, 1, 2, 3),
12791 1 => shuffle1!(4, 5, 6, 7),
12792 2 => shuffle1!(8, 9, 10, 11),
12793 _ => shuffle1!(12, 13, 14, 15),
12794 };
12795
12796 transmute(r)
12797 }
12798
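// Illustrative usage sketch (hypothetical helper, not upstream test code; assumes the
// `_mm512_setr_epi32` constructor defined elsewhere in this file). The four 2-bit
// fields of imm8 each select a 128-bit lane: the low two destination lanes come from
// `a`, the high two from `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_shuffle_i32x4() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_setr_epi32(
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    );
    // 0b01_00_11_10: dst lanes are a.lane2, a.lane3, b.lane0, b.lane1.
    let r = _mm512_shuffle_i32x4(a, b, 0b0100_1110);
    let e: [i32; 16] = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23];
    assert_eq!(mem::transmute::<__m512i, [i32; 16]>(r), e);
}
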
12799 /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12800 ///
12801 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_i32x&expand=5175)
12802 #[inline]
12803 #[target_feature(enable = "avx512f")]
12804 #[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
12805 #[rustc_args_required_const(4)]
12806 pub unsafe fn _mm512_mask_shuffle_i32x4(
12807 src: __m512i,
12808 k: __mmask16,
12809 a: __m512i,
12810 b: __m512i,
12811 imm8: i32,
12812 ) -> __m512i {
12813 assert!(imm8 >= 0 && imm8 <= 255);
12814 let imm8 = (imm8 & 0xFF) as u8;
12815 let a = a.as_i32x16();
12816 let b = b.as_i32x16();
12817 macro_rules! shuffle4 {
12818 (
12819 $a:expr,
12820 $b:expr,
12821 $c:expr,
12822 $d:expr,
12823 $e:expr,
12824 $f:expr,
12825 $g:expr,
12826 $h:expr,
12827 $i:expr,
12828 $j:expr,
12829 $k:expr,
12830 $l:expr,
12831 $m:expr,
12832 $n:expr,
12833 $o:expr,
12834 $p:expr
12835 ) => {
12836 simd_shuffle16(
12837 a,
12838 b,
12839 [
12840 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12841 ],
12842 )
12843 };
12844 }
12845 macro_rules! shuffle3 {
12846 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12847 match (imm8 >> 6) & 0x3 {
12848 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
12849 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
12850 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
12851 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
12852 }
12853 };
12854 }
12855 macro_rules! shuffle2 {
12856 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12857 match (imm8 >> 4) & 0x3 {
12858 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
12859 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
12860 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
12861 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
12862 }
12863 };
12864 }
12865 macro_rules! shuffle1 {
12866 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12867 match (imm8 >> 2) & 0x3 {
12868 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
12869 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
12870 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
12871 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
12872 }
12873 };
12874 }
12875 let shuffle = match imm8 & 0x3 {
12876 0 => shuffle1!(0, 1, 2, 3),
12877 1 => shuffle1!(4, 5, 6, 7),
12878 2 => shuffle1!(8, 9, 10, 11),
12879 _ => shuffle1!(12, 13, 14, 15),
12880 };
12881
12882 transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
12883 }
12884
12885 /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12886 ///
12887 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_i32&expand=5176)
12888 #[inline]
12889 #[target_feature(enable = "avx512f")]
12890 #[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
12891 #[rustc_args_required_const(3)]
12892 pub unsafe fn _mm512_maskz_shuffle_i32x4(
12893 k: __mmask16,
12894 a: __m512i,
12895 b: __m512i,
12896 imm8: i32,
12897 ) -> __m512i {
12898 assert!(imm8 >= 0 && imm8 <= 255);
12899 let imm8 = (imm8 & 0xFF) as u8;
12900 let a = a.as_i32x16();
12901 let b = b.as_i32x16();
12902 macro_rules! shuffle4 {
12903 (
12904 $a:expr,
12905 $b:expr,
12906 $c:expr,
12907 $d:expr,
12908 $e:expr,
12909 $f:expr,
12910 $g:expr,
12911 $h:expr,
12912 $i:expr,
12913 $j:expr,
12914 $k:expr,
12915 $l:expr,
12916 $m:expr,
12917 $n:expr,
12918 $o:expr,
12919 $p:expr
12920 ) => {
12921 simd_shuffle16(
12922 a,
12923 b,
12924 [
12925 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
12926 ],
12927 )
12928 };
12929 }
12930 macro_rules! shuffle3 {
12931 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
12932 match (imm8 >> 6) & 0x3 {
12933 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
12934 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
12935 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
12936 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
12937 }
12938 };
12939 }
12940 macro_rules! shuffle2 {
12941 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
12942 match (imm8 >> 4) & 0x3 {
12943 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
12944 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
12945 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
12946 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
12947 }
12948 };
12949 }
12950 macro_rules! shuffle1 {
12951 ($a:expr, $e:expr, $i: expr, $m: expr) => {
12952 match (imm8 >> 2) & 0x3 {
12953 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
12954 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
12955 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
12956 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
12957 }
12958 };
12959 }
12960 let shuffle = match imm8 & 0x3 {
12961 0 => shuffle1!(0, 1, 2, 3),
12962 1 => shuffle1!(4, 5, 6, 7),
12963 2 => shuffle1!(8, 9, 10, 11),
12964 _ => shuffle1!(12, 13, 14, 15),
12965 };
12966
12967 let zero = _mm512_setzero_si512().as_i32x16();
12968 transmute(simd_select_bitmask(k, shuffle, zero))
12969 }
12970
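// Illustrative zeromask sketch (hypothetical helper, not upstream test code): with
// k = 0b0000_0000_1111_1111 the low eight results come from the shuffle and the high
// eight are zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_maskz_shuffle_i32x4() {
    let a = _mm512_set1_epi32(7);
    let b = _mm512_set1_epi32(9);
    let r = _mm512_maskz_shuffle_i32x4(0b0000_0000_1111_1111, a, b, 0b0100_1110);
    let e: [i32; 16] = [7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0];
    assert_eq!(mem::transmute::<__m512i, [i32; 16]>(r), e);
}
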
12971 /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
12972 ///
12973 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_i64x2&expand=5183)
12974 #[inline]
12975 #[target_feature(enable = "avx512f")]
12976 #[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
12977 #[rustc_args_required_const(2)]
12978 pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
12979 assert!(imm8 >= 0 && imm8 <= 255);
12980 let imm8 = (imm8 & 0xFF) as u8;
12981 macro_rules! shuffle4 {
12982 (
12983 $a:expr,
12984 $b:expr,
12985 $c:expr,
12986 $d:expr,
12987 $e:expr,
12988 $f:expr,
12989 $g:expr,
12990 $h:expr
12991 ) => {
12992 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
12993 };
12994 }
12995 macro_rules! shuffle3 {
12996 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
12997 match (imm8 >> 6) & 0x3 {
12998 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
12999 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
13000 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
13001 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
13002 }
13003 };
13004 }
13005 macro_rules! shuffle2 {
13006 ($a:expr, $b:expr, $e:expr, $f:expr) => {
13007 match (imm8 >> 4) & 0x3 {
13008 0 => shuffle3!($a, $b, $e, $f, 8, 9),
13009 1 => shuffle3!($a, $b, $e, $f, 10, 11),
13010 2 => shuffle3!($a, $b, $e, $f, 12, 13),
13011 _ => shuffle3!($a, $b, $e, $f, 14, 15),
13012 }
13013 };
13014 }
13015 macro_rules! shuffle1 {
13016 ($a:expr, $e:expr) => {
13017 match (imm8 >> 2) & 0x3 {
13018 0 => shuffle2!($a, $e, 0, 1),
13019 1 => shuffle2!($a, $e, 2, 3),
13020 2 => shuffle2!($a, $e, 4, 5),
13021 _ => shuffle2!($a, $e, 6, 7),
13022 }
13023 };
13024 }
13025 match imm8 & 0x3 {
13026 0 => shuffle1!(0, 1),
13027 1 => shuffle1!(2, 3),
13028 2 => shuffle1!(4, 5),
13029 _ => shuffle1!(6, 7),
13030 }
13031 }
13032
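// Illustrative usage sketch (hypothetical helper, not upstream test code; assumes the
// `_mm512_setr_epi64` constructor defined elsewhere in this file). Each 2-bit field of
// imm8 selects a 128-bit lane, i.e. a pair of 64-bit integers.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_shuffle_i64x2() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    // 0b01_00_11_10: dst lanes are a.lane2, a.lane3, b.lane0, b.lane1.
    let r = _mm512_shuffle_i64x2(a, b, 0b0100_1110);
    let e: [i64; 8] = [4, 5, 6, 7, 8, 9, 10, 11];
    assert_eq!(mem::transmute::<__m512i, [i64; 8]>(r), e);
}
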
13033 /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13034 ///
13035 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_i64x&expand=5181)
13036 #[inline]
13037 #[target_feature(enable = "avx512f")]
13038 #[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
13039 #[rustc_args_required_const(4)]
13040 pub unsafe fn _mm512_mask_shuffle_i64x2(
13041 src: __m512i,
13042 k: __mmask8,
13043 a: __m512i,
13044 b: __m512i,
13045 imm8: i32,
13046 ) -> __m512i {
13047 assert!(imm8 >= 0 && imm8 <= 255);
13048 let imm8 = (imm8 & 0xFF) as u8;
13049 macro_rules! shuffle4 {
13050 (
13051 $a:expr,
13052 $b:expr,
13053 $c:expr,
13054 $d:expr,
13055 $e:expr,
13056 $f:expr,
13057 $g:expr,
13058 $h:expr
13059 ) => {
13060 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
13061 };
13062 }
13063 macro_rules! shuffle3 {
13064 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
13065 match (imm8 >> 6) & 0x3 {
13066 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
13067 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
13068 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
13069 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
13070 }
13071 };
13072 }
13073 macro_rules! shuffle2 {
13074 ($a:expr, $b:expr, $e:expr, $f:expr) => {
13075 match (imm8 >> 4) & 0x3 {
13076 0 => shuffle3!($a, $b, $e, $f, 8, 9),
13077 1 => shuffle3!($a, $b, $e, $f, 10, 11),
13078 2 => shuffle3!($a, $b, $e, $f, 12, 13),
13079 _ => shuffle3!($a, $b, $e, $f, 14, 15),
13080 }
13081 };
13082 }
13083 macro_rules! shuffle1 {
13084 ($a:expr, $e:expr) => {
13085 match (imm8 >> 2) & 0x3 {
13086 0 => shuffle2!($a, $e, 0, 1),
13087 1 => shuffle2!($a, $e, 2, 3),
13088 2 => shuffle2!($a, $e, 4, 5),
13089 _ => shuffle2!($a, $e, 6, 7),
13090 }
13091 };
13092 }
13093 let shuffle = match imm8 & 0x3 {
13094 0 => shuffle1!(0, 1),
13095 1 => shuffle1!(2, 3),
13096 2 => shuffle1!(4, 5),
13097 _ => shuffle1!(6, 7),
13098 };
13099
13100 transmute(simd_select_bitmask(k, shuffle, src.as_i64x8()))
13101 }
13102
13103 /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13104 ///
13105 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_i64&expand=5182)
13106 #[inline]
13107 #[target_feature(enable = "avx512f")]
13108 #[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
13109 #[rustc_args_required_const(3)]
13110 pub unsafe fn _mm512_maskz_shuffle_i64x2(
13111 k: __mmask8,
13112 a: __m512i,
13113 b: __m512i,
13114 imm8: i32,
13115 ) -> __m512i {
13116 assert!(imm8 >= 0 && imm8 <= 255);
13117 let imm8 = (imm8 & 0xFF) as u8;
13118 macro_rules! shuffle4 {
13119 (
13120 $a:expr,
13121 $b:expr,
13122 $c:expr,
13123 $d:expr,
13124 $e:expr,
13125 $f:expr,
13126 $g:expr,
13127 $h:expr
13128 ) => {
13129 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
13130 };
13131 }
13132 macro_rules! shuffle3 {
13133 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
13134 match (imm8 >> 6) & 0x3 {
13135 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
13136 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
13137 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
13138 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
13139 }
13140 };
13141 }
13142 macro_rules! shuffle2 {
13143 ($a:expr, $b:expr, $e:expr, $f:expr) => {
13144 match (imm8 >> 4) & 0x3 {
13145 0 => shuffle3!($a, $b, $e, $f, 8, 9),
13146 1 => shuffle3!($a, $b, $e, $f, 10, 11),
13147 2 => shuffle3!($a, $b, $e, $f, 12, 13),
13148 _ => shuffle3!($a, $b, $e, $f, 14, 15),
13149 }
13150 };
13151 }
13152 macro_rules! shuffle1 {
13153 ($a:expr, $e:expr) => {
13154 match (imm8 >> 2) & 0x3 {
13155 0 => shuffle2!($a, $e, 0, 1),
13156 1 => shuffle2!($a, $e, 2, 3),
13157 2 => shuffle2!($a, $e, 4, 5),
13158 _ => shuffle2!($a, $e, 6, 7),
13159 }
13160 };
13161 }
13162 let shuffle = match imm8 & 0x3 {
13163 0 => shuffle1!(0, 1),
13164 1 => shuffle1!(2, 3),
13165 2 => shuffle1!(4, 5),
13166 _ => shuffle1!(6, 7),
13167 };
13168
13169 let zero = _mm512_setzero_si512().as_i64x8();
13170 transmute(simd_select_bitmask(k, shuffle, zero))
13171 }
13172
13173 /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
13174 ///
13175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_f32x4&expand=5165)
13176 #[inline]
13177 #[target_feature(enable = "avx512f")]
13178 #[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] // should be vshuff32x4, but vshuff64x2 is generated
13179 #[rustc_args_required_const(2)]
13180 pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 {
13181 assert!(imm8 >= 0 && imm8 <= 255);
13182 let imm8 = (imm8 & 0xFF) as u8;
13183 macro_rules! shuffle4 {
13184 (
13185 $a:expr,
13186 $b:expr,
13187 $c:expr,
13188 $d:expr,
13189 $e:expr,
13190 $f:expr,
13191 $g:expr,
13192 $h:expr,
13193 $i:expr,
13194 $j:expr,
13195 $k:expr,
13196 $l:expr,
13197 $m:expr,
13198 $n:expr,
13199 $o:expr,
13200 $p:expr
13201 ) => {
13202 simd_shuffle16(
13203 a,
13204 b,
13205 [
13206 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
13207 ],
13208 )
13209 };
13210 }
13211 macro_rules! shuffle3 {
13212 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
13213 match (imm8 >> 6) & 0x3 {
13214 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
13215 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
13216 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
13217 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
13218 }
13219 };
13220 }
13221 macro_rules! shuffle2 {
13222 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
13223 match (imm8 >> 4) & 0x3 {
13224 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
13225 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
13226 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
13227 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
13228 }
13229 };
13230 }
13231 macro_rules! shuffle1 {
13232 ($a:expr, $e:expr, $i: expr, $m: expr) => {
13233 match (imm8 >> 2) & 0x3 {
13234 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
13235 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
13236 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
13237 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
13238 }
13239 };
13240 }
13241 match imm8 & 0x3 {
13242 0 => shuffle1!(0, 1, 2, 3),
13243 1 => shuffle1!(4, 5, 6, 7),
13244 2 => shuffle1!(8, 9, 10, 11),
13245 _ => shuffle1!(12, 13, 14, 15),
13246 }
13247 }
13248
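// Illustrative usage sketch (hypothetical helper, not upstream test code; assumes the
// `_mm512_setr_ps` constructor defined elsewhere in this file). imm8 = 0b11_10_01_00
// keeps the low two lanes of `a` and takes the high two lanes of `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_setr_ps(
        16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
    );
    let r = _mm512_shuffle_f32x4(a, b, 0b1110_0100);
    let e: [f32; 16] = [
        0., 1., 2., 3., 4., 5., 6., 7., 24., 25., 26., 27., 28., 29., 30., 31.,
    ];
    assert_eq!(mem::transmute::<__m512, [f32; 16]>(r), e);
}
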
13249 /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13250 ///
13251 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_f32&expand=5163)
13252 #[inline]
13253 #[target_feature(enable = "avx512f")]
13254 #[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
13255 #[rustc_args_required_const(4)]
13256 pub unsafe fn _mm512_mask_shuffle_f32x4(
13257 src: __m512,
13258 k: __mmask16,
13259 a: __m512,
13260 b: __m512,
13261 imm8: i32,
13262 ) -> __m512 {
13263 assert!(imm8 >= 0 && imm8 <= 255);
13264 let imm8 = (imm8 & 0xFF) as u8;
13265 macro_rules! shuffle4 {
13266 (
13267 $a:expr,
13268 $b:expr,
13269 $c:expr,
13270 $d:expr,
13271 $e:expr,
13272 $f:expr,
13273 $g:expr,
13274 $h:expr,
13275 $i:expr,
13276 $j:expr,
13277 $k:expr,
13278 $l:expr,
13279 $m:expr,
13280 $n:expr,
13281 $o:expr,
13282 $p:expr
13283 ) => {
13284 simd_shuffle16(
13285 a,
13286 b,
13287 [
13288 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
13289 ],
13290 )
13291 };
13292 }
13293 macro_rules! shuffle3 {
13294 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
13295 match (imm8 >> 6) & 0x3 {
13296 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
13297 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
13298 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
13299 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
13300 }
13301 };
13302 }
13303 macro_rules! shuffle2 {
13304 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
13305 match (imm8 >> 4) & 0x3 {
13306 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
13307 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
13308 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
13309 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
13310 }
13311 };
13312 }
13313 macro_rules! shuffle1 {
13314 ($a:expr, $e:expr, $i: expr, $m: expr) => {
13315 match (imm8 >> 2) & 0x3 {
13316 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
13317 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
13318 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
13319 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
13320 }
13321 };
13322 }
13323 let shuffle = match imm8 & 0x3 {
13324 0 => shuffle1!(0, 1, 2, 3),
13325 1 => shuffle1!(4, 5, 6, 7),
13326 2 => shuffle1!(8, 9, 10, 11),
13327 _ => shuffle1!(12, 13, 14, 15),
13328 };
13329
13330 transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
13331 }
13332
13333 /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13334 ///
13335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_f32&expand=5164)
13336 #[inline]
13337 #[target_feature(enable = "avx512f")]
13338 #[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
13339 #[rustc_args_required_const(3)]
13340 pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
13341 assert!(imm8 >= 0 && imm8 <= 255);
13342 let imm8 = (imm8 & 0xFF) as u8;
13343 macro_rules! shuffle4 {
13344 (
13345 $a:expr,
13346 $b:expr,
13347 $c:expr,
13348 $d:expr,
13349 $e:expr,
13350 $f:expr,
13351 $g:expr,
13352 $h:expr,
13353 $i:expr,
13354 $j:expr,
13355 $k:expr,
13356 $l:expr,
13357 $m:expr,
13358 $n:expr,
13359 $o:expr,
13360 $p:expr
13361 ) => {
13362 simd_shuffle16(
13363 a,
13364 b,
13365 [
13366 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
13367 ],
13368 )
13369 };
13370 }
13371 macro_rules! shuffle3 {
13372 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
13373 match (imm8 >> 6) & 0x3 {
13374 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
13375 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
13376 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
13377 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
13378 }
13379 };
13380 }
13381 macro_rules! shuffle2 {
13382 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
13383 match (imm8 >> 4) & 0x3 {
13384 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
13385 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
13386 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
13387 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
13388 }
13389 };
13390 }
13391 macro_rules! shuffle1 {
13392 ($a:expr, $e:expr, $i: expr, $m: expr) => {
13393 match (imm8 >> 2) & 0x3 {
13394 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
13395 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
13396 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
13397 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
13398 }
13399 };
13400 }
13401 let shuffle = match imm8 & 0x3 {
13402 0 => shuffle1!(0, 1, 2, 3),
13403 1 => shuffle1!(4, 5, 6, 7),
13404 2 => shuffle1!(8, 9, 10, 11),
13405 _ => shuffle1!(12, 13, 14, 15),
13406 };
13407
13408 let zero = _mm512_setzero_ps().as_f32x16();
13409 transmute(simd_select_bitmask(k, shuffle, zero))
13410 }
13411
13412 /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
13413 ///
13414 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_f64x2&expand=5171)
13415 #[inline]
13416 #[target_feature(enable = "avx512f")]
13417 #[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
13418 #[rustc_args_required_const(2)]
13419 pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
13420 assert!(imm8 >= 0 && imm8 <= 255);
13421 let imm8 = (imm8 & 0xFF) as u8;
13422 macro_rules! shuffle4 {
13423 (
13424 $a:expr,
13425 $b:expr,
13426 $c:expr,
13427 $d:expr,
13428 $e:expr,
13429 $f:expr,
13430 $g:expr,
13431 $h:expr
13432 ) => {
13433 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
13434 };
13435 }
13436 macro_rules! shuffle3 {
13437 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
13438 match (imm8 >> 6) & 0x3 {
13439 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
13440 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
13441 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
13442 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
13443 }
13444 };
13445 }
13446 macro_rules! shuffle2 {
13447 ($a:expr, $b:expr, $e:expr, $f:expr) => {
13448 match (imm8 >> 4) & 0x3 {
13449 0 => shuffle3!($a, $b, $e, $f, 8, 9),
13450 1 => shuffle3!($a, $b, $e, $f, 10, 11),
13451 2 => shuffle3!($a, $b, $e, $f, 12, 13),
13452 _ => shuffle3!($a, $b, $e, $f, 14, 15),
13453 }
13454 };
13455 }
13456 macro_rules! shuffle1 {
13457 ($a:expr, $e:expr) => {
13458 match (imm8 >> 2) & 0x3 {
13459 0 => shuffle2!($a, $e, 0, 1),
13460 1 => shuffle2!($a, $e, 2, 3),
13461 2 => shuffle2!($a, $e, 4, 5),
13462 _ => shuffle2!($a, $e, 6, 7),
13463 }
13464 };
13465 }
13466 match imm8 & 0x3 {
13467 0 => shuffle1!(0, 1),
13468 1 => shuffle1!(2, 3),
13469 2 => shuffle1!(4, 5),
13470 _ => shuffle1!(6, 7),
13471 }
13472 }
13473
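// Illustrative usage sketch (hypothetical helper, not upstream test code): the same
// lane-selection rule as `_mm512_shuffle_f32x4`, but over 128-bit lanes of two f64
// elements each.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_shuffle_f64x2() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
    // 0b11_10_01_00: low half from a (lanes 0, 1), high half from b (lanes 2, 3).
    let r = _mm512_shuffle_f64x2(a, b, 0b1110_0100);
    let e: [f64; 8] = [0., 1., 2., 3., 12., 13., 14., 15.];
    assert_eq!(mem::transmute::<__m512d, [f64; 8]>(r), e);
}
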
13474 /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13475 ///
13476 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_f64x2&expand=5169)
13477 #[inline]
13478 #[target_feature(enable = "avx512f")]
13479 #[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
13480 #[rustc_args_required_const(4)]
13481 pub unsafe fn _mm512_mask_shuffle_f64x2(
13482 src: __m512d,
13483 k: __mmask8,
13484 a: __m512d,
13485 b: __m512d,
13486 imm8: i32,
13487 ) -> __m512d {
13488 assert!(imm8 >= 0 && imm8 <= 255);
13489 let imm8 = (imm8 & 0xFF) as u8;
13490 macro_rules! shuffle4 {
13491 (
13492 $a:expr,
13493 $b:expr,
13494 $c:expr,
13495 $d:expr,
13496 $e:expr,
13497 $f:expr,
13498 $g:expr,
13499 $h:expr
13500 ) => {
13501 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
13502 };
13503 }
13504 macro_rules! shuffle3 {
13505 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
13506 match (imm8 >> 6) & 0x3 {
13507 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
13508 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
13509 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
13510 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
13511 }
13512 };
13513 }
13514 macro_rules! shuffle2 {
13515 ($a:expr, $b:expr, $e:expr, $f:expr) => {
13516 match (imm8 >> 4) & 0x3 {
13517 0 => shuffle3!($a, $b, $e, $f, 8, 9),
13518 1 => shuffle3!($a, $b, $e, $f, 10, 11),
13519 2 => shuffle3!($a, $b, $e, $f, 12, 13),
13520 _ => shuffle3!($a, $b, $e, $f, 14, 15),
13521 }
13522 };
13523 }
13524 macro_rules! shuffle1 {
13525 ($a:expr, $e:expr) => {
13526 match (imm8 >> 2) & 0x3 {
13527 0 => shuffle2!($a, $e, 0, 1),
13528 1 => shuffle2!($a, $e, 2, 3),
13529 2 => shuffle2!($a, $e, 4, 5),
13530 _ => shuffle2!($a, $e, 6, 7),
13531 }
13532 };
13533 }
13534 let shuffle = match imm8 & 0x3 {
13535 0 => shuffle1!(0, 1),
13536 1 => shuffle1!(2, 3),
13537 2 => shuffle1!(4, 5),
13538 _ => shuffle1!(6, 7),
13539 };
13540
13541 transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
13542 }
13543
13544 /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13545 ///
13546 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_f64x2&expand=5170)
13547 #[inline]
13548 #[target_feature(enable = "avx512f")]
13549 #[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
13550 #[rustc_args_required_const(3)]
13551 pub unsafe fn _mm512_maskz_shuffle_f64x2(
13552 k: __mmask8,
13553 a: __m512d,
13554 b: __m512d,
13555 imm8: i32,
13556 ) -> __m512d {
13557 assert!(imm8 >= 0 && imm8 <= 255);
13558 let imm8 = (imm8 & 0xFF) as u8;
13559 macro_rules! shuffle4 {
13560 (
13561 $a:expr,
13562 $b:expr,
13563 $c:expr,
13564 $d:expr,
13565 $e:expr,
13566 $f:expr,
13567 $g:expr,
13568 $h:expr
13569 ) => {
13570 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
13571 };
13572 }
13573 macro_rules! shuffle3 {
13574 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
13575 match (imm8 >> 6) & 0x3 {
13576 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
13577 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
13578 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
13579 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
13580 }
13581 };
13582 }
13583 macro_rules! shuffle2 {
13584 ($a:expr, $b:expr, $e:expr, $f:expr) => {
13585 match (imm8 >> 4) & 0x3 {
13586 0 => shuffle3!($a, $b, $e, $f, 8, 9),
13587 1 => shuffle3!($a, $b, $e, $f, 10, 11),
13588 2 => shuffle3!($a, $b, $e, $f, 12, 13),
13589 _ => shuffle3!($a, $b, $e, $f, 14, 15),
13590 }
13591 };
13592 }
13593 macro_rules! shuffle1 {
13594 ($a:expr, $e:expr) => {
13595 match (imm8 >> 2) & 0x3 {
13596 0 => shuffle2!($a, $e, 0, 1),
13597 1 => shuffle2!($a, $e, 2, 3),
13598 2 => shuffle2!($a, $e, 4, 5),
13599 _ => shuffle2!($a, $e, 6, 7),
13600 }
13601 };
13602 }
13603 let shuffle = match imm8 & 0x3 {
13604 0 => shuffle1!(0, 1),
13605 1 => shuffle1!(2, 3),
13606 2 => shuffle1!(4, 5),
13607 _ => shuffle1!(6, 7),
13608 };
13609
13610 let zero = _mm512_setzero_pd().as_f64x8();
13611 transmute(simd_select_bitmask(k, shuffle, zero))
13612 }
13613
13614 /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
13615 ///
13616 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extractf32x4_ps&expand=2442)
13617 #[inline]
13618 #[target_feature(enable = "avx512f")]
13619 #[cfg_attr(
13620 all(test, not(target_os = "windows")),
13621 assert_instr(vextractf32x4, imm8 = 3)
13622 )]
13623 #[rustc_args_required_const(1)]
13624 pub unsafe fn _mm512_extractf32x4_ps(a: __m512, imm8: i32) -> __m128 {
13625 assert!(imm8 >= 0 && imm8 <= 3);
13626 match imm8 & 0x3 {
13627 0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
13628 1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
13629 2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
13630 _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
13631 }
13632 }
13633
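// Illustrative usage sketch (hypothetical helper, not upstream test code): imm8
// selects which of the four 128-bit lanes of `a` becomes the __m128 result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_extractf32x4_ps(a, 3);
    let e: [f32; 4] = [12., 13., 14., 15.];
    assert_eq!(mem::transmute::<__m128, [f32; 4]>(r), e);
}
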
13634 /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13635 ///
13636 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extractf32x4_ps&expand=2443)
13637 #[inline]
13638 #[target_feature(enable = "avx512f")]
13639 #[cfg_attr(
13640 all(test, not(target_os = "windows")),
13641 assert_instr(vextractf32x4, imm8 = 3)
13642 )]
13643 #[rustc_args_required_const(3)]
13644 pub unsafe fn _mm512_mask_extractf32x4_ps(
13645 src: __m128,
13646 k: __mmask8,
13647 a: __m512,
13648 imm8: i32,
13649 ) -> __m128 {
13650 assert!(imm8 >= 0 && imm8 <= 3);
13651 let extract: __m128 = match imm8 & 0x3 {
13652 0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
13653 1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
13654 2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
13655 _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
13656 };
13657 transmute(simd_select_bitmask(k, extract.as_f32x4(), src.as_f32x4()))
13658 }
13659
13660 /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13661 ///
13662 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extractf32x4_ps&expand=2444)
13663 #[inline]
13664 #[target_feature(enable = "avx512f")]
13665 #[cfg_attr(
13666 all(test, not(target_os = "windows")),
13667 assert_instr(vextractf32x4, imm8 = 3)
13668 )]
13669 #[rustc_args_required_const(2)]
13670 pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512, imm8: i32) -> __m128 {
13671 assert!(imm8 >= 0 && imm8 <= 3);
13672 let extract: __m128 = match imm8 & 0x3 {
13673 0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
13674 1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
13675 2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
13676 _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
13677 };
13678 let zero = _mm_setzero_ps().as_f32x4();
13679 transmute(simd_select_bitmask(k, extract.as_f32x4(), zero))
13680 }
13681
13682 /// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the result in dst.
13683 ///
13684 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extracti64x4_epi64&expand=2473)
13685 #[inline]
13686 #[target_feature(enable = "avx512f")]
13687 #[cfg_attr(
13688 all(test, not(target_os = "windows")),
13689     assert_instr(vextractf64x4, imm8 = 1) // should be vextracti64x4, but vextractf64x4 is generated
13690 )]
13691 #[rustc_args_required_const(1)]
13692 pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i, imm8: i32) -> __m256i {
13693 assert!(imm8 >= 0 && imm8 <= 1);
13694 match imm8 & 0x1 {
13695 0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
13696 _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
13697 }
13698 }
13699
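// Illustrative usage sketch (hypothetical helper, not upstream test code; assumes the
// `_mm512_setr_epi64` constructor defined elsewhere in this file): imm8 selects the
// low (0) or high (1) 256-bit half of `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_extracti64x4_epi64() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let r = _mm512_extracti64x4_epi64(a, 1);
    let e: [i64; 4] = [4, 5, 6, 7];
    assert_eq!(mem::transmute::<__m256i, [i64; 4]>(r), e);
}
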
13700 /// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13701 ///
13702 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extracti64x4_epi64&expand=2474)
13703 #[inline]
13704 #[target_feature(enable = "avx512f")]
13705 #[cfg_attr(
13706 all(test, not(target_os = "windows")),
13707 assert_instr(vextracti64x4, imm8 = 1)
13708 )]
13709 #[rustc_args_required_const(3)]
13710 pub unsafe fn _mm512_mask_extracti64x4_epi64(
13711 src: __m256i,
13712 k: __mmask8,
13713 a: __m512i,
13714 imm8: i32,
13715 ) -> __m256i {
13716 assert!(imm8 >= 0 && imm8 <= 1);
13717 let extract = match imm8 & 0x1 {
13718 0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
13719 _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
13720 };
13721 transmute(simd_select_bitmask(k, extract, src.as_i64x4()))
13722 }
13723
13724 /// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13725 ///
13726 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extracti64x4_epi64&expand=2475)
13727 #[inline]
13728 #[target_feature(enable = "avx512f")]
13729 #[cfg_attr(
13730 all(test, not(target_os = "windows")),
13731 assert_instr(vextracti64x4, imm8 = 1)
13732 )]
13733 #[rustc_args_required_const(2)]
13734 pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m256i {
13735 assert!(imm8 >= 0 && imm8 <= 1);
13736 let extract: __m256i = match imm8 & 0x1 {
13737 0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
13738 _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
13739 };
13740 let zero = _mm256_setzero_si256().as_i64x4();
13741 transmute(simd_select_bitmask(k, extract.as_i64x4(), zero))
13742 }
13743
13744 /// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
13745 ///
13746 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extractf64x4_pd&expand=2454)
13747 #[inline]
13748 #[target_feature(enable = "avx512f")]
13749 #[cfg_attr(
13750 all(test, not(target_os = "windows")),
13751 assert_instr(vextractf64x4, imm8 = 1)
13752 )]
13753 #[rustc_args_required_const(1)]
13754 pub unsafe fn _mm512_extractf64x4_pd(a: __m512d, imm8: i32) -> __m256d {
13755 assert!(imm8 >= 0 && imm8 <= 1);
13756 match imm8 & 0x1 {
13757 0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
13758 _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
13759 }
13760 }
13761
13762 /// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13763 ///
13764 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extractf64x4_pd&expand=2455)
13765 #[inline]
13766 #[target_feature(enable = "avx512f")]
13767 #[cfg_attr(
13768 all(test, not(target_os = "windows")),
13769 assert_instr(vextractf64x4, imm8 = 1)
13770 )]
13771 #[rustc_args_required_const(3)]
13772 pub unsafe fn _mm512_mask_extractf64x4_pd(
13773 src: __m256d,
13774 k: __mmask8,
13775 a: __m512d,
13776 imm8: i32,
13777 ) -> __m256d {
13778 assert!(imm8 >= 0 && imm8 <= 1);
13779 let extract = match imm8 & 0x1 {
13780 0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
13781 _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
13782 };
13783 transmute(simd_select_bitmask(k, extract, src))
13784 }
13785
13786 /// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13787 ///
13788 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extractf64x4_pd&expand=2456)
13789 #[inline]
13790 #[target_feature(enable = "avx512f")]
13791 #[cfg_attr(
13792 all(test, not(target_os = "windows")),
13793 assert_instr(vextractf64x4, imm8 = 1)
13794 )]
13795 #[rustc_args_required_const(2)]
13796 pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m256d {
13797 assert!(imm8 >= 0 && imm8 <= 1);
13798 let extract = match imm8 & 0x1 {
13799 0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
13800 _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
13801 };
13802 let zero = _mm256_setzero_pd();
13803 transmute(simd_select_bitmask(k, extract, zero))
13804 }
13805
13806 /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the result in dst.
13807 ///
13808 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extracti32x4_epi32&expand=2461)
13809 #[inline]
13810 #[target_feature(enable = "avx512f")]
13811 #[cfg_attr(
13812 all(test, not(target_os = "windows")),
13813     assert_instr(vextractf32x4, imm8 = 3) // should be vextracti32x4, but vextractf32x4 is generated
13814 )]
13815 #[rustc_args_required_const(1)]
13816 pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i {
13817 assert!(imm8 >= 0 && imm8 <= 3);
13818 let a = a.as_i32x16();
13819 let undefined = _mm512_undefined_epi32().as_i32x16();
13820 let extract: i32x4 = match imm8 & 0x3 {
13821 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
13822 1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
13823 2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
13824 _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
13825 };
13826 transmute(extract)
13827 }
13828
13829 /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13830 ///
13831 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extracti32x4_epi32&expand=2462)
13832 #[inline]
13833 #[target_feature(enable = "avx512f")]
13834 #[cfg_attr(
13835 all(test, not(target_os = "windows")),
13836 assert_instr(vextracti32x4, imm8 = 3)
13837 )]
13838 #[rustc_args_required_const(3)]
13839 pub unsafe fn _mm512_mask_extracti32x4_epi32(
13840 src: __m128i,
13841 k: __mmask8,
13842 a: __m512i,
13843 imm8: i32,
13844 ) -> __m128i {
13845 assert!(imm8 >= 0 && imm8 <= 3);
13846 let a = a.as_i32x16();
13847 let undefined = _mm512_undefined_epi32().as_i32x16();
13848 let extract: i32x4 = match imm8 & 0x3 {
13849 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
13850 1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
13851 2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
13852 _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
13853 };
13854 transmute(simd_select_bitmask(k, extract, src.as_i32x4()))
13855 }
13856
13857 /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13858 ///
13859 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extracti32x4_epi32&expand=2463)
13860 #[inline]
13861 #[target_feature(enable = "avx512f")]
13862 #[cfg_attr(
13863 all(test, not(target_os = "windows")),
13864 assert_instr(vextracti32x4, imm8 = 3)
13865 )]
13866 #[rustc_args_required_const(2)]
13867 pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i, imm8: i32) -> __m128i {
13868 assert!(imm8 >= 0 && imm8 <= 3);
13869 let a = a.as_i32x16();
13870 let undefined = _mm512_undefined_epi32().as_i32x16();
13871 let extract: i32x4 = match imm8 & 0x3 {
13872 0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
13873 1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
13874 2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
13875 _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
13876 };
13877 let zero = _mm_setzero_si128().as_i32x4();
13878 transmute(simd_select_bitmask(k, extract, zero))
13879 }
13880
13881 /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
13882 ///
13883 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_moveldup_ps&expand=3862)
13884 #[inline]
13885 #[target_feature(enable = "avx512f")]
13886 #[cfg_attr(test, assert_instr(vmovsldup))]
13887 pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
13888 let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
13889 transmute(r)
13890 }
13891
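// Illustrative usage sketch (hypothetical helper, not upstream test code): each
// even-indexed element is duplicated into the odd slot that follows it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_moveldup_ps() {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_moveldup_ps(a);
    let e: [f32; 16] = [
        0., 0., 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14.,
    ];
    assert_eq!(mem::transmute::<__m512, [f32; 16]>(r), e);
}
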
13892 /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13893 ///
13894 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_moveldup_ps&expand=3860)
13895 #[inline]
13896 #[target_feature(enable = "avx512f")]
13897 #[cfg_attr(test, assert_instr(vmovsldup))]
13898 pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
13899 let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
13900 transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
13901 }
13902
13903 /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13904 ///
13905 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_moveldup_ps&expand=3861)
13906 #[inline]
13907 #[target_feature(enable = "avx512f")]
13908 #[cfg_attr(test, assert_instr(vmovsldup))]
13909 pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
13910 let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
13911 let zero = _mm512_setzero_ps().as_f32x16();
13912 transmute(simd_select_bitmask(k, mov, zero))
13913 }
13914
13915 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
13916 ///
13917 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_movehdup_ps&expand=3852)
13918 #[inline]
13919 #[target_feature(enable = "avx512f")]
13920 #[cfg_attr(test, assert_instr(vmovshdup))]
13921 pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
13922 let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
13923 transmute(r)
13924 }
13925
13926 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13927 ///
13928 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_movehdup&expand=3850)
13929 #[inline]
13930 #[target_feature(enable = "avx512f")]
13931 #[cfg_attr(test, assert_instr(vmovshdup))]
13932 pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
13933 let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
13934 transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
13935 }
13936
13937 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13938 ///
13939 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_moveh&expand=3851)
13940 #[inline]
13941 #[target_feature(enable = "avx512f")]
13942 #[cfg_attr(test, assert_instr(vmovshdup))]
13943 pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
13944 let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
13945 let zero = _mm512_setzero_ps().as_f32x16();
13946 transmute(simd_select_bitmask(k, mov, zero))
13947 }
13948
13949 /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
13950 ///
13951 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_movedup_pd&expand=3843)
13952 #[inline]
13953 #[target_feature(enable = "avx512f")]
13954 #[cfg_attr(test, assert_instr(vmovddup))]
13955 pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
13956 let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
13957 transmute(r)
13958 }
13959
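// Illustrative usage sketch (hypothetical helper, not upstream test code): each
// even-indexed double is duplicated into the odd slot that follows it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_movedup_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let r = _mm512_movedup_pd(a);
    let e: [f64; 8] = [0., 0., 2., 2., 4., 4., 6., 6.];
    assert_eq!(mem::transmute::<__m512d, [f64; 8]>(r), e);
}
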
13960 /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13961 ///
13962 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_movedup_pd&expand=3841)
13963 #[inline]
13964 #[target_feature(enable = "avx512f")]
13965 #[cfg_attr(test, assert_instr(vmovddup))]
13966 pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
13967 let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
13968 transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
13969 }
13970
13971 /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13972 ///
13973 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_movedup_pd&expand=3842)
13974 #[inline]
13975 #[target_feature(enable = "avx512f")]
13976 #[cfg_attr(test, assert_instr(vmovddup))]
13977 pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
13978 let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
13979 let zero = _mm512_setzero_pd().as_f64x8();
13980 transmute(simd_select_bitmask(k, mov, zero))
13981 }
13982
13983 /// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
13984 ///
13985 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_inserti32x4&expand=3174)
13986 #[inline]
13987 #[target_feature(enable = "avx512f")]
13988 #[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))] // should be vinserti32x4, but vinsertf32x4 is generated
13989 #[rustc_args_required_const(2)]
13990 pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i, imm8: i32) -> __m512i {
13991 assert!(imm8 >= 0 && imm8 <= 3);
13992 let a = a.as_i32x16();
13993 let b = _mm512_castsi128_si512(b).as_i32x16();
13994 let ret: i32x16 = match imm8 & 0b11 {
13995 0 => simd_shuffle16(
13996 a,
13997 b,
13998 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
13999 ),
14000 1 => simd_shuffle16(
14001 a,
14002 b,
14003 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
14004 ),
14005 2 => simd_shuffle16(
14006 a,
14007 b,
14008 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
14009 ),
14010 _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
14011 };
14012 transmute(ret)
14013 }
14014
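// Illustrative usage sketch (hypothetical helper, not upstream test code): the 128-bit
// lane of `a` selected by imm8 is replaced with `b`; the other lanes are unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn usage_sketch_mm512_inserti32x4() {
    let a = _mm512_set1_epi32(1);
    let b = _mm_set1_epi32(9);
    let r = _mm512_inserti32x4(a, b, 2);
    let e: [i32; 16] = [1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 1, 1, 1, 1];
    assert_eq!(mem::transmute::<__m512i, [i32; 16]>(r), e);
}
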
14015 /// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14016 ///
14017 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_inserti32x4&expand=3175)
14018 #[inline]
14019 #[target_feature(enable = "avx512f")]
14020 #[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
14021 #[rustc_args_required_const(4)]
14022 pub unsafe fn _mm512_mask_inserti32x4(
14023 src: __m512i,
14024 k: __mmask16,
14025 a: __m512i,
14026 b: __m128i,
14027 imm8: i32,
14028 ) -> __m512i {
14029 assert!(imm8 >= 0 && imm8 <= 3);
14030 let a = a.as_i32x16();
14031 let b = _mm512_castsi128_si512(b).as_i32x16();
14032 let insert: i32x16 = match imm8 & 0b11 {
14033 0 => simd_shuffle16(
14034 a,
14035 b,
14036 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
14037 ),
14038 1 => simd_shuffle16(
14039 a,
14040 b,
14041 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
14042 ),
14043 2 => simd_shuffle16(
14044 a,
14045 b,
14046 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
14047 ),
14048 _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
14049 };
14050 transmute(simd_select_bitmask(k, insert, src.as_i32x16()))
14051 }
14052
14053 /// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14054 ///
14055 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_inserti32x4&expand=3176)
14056 #[inline]
14057 #[target_feature(enable = "avx512f")]
14058 #[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
14059 #[rustc_args_required_const(3)]
14060 pub unsafe fn _mm512_maskz_inserti32x4(k: __mmask16, a: __m512i, b: __m128i, imm8: i32) -> __m512i {
14061 assert!(imm8 >= 0 && imm8 <= 3);
14062 let a = a.as_i32x16();
14063 let b = _mm512_castsi128_si512(b).as_i32x16();
14064 let insert = match imm8 & 0b11 {
14065 0 => simd_shuffle16(
14066 a,
14067 b,
14068 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
14069 ),
14070 1 => simd_shuffle16(
14071 a,
14072 b,
14073 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
14074 ),
14075 2 => simd_shuffle16(
14076 a,
14077 b,
14078 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
14079 ),
14080 _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
14081 };
14082 let zero = _mm512_setzero_si512().as_i32x16();
14083 transmute(simd_select_bitmask(k, insert, zero))
14084 }
14085
14086 /// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
14087 ///
14088 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_inserti64x4&expand=3186)
14089 #[inline]
14090 #[target_feature(enable = "avx512f")]
14091 #[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))] //should be vinserti64x4
14092 #[rustc_args_required_const(2)]
14093 pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i, imm8: i32) -> __m512i {
14094 assert!(imm8 >= 0 && imm8 <= 1);
14095 let b = _mm512_castsi256_si512(b);
14096 match imm8 & 0b1 {
14097 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
14098 _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
14099 }
14100 }
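
// Illustrative sketch (not upstream code): imm8 = 0 replaces the low 256 bits
// of `a` with `b`; imm8 = 1 would replace the high 256 bits. Hypothetical
// helper; assumes AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_inserti64x4() {
    let a: __m512i = transmute([1i64; 8]);
    let b = _mm256_set1_epi64x(7);
    let r: [i64; 8] = transmute(_mm512_inserti64x4(a, b, 0));
    assert_eq!(r, [7, 7, 7, 7, 1, 1, 1, 1]);
}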
14101
14102 /// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14103 ///
14104 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_inserti64x4&expand=3187)
14105 #[inline]
14106 #[target_feature(enable = "avx512f")]
14107 #[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
14108 #[rustc_args_required_const(4)]
14109 pub unsafe fn _mm512_mask_inserti64x4(
14110 src: __m512i,
14111 k: __mmask8,
14112 a: __m512i,
14113 b: __m256i,
14114 imm8: i32,
14115 ) -> __m512i {
14116 assert!(imm8 >= 0 && imm8 <= 1);
14117 let b = _mm512_castsi256_si512(b);
14118 let insert = match imm8 & 0b1 {
14119 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
14120 _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
14121 };
14122 transmute(simd_select_bitmask(k, insert, src.as_i64x8()))
14123 }
14124
14125 /// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14126 ///
14127 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_inserti64x4&expand=3188)
14128 #[inline]
14129 #[target_feature(enable = "avx512f")]
14130 #[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
14131 #[rustc_args_required_const(3)]
14132 pub unsafe fn _mm512_maskz_inserti64x4(k: __mmask8, a: __m512i, b: __m256i, imm8: i32) -> __m512i {
14133 assert!(imm8 >= 0 && imm8 <= 1);
14134 let b = _mm512_castsi256_si512(b);
14135 let insert = match imm8 & 0b1 {
14136 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
14137 _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
14138 };
14139 let zero = _mm512_setzero_si512().as_i64x8();
14140 transmute(simd_select_bitmask(k, insert, zero))
14141 }
14142
14143 /// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
14144 ///
14145 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_insertf32x4&expand=3155)
14146 #[inline]
14147 #[target_feature(enable = "avx512f")]
14148 #[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
14149 #[rustc_args_required_const(2)]
14150 pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128, imm8: i32) -> __m512 {
14151 assert!(imm8 >= 0 && imm8 <= 3);
14152 let b = _mm512_castps128_ps512(b);
14153 match imm8 & 0b11 {
14154 0 => simd_shuffle16(
14155 a,
14156 b,
14157 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
14158 ),
14159 1 => simd_shuffle16(
14160 a,
14161 b,
14162 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
14163 ),
14164 2 => simd_shuffle16(
14165 a,
14166 b,
14167 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
14168 ),
14169 _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
14170 }
14171 }
14172
14173 /// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14174 ///
14175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_insertf32x4&expand=3156)
14176 #[inline]
14177 #[target_feature(enable = "avx512f")]
14178 #[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
14179 #[rustc_args_required_const(4)]
14180 pub unsafe fn _mm512_mask_insertf32x4(
14181 src: __m512,
14182 k: __mmask16,
14183 a: __m512,
14184 b: __m128,
14185 imm8: i32,
14186 ) -> __m512 {
14187 assert!(imm8 >= 0 && imm8 <= 3);
14188 let b = _mm512_castps128_ps512(b);
14189 let insert = match imm8 & 0b11 {
14190 0 => simd_shuffle16(
14191 a,
14192 b,
14193 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
14194 ),
14195 1 => simd_shuffle16(
14196 a,
14197 b,
14198 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
14199 ),
14200 2 => simd_shuffle16(
14201 a,
14202 b,
14203 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
14204 ),
14205 _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
14206 };
14207 transmute(simd_select_bitmask(k, insert, src.as_f32x16()))
14208 }
14209
14210 /// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14211 ///
14212 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_insertf32x4&expand=3157)
14213 #[inline]
14214 #[target_feature(enable = "avx512f")]
14215 #[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
14216 #[rustc_args_required_const(3)]
14217 pub unsafe fn _mm512_maskz_insertf32x4(k: __mmask16, a: __m512, b: __m128, imm8: i32) -> __m512 {
14218 assert!(imm8 >= 0 && imm8 <= 3);
14219 let b = _mm512_castps128_ps512(b);
14220 let insert = match imm8 & 0b11 {
14221 0 => simd_shuffle16(
14222 a,
14223 b,
14224 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
14225 ),
14226 1 => simd_shuffle16(
14227 a,
14228 b,
14229 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
14230 ),
14231 2 => simd_shuffle16(
14232 a,
14233 b,
14234 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
14235 ),
14236 _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
14237 };
14238 let zero = _mm512_setzero_ps().as_f32x16();
14239 transmute(simd_select_bitmask(k, insert, zero))
14240 }
14241
14242 /// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
14243 ///
14244 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_insertf64x4&expand=3167)
14245 #[inline]
14246 #[target_feature(enable = "avx512f")]
14247 #[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
14248 #[rustc_args_required_const(2)]
14249 pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d, imm8: i32) -> __m512d {
14250 assert!(imm8 >= 0 && imm8 <= 1);
14251 let b = _mm512_castpd256_pd512(b);
14252 match imm8 & 0b1 {
14253 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
14254 _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
14255 }
14256 }
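
// Illustrative sketch (not upstream code): with imm8 = 1 the upper four
// doubles of `a` are overwritten by `b`. Hypothetical helper; assumes
// AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_insertf64x4() {
    let a = _mm512_set1_pd(1.);
    let b = _mm256_set1_pd(7.);
    let r: [f64; 8] = transmute(_mm512_insertf64x4(a, b, 1));
    assert_eq!(r, [1., 1., 1., 1., 7., 7., 7., 7.]);
}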
14257
14258 /// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14259 ///
14260 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_insertf64x4&expand=3168)
14261 #[inline]
14262 #[target_feature(enable = "avx512f")]
14263 #[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
14264 #[rustc_args_required_const(4)]
14265 pub unsafe fn _mm512_mask_insertf64x4(
14266 src: __m512d,
14267 k: __mmask8,
14268 a: __m512d,
14269 b: __m256d,
14270 imm8: i32,
14271 ) -> __m512d {
14272 assert!(imm8 >= 0 && imm8 <= 1);
14273 let b = _mm512_castpd256_pd512(b);
14274 let insert = match imm8 & 0b1 {
14275 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
14276 _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
14277 };
14278 transmute(simd_select_bitmask(k, insert, src.as_f64x8()))
14279 }
14280
14281 /// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14282 ///
14283 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_insertf64x4&expand=3169)
14284 #[inline]
14285 #[target_feature(enable = "avx512f")]
14286 #[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
14287 #[rustc_args_required_const(3)]
14288 pub unsafe fn _mm512_maskz_insertf64x4(k: __mmask8, a: __m512d, b: __m256d, imm8: i32) -> __m512d {
14289 assert!(imm8 >= 0 && imm8 <= 1);
14290 let b = _mm512_castpd256_pd512(b);
14291 let insert = match imm8 & 0b1 {
14292 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
14293 _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
14294 };
14295 let zero = _mm512_setzero_pd().as_f64x8();
14296 transmute(simd_select_bitmask(k, insert, zero))
14297 }
14298
14299 /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
14300 ///
14301 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_epi32&expand=6021)
14302 #[inline]
14303 #[target_feature(enable = "avx512f")]
14304 #[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
14305 pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
14306 let a = a.as_i32x16();
14307 let b = b.as_i32x16();
14308 let r: i32x16 = simd_shuffle16(
14309 a,
14310 b,
14311 [
14312 2,
14313 18,
14314 3,
14315 19,
14316 2 + 4,
14317 18 + 4,
14318 3 + 4,
14319 19 + 4,
14320 2 + 8,
14321 18 + 8,
14322 3 + 8,
14323 19 + 8,
14324 2 + 12,
14325 18 + 12,
14326 3 + 12,
14327 19 + 12,
14328 ],
14329 );
14330 transmute(r)
14331 }
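
// Illustrative sketch (not upstream code): within each 128-bit lane the two
// high 32-bit elements of `a` and `b` are interleaved. Hypothetical helper;
// assumes AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_unpackhi_epi32() {
    let a: __m512i = transmute([0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
    let b = _mm512_set1_epi32(-1);
    let r: [i32; 16] = transmute(_mm512_unpackhi_epi32(a, b));
    // lane 0 yields a[2], b[2], a[3], b[3]; the other lanes follow suit
    assert_eq!(r, [2, -1, 3, -1, 6, -1, 7, -1, 10, -1, 11, -1, 14, -1, 15, -1]);
}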
14332
14333 /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14334 ///
14335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_epi32&expand=6019)
14336 #[inline]
14337 #[target_feature(enable = "avx512f")]
14338 #[cfg_attr(test, assert_instr(vpunpckhdq))]
14339 pub unsafe fn _mm512_mask_unpackhi_epi32(
14340 src: __m512i,
14341 k: __mmask16,
14342 a: __m512i,
14343 b: __m512i,
14344 ) -> __m512i {
14345 let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
14346 transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
14347 }
14348
14349 /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14350 ///
14351 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_epi32&expand=6020)
14352 #[inline]
14353 #[target_feature(enable = "avx512f")]
14354 #[cfg_attr(test, assert_instr(vpunpckhdq))]
14355 pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
14356 let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
14357 let zero = _mm512_setzero_si512().as_i32x16();
14358 transmute(simd_select_bitmask(k, unpackhi, zero))
14359 }
14360
14361 /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and
14362 /// store the results in dst.
14363 ///
14364 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_epi64&expand=6030)
14365 #[inline]
14366 #[target_feature(enable = "avx512f")]
14367 #[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
14368 pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
14369 simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
14370 }
14371
14372 /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14373 ///
14374 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_epi64&expand=6028)
14375 #[inline]
14376 #[target_feature(enable = "avx512f")]
14377 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
14378 pub unsafe fn _mm512_mask_unpackhi_epi64(
14379 src: __m512i,
14380 k: __mmask8,
14381 a: __m512i,
14382 b: __m512i,
14383 ) -> __m512i {
14384 let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
14385 transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
14386 }
14387
14388 /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14389 ///
14390 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_epi64&expand=6029)
14391 #[inline]
14392 #[target_feature(enable = "avx512f")]
14393 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
14394 pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
14395 let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
14396 let zero = _mm512_setzero_si512().as_i64x8();
14397 transmute(simd_select_bitmask(k, unpackhi, zero))
14398 }
14399
14400 /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
14401 ///
14402 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_ps&expand=6060)
14403 #[inline]
14404 #[target_feature(enable = "avx512f")]
14405 #[cfg_attr(test, assert_instr(vunpckhps))]
14406 pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
14407 simd_shuffle16(
14408 a,
14409 b,
14410 [
14411 2,
14412 18,
14413 3,
14414 19,
14415 2 + 4,
14416 18 + 4,
14417 3 + 4,
14418 19 + 4,
14419 2 + 8,
14420 18 + 8,
14421 3 + 8,
14422 19 + 8,
14423 2 + 12,
14424 18 + 12,
14425 3 + 12,
14426 19 + 12,
14427 ],
14428 )
14429 }
14430
14431 /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14432 ///
14433 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_ps&expand=6058)
14434 #[inline]
14435 #[target_feature(enable = "avx512f")]
14436 #[cfg_attr(test, assert_instr(vunpckhps))]
14437 pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
14438 let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
14439 transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
14440 }
14441
14442 /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14443 ///
14444 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_ps&expand=6059)
14445 #[inline]
14446 #[target_feature(enable = "avx512f")]
14447 #[cfg_attr(test, assert_instr(vunpckhps))]
14448 pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
14449 let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
14450 let zero = _mm512_setzero_ps().as_f32x16();
14451 transmute(simd_select_bitmask(k, unpackhi, zero))
14452 }
14453
14454 /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
14455 ///
14456 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_pd&expand=6048)
14457 #[inline]
14458 #[target_feature(enable = "avx512f")]
14459 #[cfg_attr(test, assert_instr(vunpckhpd))]
14460 pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
14461 simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
14462 }
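
// Illustrative sketch (not upstream code): per 128-bit lane, the high double
// of `a` is paired with the high double of `b`. Hypothetical helper; assumes
// AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_unpackhi_pd() {
    let a: __m512d = transmute([0.0f64, 1., 2., 3., 4., 5., 6., 7.]);
    let b: __m512d = transmute([10.0f64, 11., 12., 13., 14., 15., 16., 17.]);
    let r: [f64; 8] = transmute(_mm512_unpackhi_pd(a, b));
    assert_eq!(r, [1., 11., 3., 13., 5., 15., 7., 17.]);
}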
14463
14464 /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14465 ///
14466 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_pd&expand=6046)
14467 #[inline]
14468 #[target_feature(enable = "avx512f")]
14469 #[cfg_attr(test, assert_instr(vunpckhpd))]
14470 pub unsafe fn _mm512_mask_unpackhi_pd(
14471 src: __m512d,
14472 k: __mmask8,
14473 a: __m512d,
14474 b: __m512d,
14475 ) -> __m512d {
14476 let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
14477 transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
14478 }
14479
14480 /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14481 ///
14482 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_pd&expand=6047)
14483 #[inline]
14484 #[target_feature(enable = "avx512f")]
14485 #[cfg_attr(test, assert_instr(vunpckhpd))]
14486 pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
14487 let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
14488 let zero = _mm512_setzero_pd().as_f64x8();
14489 transmute(simd_select_bitmask(k, unpackhi, zero))
14490 }
14491
14492 /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
14493 ///
14494 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_epi32&expand=6078)
14495 #[inline]
14496 #[target_feature(enable = "avx512f")]
14497 #[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
14498 pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
14499 let a = a.as_i32x16();
14500 let b = b.as_i32x16();
14501 let r: i32x16 = simd_shuffle16(
14502 a,
14503 b,
14504 [
14505 0,
14506 16,
14507 1,
14508 17,
14509 0 + 4,
14510 16 + 4,
14511 1 + 4,
14512 17 + 4,
14513 0 + 8,
14514 16 + 8,
14515 1 + 8,
14516 17 + 8,
14517 0 + 12,
14518 16 + 12,
14519 1 + 12,
14520 17 + 12,
14521 ],
14522 );
14523 transmute(r)
14524 }
14525
14526 /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14527 ///
14528 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_epi32&expand=6076)
14529 #[inline]
14530 #[target_feature(enable = "avx512f")]
14531 #[cfg_attr(test, assert_instr(vpunpckldq))]
14532 pub unsafe fn _mm512_mask_unpacklo_epi32(
14533 src: __m512i,
14534 k: __mmask16,
14535 a: __m512i,
14536 b: __m512i,
14537 ) -> __m512i {
14538 let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
14539 transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
14540 }
14541
14542 /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14543 ///
14544 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_epi32&expand=6077)
14545 #[inline]
14546 #[target_feature(enable = "avx512f")]
14547 #[cfg_attr(test, assert_instr(vpunpckldq))]
14548 pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
14549 let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
14550 let zero = _mm512_setzero_si512().as_i32x16();
14551 transmute(simd_select_bitmask(k, unpacklo, zero))
14552 }
14553
14554 /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
14555 ///
14556 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_epi64&expand=6087)
14557 #[inline]
14558 #[target_feature(enable = "avx512f")]
14559 #[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
14560 pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
14561 simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
14562 }
14563
14564 /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14565 ///
14566 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_epi64&expand=6085)
14567 #[inline]
14568 #[target_feature(enable = "avx512f")]
14569 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
14570 pub unsafe fn _mm512_mask_unpacklo_epi64(
14571 src: __m512i,
14572 k: __mmask8,
14573 a: __m512i,
14574 b: __m512i,
14575 ) -> __m512i {
14576 let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
14577 transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
14578 }
14579
14580 /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14581 ///
14582 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_epi64&expand=6086)
14583 #[inline]
14584 #[target_feature(enable = "avx512f")]
14585 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
14586 pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
14587 let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
14588 let zero = _mm512_setzero_si512().as_i64x8();
14589 transmute(simd_select_bitmask(k, unpacklo, zero))
14590 }
14591
14592 /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
14593 ///
14594 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_ps&expand=6117)
14595 #[inline]
14596 #[target_feature(enable = "avx512f")]
14597 #[cfg_attr(test, assert_instr(vunpcklps))]
14598 pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
14599 simd_shuffle16(
14600 a,
14601 b,
14602 [
14603 0,
14604 16,
14605 1,
14606 17,
14607 0 + 4,
14608 16 + 4,
14609 1 + 4,
14610 17 + 4,
14611 0 + 8,
14612 16 + 8,
14613 1 + 8,
14614 17 + 8,
14615 0 + 12,
14616 16 + 12,
14617 1 + 12,
14618 17 + 12,
14619 ],
14620 )
14621 }
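
// Illustrative sketch (not upstream code): per 128-bit lane, the two low
// floats of `a` are interleaved with the two low floats of `b`. Hypothetical
// helper; assumes AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_unpacklo_ps() {
    let a: __m512 = transmute([0.0f32, 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.]);
    let b = _mm512_set1_ps(-1.);
    let r: [f32; 16] = transmute(_mm512_unpacklo_ps(a, b));
    assert_eq!(r, [0., -1., 1., -1., 4., -1., 5., -1., 8., -1., 9., -1., 12., -1., 13., -1.]);
}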
14622
14623 /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14624 ///
14625 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_ps&expand=6115)
14626 #[inline]
14627 #[target_feature(enable = "avx512f")]
14628 #[cfg_attr(test, assert_instr(vunpcklps))]
14629 pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
14630 let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
14631 transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
14632 }
14633
14634 /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14635 ///
14636 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_ps&expand=6116)
14637 #[inline]
14638 #[target_feature(enable = "avx512f")]
14639 #[cfg_attr(test, assert_instr(vunpcklps))]
14640 pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
14641 let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
14642 let zero = _mm512_setzero_ps().as_f32x16();
14643 transmute(simd_select_bitmask(k, unpacklo, zero))
14644 }
14645
14646 /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
14647 ///
14648 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_pd&expand=6105)
14649 #[inline]
14650 #[target_feature(enable = "avx512f")]
14651 #[cfg_attr(test, assert_instr(vunpcklpd))]
14652 pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
14653 simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
14654 }
14655
14656 /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14657 ///
14658 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_pd&expand=6103)
14659 #[inline]
14660 #[target_feature(enable = "avx512f")]
14661 #[cfg_attr(test, assert_instr(vunpcklpd))]
14662 pub unsafe fn _mm512_mask_unpacklo_pd(
14663 src: __m512d,
14664 k: __mmask8,
14665 a: __m512d,
14666 b: __m512d,
14667 ) -> __m512d {
14668 let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
14669 transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
14670 }
14671
14672 /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14673 ///
14674 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_pd&expand=6104)
14675 #[inline]
14676 #[target_feature(enable = "avx512f")]
14677 #[cfg_attr(test, assert_instr(vunpcklpd))]
14678 pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
14679 let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
14680 let zero = _mm512_setzero_pd().as_f64x8();
14681 transmute(simd_select_bitmask(k, unpacklo, zero))
14682 }
14683
14684 /// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14685 ///
14686 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps128_ps512&expand=621)
14687 #[inline]
14688 #[target_feature(enable = "avx512f")]
14689 pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
14690 simd_shuffle16(
14691 a,
14692 _mm_set1_ps(-1.),
14693 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
14694 )
14695 }
14696
14697 /// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14698 ///
14699 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps256_ps512&expand=623)
14700 #[inline]
14701 #[target_feature(enable = "avx512f")]
14702 pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
14703 simd_shuffle16(
14704 a,
14705 _mm256_set1_ps(-1.),
14706 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
14707 )
14708 }
14709
14710 /// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14711 ///
14712 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextps128_ps512&expand=6196)
14713 #[inline]
14714 #[target_feature(enable = "avx512f")]
14715 pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
14716 simd_shuffle16(
14717 a,
14718 _mm_set1_ps(0.),
14719 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
14720 )
14721 }
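
// Illustrative sketch (not upstream code): unlike `_mm512_castps128_ps512`,
// whose upper 384 bits are unspecified, the `zext` variant guarantees zeros
// in the upper lanes. Hypothetical helper; assumes AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_zextps128_ps512() {
    let a = _mm_set1_ps(3.);
    let r: [f32; 16] = transmute(_mm512_zextps128_ps512(a));
    assert_eq!(r, [3., 3., 3., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]);
}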
14722
14723 /// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14724 ///
14725 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextps256_ps512&expand=6197)
14726 #[inline]
14727 #[target_feature(enable = "avx512f")]
14728 pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
14729 simd_shuffle16(
14730 a,
14731 _mm256_set1_ps(0.),
14732 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
14733 )
14734 }
14735
14736 /// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14737 ///
14738 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps512_ps128&expand=624)
14739 #[inline]
14740 #[target_feature(enable = "avx512f")]
14741 pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
14742 simd_shuffle4(a, a, [0, 1, 2, 3])
14743 }
14744
14745 /// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14746 ///
14747 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps512_ps256&expand=625)
14748 #[inline]
14749 #[target_feature(enable = "avx512f")]
14750 pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
14751 simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
14752 }
14753
14754 /// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14755 ///
14756 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps_pd&expand=616)
14757 #[inline]
14758 #[target_feature(enable = "avx512f")]
14759 pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d {
14760 transmute(a.as_m512())
14761 }
14762
14763 /// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14764 ///
14765 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps_si512&expand=619)
14766 #[inline]
14767 #[target_feature(enable = "avx512f")]
14768 pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
14769 transmute(a.as_m512())
14770 }
14771
14772 /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14773 ///
14774 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd128_pd512&expand=609)
14775 #[inline]
14776 #[target_feature(enable = "avx512f")]
14777 pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
14778 simd_shuffle8(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
14779 }
14780
14781 /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14782 ///
14783 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd256_pd512&expand=611)
14784 #[inline]
14785 #[target_feature(enable = "avx512f")]
14786 pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
14787 simd_shuffle8(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
14788 }
14789
14790 /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14791 ///
14792 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextpd128_pd512&expand=6193)
14793 #[inline]
14794 #[target_feature(enable = "avx512f")]
14795 pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
14796 simd_shuffle8(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
14797 }
14798
14799 /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14800 ///
14801 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextpd256_pd512&expand=6194)
14802 #[inline]
14803 #[target_feature(enable = "avx512f")]
14804 pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
14805 simd_shuffle8(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
14806 }
14807
14808 /// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14809 ///
14810 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd512_pd128&expand=612)
14811 #[inline]
14812 #[target_feature(enable = "avx512f")]
14813 pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
14814 simd_shuffle2(a, a, [0, 1])
14815 }
14816
14817 /// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14818 ///
14819 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd512_pd256&expand=613)
14820 #[inline]
14821 #[target_feature(enable = "avx512f")]
14822 pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
14823 simd_shuffle4(a, a, [0, 1, 2, 3])
14824 }
14825
14826 /// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14827 ///
14828 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd_ps&expand=604)
14829 #[inline]
14830 #[target_feature(enable = "avx512f")]
14831 pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 {
14832 transmute(a.as_m512d())
14833 }
14834
14835 /// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14836 ///
14837 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd_si512&expand=607)
14838 #[inline]
14839 #[target_feature(enable = "avx512f")]
14840 pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
14841 transmute(a.as_m512d())
14842 }
14843
14844 /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14845 ///
14846 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi128_si512&expand=629)
14847 #[inline]
14848 #[target_feature(enable = "avx512f")]
14849 pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
14850 simd_shuffle8(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
14851 }
14852
14853 /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14854 ///
14855 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi256_si512&expand=633)
14856 #[inline]
14857 #[target_feature(enable = "avx512f")]
14858 pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
14859 simd_shuffle8(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
14860 }
14861
14862 /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14863 ///
14864 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextsi128_si512&expand=6199)
14865 #[inline]
14866 #[target_feature(enable = "avx512f")]
14867 pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
14868 simd_shuffle8(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
14869 }
14870
14871 /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14872 ///
14873 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextsi256_si512&expand=6200)
14874 #[inline]
14875 #[target_feature(enable = "avx512f")]
14876 pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
14877 simd_shuffle8(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
14878 }
14879
14880 /// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14881 ///
14882 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_si128&expand=636)
14883 #[inline]
14884 #[target_feature(enable = "avx512f")]
14885 pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
14886 simd_shuffle2(a, a, [0, 1])
14887 }
14888
14889 /// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14890 ///
14891 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_si256&expand=637)
14892 #[inline]
14893 #[target_feature(enable = "avx512f")]
14894 pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
14895 simd_shuffle4(a, a, [0, 1, 2, 3])
14896 }
14897
14898 /// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14899 ///
14900 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_ps&expand=635)
14901 #[inline]
14902 #[target_feature(enable = "avx512f")]
14903 pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
14904 transmute(a)
14905 }
14906
14907 /// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
14908 ///
14909 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_pd&expand=634)
14910 #[inline]
14911 #[target_feature(enable = "avx512f")]
14912 pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
14913 transmute(a)
14914 }
14915
14916 /// Broadcast the low packed 32-bit integer from a to all elements of dst.
14917 ///
14918 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastd_epi32&expand=545)
14919 #[inline]
14920 #[target_feature(enable = "avx512f")]
14921 #[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
14922 pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
14923 let a = _mm512_castsi128_si512(a).as_i32x16();
14924 let ret: i32x16 = simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
14925 transmute(ret)
14926 }
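
// Illustrative sketch (not upstream code): only element 0 of the 128-bit
// source matters; it is replicated into all sixteen lanes. Hypothetical
// helper; assumes AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_broadcastd_epi32() {
    let a: __m128i = transmute([42i32, 1, 2, 3]);
    let r: [i32; 16] = transmute(_mm512_broadcastd_epi32(a));
    assert_eq!(r, [42; 16]);
}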
14927
14928 /// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14929 ///
14930 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastd_epi32&expand=546)
14931 #[inline]
14932 #[target_feature(enable = "avx512f")]
14933 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
14934 pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
14935 let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
14936 transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
14937 }
14938
14939 /// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14940 ///
14941 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastd_epi32&expand=547)
14942 #[inline]
14943 #[target_feature(enable = "avx512f")]
14944 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
14945 pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
14946 let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
14947 let zero = _mm512_setzero_si512().as_i32x16();
14948 transmute(simd_select_bitmask(k, broadcast, zero))
14949 }
14950
14951 /// Broadcast the low packed 64-bit integer from a to all elements of dst.
14952 ///
14953 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastq_epi64&expand=560)
14954 #[inline]
14955 #[target_feature(enable = "avx512f")]
14956 #[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq
14957 pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
14958 simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
14959 }
14960
14961 /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14962 ///
14963 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastq_epi64&expand=561)
14964 #[inline]
14965 #[target_feature(enable = "avx512f")]
14966 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
14967 pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
14968 let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
14969 transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
14970 }
14971
14972 /// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14973 ///
14974 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastq_epi64&expand=562)
14975 #[inline]
14976 #[target_feature(enable = "avx512f")]
14977 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
14978 pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
14979 let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
14980 let zero = _mm512_setzero_si512().as_i64x8();
14981 transmute(simd_select_bitmask(k, broadcast, zero))
14982 }
14983
14984 /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
14985 ///
14986 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastss_ps&expand=578)
14987 #[inline]
14988 #[target_feature(enable = "avx512f")]
14989 #[cfg_attr(test, assert_instr(vbroadcastss))]
14990 pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
14991 simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
14992 }
14993
14994 /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14995 ///
14996 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastss_ps&expand=579)
14997 #[inline]
14998 #[target_feature(enable = "avx512f")]
14999 #[cfg_attr(test, assert_instr(vbroadcastss))]
15000 pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
15001 let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
15002 transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
15003 }
15004
15005 /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15006 ///
15007 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastss_ps&expand=580)
15008 #[inline]
15009 #[target_feature(enable = "avx512f")]
15010 #[cfg_attr(test, assert_instr(vbroadcastss))]
15011 pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
15012 let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
15013 let zero = _mm512_setzero_ps().as_f32x16();
15014 transmute(simd_select_bitmask(k, broadcast, zero))
15015 }
15016
15017 /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
15018 ///
15019 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastsd_pd&expand=567)
15020 #[inline]
15021 #[target_feature(enable = "avx512f")]
15022 #[cfg_attr(test, assert_instr(vbroadcastsd))]
15023 pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
15024 simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
15025 }
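
// Illustrative sketch (not upstream code): the low double of the 128-bit
// source is replicated into all eight lanes. Hypothetical helper; assumes
// AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_broadcastsd_pd() {
    // _mm_set_pd takes its arguments high element first, low element second
    let a = _mm_set_pd(9., 42.);
    let r: [f64; 8] = transmute(_mm512_broadcastsd_pd(a));
    assert_eq!(r, [42.; 8]);
}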
15026
15027 /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15028 ///
15029 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastsd_pd&expand=568)
15030 #[inline]
15031 #[target_feature(enable = "avx512f")]
15032 #[cfg_attr(test, assert_instr(vbroadcastsd))]
15033 pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
15034 let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
15035 transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
15036 }
15037
15038 /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15039 ///
15040 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastsd_pd&expand=569)
15041 #[inline]
15042 #[target_feature(enable = "avx512f")]
15043 #[cfg_attr(test, assert_instr(vbroadcastsd))]
15044 pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
15045 let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
15046 let zero = _mm512_setzero_pd().as_f64x8();
15047 transmute(simd_select_bitmask(k, broadcast, zero))
15048 }
15049
15050 /// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
15051 ///
15052 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_i32x4&expand=510)
15053 #[inline]
15054 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
15055 pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
15056 let a = _mm512_castsi128_si512(a).as_i32x16();
15057 let ret: i32x16 = simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
15058 transmute(ret)
15059 }
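
// Illustrative sketch (not upstream code): the whole 128-bit source block is
// repeated four times across the 512-bit result. Hypothetical helper; assumes
// AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_broadcast_i32x4() {
    let a: __m128i = transmute([1i32, 2, 3, 4]);
    let r: [i32; 16] = transmute(_mm512_broadcast_i32x4(a));
    assert_eq!(r, [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4]);
}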
15060
15061 /// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15062 ///
15063 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_i32x4&expand=511)
15064 #[inline]
15065 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
15066 pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
15067 let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
15068 transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
15069 }
15070
15071 /// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15072 ///
15073 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_i32x4&expand=512)
15074 #[inline]
15075 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
15076 pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
15077 let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
15078 let zero = _mm512_setzero_si512().as_i32x16();
15079 transmute(simd_select_bitmask(k, broadcast, zero))
15080 }
15081
15082 /// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
15083 ///
15084 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_i64x4&expand=522)
15085 #[inline]
15086 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
15087 pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
15088 simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
15089 }
15090
15091 /// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15092 ///
15093 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_i64x4&expand=523)
15094 #[inline]
15095 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
15096 pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
15097 let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
15098 transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
15099 }
15100
15101 /// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15102 ///
15103 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_i64x4&expand=524)
15104 #[inline]
15105 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
15106 pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
15107 let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
15108 let zero = _mm512_setzero_si512().as_i64x8();
15109 transmute(simd_select_bitmask(k, broadcast, zero))
15110 }
15111
15112 /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
15113 ///
15114 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_f32x4&expand=483)
15115 #[inline]
15116 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
15117 pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
15118 simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
15119 }
15120
15121 /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15122 ///
15123 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_f32x4&expand=484)
15124 #[inline]
15125 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
15126 pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
15127 let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
15128 transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
15129 }
15130
15131 /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15132 ///
15133 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_f32x4&expand=485)
15134 #[inline]
15135 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
15136 pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
15137 let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
15138 let zero = _mm512_setzero_ps().as_f32x16();
15139 transmute(simd_select_bitmask(k, broadcast, zero))
15140 }
15141
15142 /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
15143 ///
15144 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_f64x4&expand=495)
15145 #[inline]
15146 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
15147 pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
15148 simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
15149 }
15150
15151 /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15152 ///
15153 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_f64x4&expand=496)
15154 #[inline]
15155 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
15156 pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
15157 let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
15158 transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
15159 }
15160
15161 /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15162 ///
15163 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_f64x4&expand=497)
15164 #[inline]
15165 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
15166 pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
15167 let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
15168 let zero = _mm512_setzero_pd().as_f64x8();
15169 transmute(simd_select_bitmask(k, broadcast, zero))
15170 }
15171
15172 /// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
15173 ///
15174 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_epi32&expand=435)
15175 #[inline]
15176 #[target_feature(enable = "avx512f")]
15177 #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
15178 pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15179 transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16()))
15180 }
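
// Illustrative sketch (not upstream code): each mask bit picks between the
// corresponding elements of `b` (bit set) and `a` (bit clear). Hypothetical
// helper; assumes AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_blend_epi32() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // the low eight mask bits are set, so the low eight elements come from `b`
    let r: [i32; 16] = transmute(_mm512_mask_blend_epi32(0b0000_0000_1111_1111, a, b));
    assert_eq!(r, [2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1]);
}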
15181
15182 /// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
15183 ///
15184 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_epi64&expand=438)
15185 #[inline]
15186 #[target_feature(enable = "avx512f")]
15187 #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
15188 pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15189 transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8()))
15190 }
15191
15192 /// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
15193 ///
15194 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_ps&expand=451)
15195 #[inline]
15196 #[target_feature(enable = "avx512f")]
15197 #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
15198 pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
15199 transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16()))
15200 }
15201
15202 /// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
15203 ///
15204 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_pd&expand=446)
15205 #[inline]
15206 #[target_feature(enable = "avx512f")]
15207 #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
15208 pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
15209 transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8()))
15210 }
15211
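// --- Illustrative usage sketch (not part of the upstream file) ---
// Demonstrates the blend convention used above: where mask bit i is set the
// result lane comes from `b`, otherwise from `a`. The helper name
// `blend_epi32_example` and the constants are invented; AVX512F is assumed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn blend_epi32_example() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // Mask 0x00ff selects `b` for lanes 0..8 and keeps `a` for lanes 8..16.
    let r = _mm512_mask_blend_epi32(0x00ff, a, b);
    let e = _mm512_setr_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}
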
15212 /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
15213 ///
15214 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_alignr_epi32&expand=245)
15215 #[inline]
15216 #[target_feature(enable = "avx512f")]
15217 #[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
15218 #[rustc_args_required_const(2)]
15219 pub unsafe fn _mm512_alignr_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
15220 assert!(imm8 >= 0 && imm8 <= 255);
15221 let a = a.as_i32x16();
15222 let b = b.as_i32x16();
15223 let imm8: i32 = imm8 % 16;
15224 let r: i32x16 = match imm8 {
15225 0 => simd_shuffle16(
15226 a,
15227 b,
15228 [
15229 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
15230 ],
15231 ),
15232 1 => simd_shuffle16(
15233 a,
15234 b,
15235 [
15236 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
15237 ],
15238 ),
15239 2 => simd_shuffle16(
15240 a,
15241 b,
15242 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
15243 ),
15244 3 => simd_shuffle16(
15245 a,
15246 b,
15247 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
15248 ),
15249 4 => simd_shuffle16(
15250 a,
15251 b,
15252 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
15253 ),
15254 5 => simd_shuffle16(
15255 a,
15256 b,
15257 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
15258 ),
15259 6 => simd_shuffle16(
15260 a,
15261 b,
15262 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
15263 ),
15264 7 => simd_shuffle16(
15265 a,
15266 b,
15267 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
15268 ),
15269 8 => simd_shuffle16(
15270 a,
15271 b,
15272 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
15273 ),
15274 9 => simd_shuffle16(
15275 a,
15276 b,
15277 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
15278 ),
15279 10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
15280 11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
15281 12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
15282 13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
15283 14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
15284 _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
15285 };
15286 transmute(r)
15287 }
15288
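// --- Illustrative usage sketch (not part of the upstream file) ---
// With `imm8 = 1` the concatenation a:b is shifted right by one 32-bit
// element, so the result is b[1..16] followed by a[0]. The helper name
// `alignr_epi32_example` is invented; `imm8` is passed as a literal because
// the signature above requires a compile-time constant.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn alignr_epi32_example() {
    let a = _mm512_set1_epi32(100);
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_alignr_epi32(a, b, 1);
    let e = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}
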
15289 /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15290 ///
15291 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_alignr_epi32&expand=246)
15292 #[inline]
15293 #[target_feature(enable = "avx512f")]
15294 #[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
15295 #[rustc_args_required_const(4)]
15296 pub unsafe fn _mm512_mask_alignr_epi32(
15297 src: __m512i,
15298 k: __mmask16,
15299 a: __m512i,
15300 b: __m512i,
15301 imm8: i32,
15302 ) -> __m512i {
15303 assert!(imm8 >= 0 && imm8 <= 255);
15304 let a = a.as_i32x16();
15305 let b = b.as_i32x16();
15306 let imm8: i32 = imm8 % 16;
15307 let r: i32x16 = match imm8 {
15308 0 => simd_shuffle16(
15309 a,
15310 b,
15311 [
15312 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
15313 ],
15314 ),
15315 1 => simd_shuffle16(
15316 a,
15317 b,
15318 [
15319 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
15320 ],
15321 ),
15322 2 => simd_shuffle16(
15323 a,
15324 b,
15325 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
15326 ),
15327 3 => simd_shuffle16(
15328 a,
15329 b,
15330 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
15331 ),
15332 4 => simd_shuffle16(
15333 a,
15334 b,
15335 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
15336 ),
15337 5 => simd_shuffle16(
15338 a,
15339 b,
15340 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
15341 ),
15342 6 => simd_shuffle16(
15343 a,
15344 b,
15345 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
15346 ),
15347 7 => simd_shuffle16(
15348 a,
15349 b,
15350 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
15351 ),
15352 8 => simd_shuffle16(
15353 a,
15354 b,
15355 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
15356 ),
15357 9 => simd_shuffle16(
15358 a,
15359 b,
15360 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
15361 ),
15362 10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
15363 11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
15364 12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
15365 13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
15366 14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
15367 _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
15368 };
15369 transmute(simd_select_bitmask(k, r, src.as_i32x16()))
15370 }
15371
15372 /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and stores the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15373 ///
15374 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_alignr_epi32&expand=247)
15375 #[inline]
15376 #[target_feature(enable = "avx512f")]
15377 #[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
15378 #[rustc_args_required_const(3)]
15379 pub unsafe fn _mm512_maskz_alignr_epi32(
15380 k: __mmask16,
15381 a: __m512i,
15382 b: __m512i,
15383 imm8: i32,
15384 ) -> __m512i {
15385 assert!(imm8 >= 0 && imm8 <= 255);
15386 let a = a.as_i32x16();
15387 let b = b.as_i32x16();
15388 let imm8: i32 = imm8 % 16;
15389 let r: i32x16 = match imm8 {
15390 0 => simd_shuffle16(
15391 a,
15392 b,
15393 [
15394 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
15395 ],
15396 ),
15397 1 => simd_shuffle16(
15398 a,
15399 b,
15400 [
15401 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
15402 ],
15403 ),
15404 2 => simd_shuffle16(
15405 a,
15406 b,
15407 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
15408 ),
15409 3 => simd_shuffle16(
15410 a,
15411 b,
15412 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
15413 ),
15414 4 => simd_shuffle16(
15415 a,
15416 b,
15417 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
15418 ),
15419 5 => simd_shuffle16(
15420 a,
15421 b,
15422 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
15423 ),
15424 6 => simd_shuffle16(
15425 a,
15426 b,
15427 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
15428 ),
15429 7 => simd_shuffle16(
15430 a,
15431 b,
15432 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
15433 ),
15434 8 => simd_shuffle16(
15435 a,
15436 b,
15437 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
15438 ),
15439 9 => simd_shuffle16(
15440 a,
15441 b,
15442 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
15443 ),
15444 10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
15445 11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
15446 12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
15447 13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
15448 14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
15449 _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
15450 };
15451 let zero = _mm512_setzero_si512().as_i32x16();
15452 transmute(simd_select_bitmask(k, r, zero))
15453 }
15454
15455 /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
15456 ///
15457 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_alignr_epi64&expand=254)
15458 #[inline]
15459 #[target_feature(enable = "avx512f")]
15460 #[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
15461 #[rustc_args_required_const(2)]
15462 pub unsafe fn _mm512_alignr_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
15463 assert!(imm8 >= 0 && imm8 <= 255);
15464 let imm8: i32 = imm8 % 8;
15465 let r: i64x8 = match imm8 {
15466 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
15467 1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
15468 2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
15469 3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
15470 4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
15471 5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
15472 6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
15473 _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
15474 };
15475 transmute(r)
15476 }
15477
15478 /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15479 ///
15480 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_alignr_epi64&expand=255)
15481 #[inline]
15482 #[target_feature(enable = "avx512f")]
15483 #[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
15484 #[rustc_args_required_const(4)]
15485 pub unsafe fn _mm512_mask_alignr_epi64(
15486 src: __m512i,
15487 k: __mmask8,
15488 a: __m512i,
15489 b: __m512i,
15490 imm8: i32,
15491 ) -> __m512i {
15492 assert!(imm8 >= 0 && imm8 <= 255);
15493 let imm8: i32 = imm8 % 8;
15494 let r: i64x8 = match imm8 {
15495 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
15496 1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
15497 2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
15498 3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
15499 4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
15500 5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
15501 6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
15502 _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
15503 };
15504 transmute(simd_select_bitmask(k, r, src.as_i64x8()))
15505 }
15506
15507 /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and stores the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15508 ///
15509 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_alignr_epi64&expand=256)
15510 #[inline]
15511 #[target_feature(enable = "avx512f")]
15512 #[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
15513 #[rustc_args_required_const(3)]
15514 pub unsafe fn _mm512_maskz_alignr_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
15515 assert!(imm8 >= 0 && imm8 <= 255);
15516 let imm8: i32 = imm8 % 8;
15517 let r: i64x8 = match imm8 {
15518 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
15519 1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
15520 2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
15521 3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
15522 4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
15523 5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
15524 6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
15525 _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
15526 };
15527 let zero = _mm512_setzero_si512().as_i64x8();
15528 transmute(simd_select_bitmask(k, r, zero))
15529 }
15530
15531 /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
15532 ///
15533 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
15534 #[inline]
15535 #[target_feature(enable = "avx512f")]
15536 #[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
15537 pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
15538 transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
15539 }
15540
15541 /// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15542 ///
15543 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273)
15544 #[inline]
15545 #[target_feature(enable = "avx512f")]
15546 #[cfg_attr(test, assert_instr(vpandd))]
15547 pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15548 let and = _mm512_and_epi32(a, b).as_i32x16();
15549 transmute(simd_select_bitmask(k, and, src.as_i32x16()))
15550 }
15551
15552 /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15553 ///
15554 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274)
15555 #[inline]
15556 #[target_feature(enable = "avx512f")]
15557 #[cfg_attr(test, assert_instr(vpandd))]
15558 pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15559 let and = _mm512_and_epi32(a, b).as_i32x16();
15560 let zero = _mm512_setzero_si512().as_i32x16();
15561 transmute(simd_select_bitmask(k, and, zero))
15562 }
15563
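// --- Illustrative usage sketch (not part of the upstream file) ---
// Contrasts the writemask and zeromask forms of the 32-bit AND: unselected
// lanes keep `src` in the former and become zero in the latter. The helper
// name `masked_and_epi32_example` and the constants are invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn masked_and_epi32_example() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    let src = _mm512_set1_epi32(-1);
    let w = _mm512_mask_and_epi32(src, 0x000f, a, b);
    let z = _mm512_maskz_and_epi32(0x000f, a, b);
    let ew = _mm512_setr_epi32(8, 8, 8, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    let ez = _mm512_setr_epi32(8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(w, ew), 0xffff);
    assert_eq!(_mm512_cmpeq_epi32_mask(z, ez), 0xffff);
}
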
15564 /// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
15565 ///
15566 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279)
15567 #[inline]
15568 #[target_feature(enable = "avx512f")]
15569 #[cfg_attr(test, assert_instr(vpandq))]
15570 pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
15571 transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
15572 }
15573
15574 /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15575 ///
15576 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280)
15577 #[inline]
15578 #[target_feature(enable = "avx512f")]
15579 #[cfg_attr(test, assert_instr(vpandq))]
15580 pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15581 let and = _mm512_and_epi64(a, b).as_i64x8();
15582 transmute(simd_select_bitmask(k, and, src.as_i64x8()))
15583 }
15584
15585 /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15586 ///
15587 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=274)
15588 #[inline]
15589 #[target_feature(enable = "avx512f")]
15590 #[cfg_attr(test, assert_instr(vpandq))]
15591 pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15592 let and = _mm512_and_epi64(a, b).as_i64x8();
15593 let zero = _mm512_setzero_si512().as_i64x8();
15594 transmute(simd_select_bitmask(k, and, zero))
15595 }
15596
15597 /// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
15598 ///
15599 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
15600 #[inline]
15601 #[target_feature(enable = "avx512f")]
15602 #[cfg_attr(test, assert_instr(vpandq))]
15603 pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
15604 transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
15605 }
15606
15607 /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
15608 ///
15609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
15610 #[inline]
15611 #[target_feature(enable = "avx512f")]
15612 #[cfg_attr(test, assert_instr(vporq))]
15613 pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
15614 transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
15615 }
15616
15617 /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15618 ///
15619 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040)
15620 #[inline]
15621 #[target_feature(enable = "avx512f")]
15622 #[cfg_attr(test, assert_instr(vpord))]
15623 pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15624 let or = _mm512_or_epi32(a, b).as_i32x16();
15625 transmute(simd_select_bitmask(k, or, src.as_i32x16()))
15626 }
15627
15628 /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15629 ///
15630 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041)
15631 #[inline]
15632 #[target_feature(enable = "avx512f")]
15633 #[cfg_attr(test, assert_instr(vpord))]
15634 pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15635 let or = _mm512_or_epi32(a, b).as_i32x16();
15636 let zero = _mm512_setzero_si512().as_i32x16();
15637 transmute(simd_select_bitmask(k, or, zero))
15638 }
15639
15640 /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
15641 ///
15642 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051)
15643 #[inline]
15644 #[target_feature(enable = "avx512f")]
15645 #[cfg_attr(test, assert_instr(vporq))]
15646 pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
15647 transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
15648 }
15649
15650 /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15651 ///
15652 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
15653 #[inline]
15654 #[target_feature(enable = "avx512f")]
15655 #[cfg_attr(test, assert_instr(vporq))]
15656 pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15657 let or = _mm512_or_epi64(a, b).as_i64x8();
15658 transmute(simd_select_bitmask(k, or, src.as_i64x8()))
15659 }
15660
15661 /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15662 ///
15663 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
15664 #[inline]
15665 #[target_feature(enable = "avx512f")]
15666 #[cfg_attr(test, assert_instr(vporq))]
15667 pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15668 let or = _mm512_or_epi64(a, b).as_i64x8();
15669 let zero = _mm512_setzero_si512().as_i64x8();
15670 transmute(simd_select_bitmask(k, or, zero))
15671 }
15672
15673 /// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
15674 ///
15675 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
15676 #[inline]
15677 #[target_feature(enable = "avx512f")]
15678 #[cfg_attr(test, assert_instr(vporq))]
15679 pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
15680 transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
15681 }
15682
15683 /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
15684 ///
15685 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
15686 #[inline]
15687 #[target_feature(enable = "avx512f")]
15688 #[cfg_attr(test, assert_instr(vpxorq))]
15689 pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
15690 transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
15691 }
15692
15693 /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15694 ///
15695 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
15696 #[inline]
15697 #[target_feature(enable = "avx512f")]
15698 #[cfg_attr(test, assert_instr(vpxord))]
15699 pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15700 let xor = _mm512_xor_epi32(a, b).as_i32x16();
15701 transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
15702 }
15703
15704 /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15705 ///
15706 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
15707 #[inline]
15708 #[target_feature(enable = "avx512f")]
15709 #[cfg_attr(test, assert_instr(vpxord))]
15710 pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15711 let xor = _mm512_xor_epi32(a, b).as_i32x16();
15712 let zero = _mm512_setzero_si512().as_i32x16();
15713 transmute(simd_select_bitmask(k, xor, zero))
15714 }
15715
15716 /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
15717 ///
15718 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
15719 #[inline]
15720 #[target_feature(enable = "avx512f")]
15721 #[cfg_attr(test, assert_instr(vpxorq))]
15722 pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
15723 transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
15724 }
15725
15726 /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15727 ///
15728 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
15729 #[inline]
15730 #[target_feature(enable = "avx512f")]
15731 #[cfg_attr(test, assert_instr(vpxorq))]
15732 pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15733 let xor = _mm512_xor_epi64(a, b).as_i64x8();
15734 transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
15735 }
15736
15737 /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15738 ///
15739 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
15740 #[inline]
15741 #[target_feature(enable = "avx512f")]
15742 #[cfg_attr(test, assert_instr(vpxorq))]
15743 pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15744 let xor = _mm512_xor_epi64(a, b).as_i64x8();
15745 let zero = _mm512_setzero_si512().as_i64x8();
15746 transmute(simd_select_bitmask(k, xor, zero))
15747 }
15748
15749 /// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
15750 ///
15751 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
15752 #[inline]
15753 #[target_feature(enable = "avx512f")]
15754 #[cfg_attr(test, assert_instr(vpxorq))]
15755 pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
15756 transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
15757 }
15758
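// --- Illustrative usage sketch (not part of the upstream file) ---
// XOR-ing a register with itself is the usual zeroing idiom; the 512-bit
// form behaves the same way. The helper name `xor_zeroing_example` is
// invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn xor_zeroing_example() {
    let a = _mm512_set1_epi32(0x5555_5555);
    let r = _mm512_xor_si512(a, a);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_setzero_si512()), 0xffff);
}
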
15759 /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
15760 ///
15761 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_andnot_epi32&expand=310)
15762 #[inline]
15763 #[target_feature(enable = "avx512f")]
15764 #[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
15765 pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
15766 _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
15767 }
15768
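// --- Illustrative usage sketch (not part of the upstream file) ---
// `andnot` computes `!a & b` per lane, which the implementation above builds
// from an XOR with all-ones followed by an AND. The helper name
// `andnot_epi32_example` is invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn andnot_epi32_example() {
    let a = _mm512_set1_epi32(0b0110);
    let b = _mm512_set1_epi32(0b1100);
    // !0b0110 & 0b1100 == 0b1000 in every lane.
    let r = _mm512_andnot_epi32(a, b);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b1000)), 0xffff);
}
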
15769 /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15770 ///
15771 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_andnot_epi32&expand=311)
15772 #[inline]
15773 #[target_feature(enable = "avx512f")]
15774 #[cfg_attr(test, assert_instr(vpandnd))]
15775 pub unsafe fn _mm512_mask_andnot_epi32(
15776 src: __m512i,
15777 k: __mmask16,
15778 a: __m512i,
15779 b: __m512i,
15780 ) -> __m512i {
15781 let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
15782 transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
15783 }
15784
15785 /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15786 ///
15787 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_andnot_epi32&expand=312)
15788 #[inline]
15789 #[target_feature(enable = "avx512f")]
15790 #[cfg_attr(test, assert_instr(vpandnd))]
15791 pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
15792 let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
15793 let zero = _mm512_setzero_si512().as_i32x16();
15794 transmute(simd_select_bitmask(k, andnot, zero))
15795 }
15796
15797 /// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
15798 ///
15799 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_andnot_epi64&expand=317)
15800 #[inline]
15801 #[target_feature(enable = "avx512f")]
15802 #[cfg_attr(test, assert_instr(vpandnq))]
15803 pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
15804 _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
15805 }
15806
15807 /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15808 ///
15809 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_andnot_epi64&expand=318)
15810 #[inline]
15811 #[target_feature(enable = "avx512f")]
15812 #[cfg_attr(test, assert_instr(vpandnq))]
15813 pub unsafe fn _mm512_mask_andnot_epi64(
15814 src: __m512i,
15815 k: __mmask8,
15816 a: __m512i,
15817 b: __m512i,
15818 ) -> __m512i {
15819 let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
15820 transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
15821 }
15822
15823 /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15824 ///
15825 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_andnot_epi64&expand=319)
15826 #[inline]
15827 #[target_feature(enable = "avx512f")]
15828 #[cfg_attr(test, assert_instr(vpandnq))]
15829 pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
15830 let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
15831 let zero = _mm512_setzero_si512().as_i64x8();
15832 transmute(simd_select_bitmask(k, andnot, zero))
15833 }
15834
15835 /// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
15836 ///
15837 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_andnot_si512&expand=340)
15838 #[inline]
15839 #[target_feature(enable = "avx512f")]
15840 #[cfg_attr(test, assert_instr(vpandnq))]
15841 pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
15842 _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
15843 }
15844
15845 /// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
15846 ///
15847 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
15848 #[inline]
15849 #[target_feature(enable = "avx512f")]
15850 #[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
15851 pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
15852 transmute(a & b)
15853 }
15854
15855 /// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
15856 ///
15857 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
15858 #[inline]
15859 #[target_feature(enable = "avx512f")]
15860 #[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
15861 pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
15862 transmute(a & b)
15863 }
15864
15865 /// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
15866 ///
15867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
15868 #[inline]
15869 #[target_feature(enable = "avx512f")]
15870 #[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
15871 pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
15872 transmute(a | b)
15873 }
15874
15875 /// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
15876 ///
15877 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
15878 #[inline]
15879 #[target_feature(enable = "avx512f")]
15880 #[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
15881 pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
15882 transmute(a | b)
15883 }
15884
15885 /// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
15886 ///
15887 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
15888 #[inline]
15889 #[target_feature(enable = "avx512f")]
15890 #[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
15891 pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
15892 transmute(a ^ b)
15893 }
15894
15895 /// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
15896 ///
15897 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
15898 #[inline]
15899 #[target_feature(enable = "avx512f")]
15900 #[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
15901 pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
15902 transmute(a ^ b)
15903 }
15904
15905 /// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
15906 ///
15907 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=knot_mask16&expand=3233)
15908 #[inline]
15909 #[target_feature(enable = "avx512f")]
15910 pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
15911 transmute(a ^ 0b11111111_11111111)
15912 }
15913
15914 /// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
15915 ///
15916 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_knot&expand=3231)
15917 #[inline]
15918 #[target_feature(enable = "avx512f")]
15919 pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
15920 transmute(a ^ 0b11111111_11111111)
15921 }
15922
15923 /// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
15924 ///
15925 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kandn_mask16&expand=3218)
15926 #[inline]
15927 #[target_feature(enable = "avx512f")]
15928 #[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
15929 pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
15930 _mm512_kand(_mm512_knot(a), b)
15931 }
15932
15933 /// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
15934 ///
15935 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kandn&expand=3216)
15936 #[inline]
15937 #[target_feature(enable = "avx512f")]
15938 #[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
15939 pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
15940 _mm512_kand(_mm512_knot(a), b)
15941 }
15942
15943 /// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
15944 ///
15945 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxnor_mask16&expand=3285)
15946 #[inline]
15947 #[target_feature(enable = "avx512f")]
15948 #[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
15949 pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
15950 _mm512_knot(_mm512_kxor(a, b))
15951 }
15952
15953 /// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
15954 ///
15955 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxnor&expand=3283)
15956 #[inline]
15957 #[target_feature(enable = "avx512f")]
15958 #[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
15959 pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
15960 _mm512_knot(_mm512_kxor(a, b))
15961 }
15962
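// --- Illustrative usage sketch (not part of the upstream file) ---
// The 16-bit mask operations above are plain integer bit twiddling on
// `__mmask16`; this sketch checks each against the equivalent operator. The
// helper name `mask16_ops_example` and the mask constants are invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask16_ops_example() {
    let a: __mmask16 = 0b11110000_00001111;
    let b: __mmask16 = 0b00111100_00111100;
    assert_eq!(_mm512_kand(a, b), a & b);
    assert_eq!(_mm512_kor(a, b), a | b);
    assert_eq!(_mm512_kxor(a, b), a ^ b);
    assert_eq!(_mm512_knot(a), !a);
    assert_eq!(_mm512_kandn(a, b), !a & b);
    assert_eq!(_mm512_kxnor(a, b), !(a ^ b));
}
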
15963 /// Copy 16-bit mask a to k.
15964 ///
15965 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_kmov&expand=3228)
15966 #[inline]
15967 #[target_feature(enable = "avx512f")]
15968 #[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
15969 pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
15970 let r: u16 = a;
15971 transmute(r)
15972 }
15973
15974 /// Converts integer mask into bitmask, storing the result in dst.
15975 ///
15976 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_int2mask&expand=3189)
15977 #[inline]
15978 #[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
15979 pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 {
15980 let r: u16 = mask as u16;
15981 transmute(r)
15982 }
15983
15984 /// Converts bit mask k1 into an integer value, storing the results in dst.
15985 ///
15986 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2int&expand=3544)
15987 #[inline]
15988 #[target_feature(enable = "avx512f")]
15989 #[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
15990 pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 {
15991 let r: i32 = k1 as i32;
15992 transmute(r)
15993 }
15994
15995 /// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
15996 ///
15997 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kunpackb&expand=3280)
15998 #[inline]
15999 #[target_feature(enable = "avx512f")]
16000 #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw
16001 pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
16002 let a = a & 0b00000000_11111111;
16003 let b = b & 0b00000000_11111111;
16004 transmute((a << 8) | b)
16005 }
16006
16007 /// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
16008 ///
16009 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kortestc&expand=3247)
16010 #[inline]
16011 #[target_feature(enable = "avx512f")]
16012 #[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
16013 pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
16014 let r = a | b;
16015 if r == 0b11111111_11111111 {
16016 1
16017 } else {
16018 0
16019 }
16020 }
16021
16022 /// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
16023 ///
16024 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_test_epi32_mask&expand=5890)
16025 #[inline]
16026 #[target_feature(enable = "avx512f")]
16027 #[cfg_attr(test, assert_instr(vptestmd))]
16028 pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
16029 let and = _mm512_and_epi32(a, b);
16030 let zero = _mm512_setzero_si512();
16031 _mm512_cmpneq_epi32_mask(and, zero)
16032 }
16033
16034 /// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
16035 ///
16036 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_test_epi32_mask&expand=5889)
16037 #[inline]
16038 #[target_feature(enable = "avx512f")]
16039 #[cfg_attr(test, assert_instr(vptestmd))]
16040 pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
16041 let and = _mm512_and_epi32(a, b);
16042 let zero = _mm512_setzero_si512();
16043 _mm512_mask_cmpneq_epi32_mask(k, and, zero)
16044 }
16045
16046 /// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
16047 ///
16048 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_test_epi64_mask&expand=5896)
16049 #[inline]
16050 #[target_feature(enable = "avx512f")]
16051 #[cfg_attr(test, assert_instr(vptestmq))]
16052 pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
16053 let and = _mm512_and_epi64(a, b);
16054 let zero = _mm512_setzero_si512();
16055 _mm512_cmpneq_epi64_mask(and, zero)
16056 }
16057
16058 /// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
16059 ///
16060 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_test_epi64_mask&expand=5895)
16061 #[inline]
16062 #[target_feature(enable = "avx512f")]
16063 #[cfg_attr(test, assert_instr(vptestmq))]
16064 pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
16065 let and = _mm512_and_epi64(a, b);
16066 let zero = _mm512_setzero_si512();
16067 _mm512_mask_cmpneq_epi64_mask(k, and, zero)
16068 }
16069
16070 /// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
16071 ///
16072 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_testn_epi32_mask&expand=5921)
16073 #[inline]
16074 #[target_feature(enable = "avx512f")]
16075 #[cfg_attr(test, assert_instr(vptestnmd))]
16076 pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
16077 let and = _mm512_and_epi32(a, b);
16078 let zero = _mm512_setzero_si512();
16079 _mm512_cmpeq_epi32_mask(and, zero)
16080 }
16081
16082 /// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
16083 ///
16084 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_testn_epi32_mask&expand=5920)
16085 #[inline]
16086 #[target_feature(enable = "avx512f")]
16087 #[cfg_attr(test, assert_instr(vptestnmd))]
16088 pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
16089 let and = _mm512_and_epi32(a, b);
16090 let zero = _mm512_setzero_si512();
16091 _mm512_mask_cmpeq_epi32_mask(k, and, zero)
16092 }
16093
16094 /// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
16095 ///
16096 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_testn_epi64_mask&expand=5927)
16097 #[inline]
16098 #[target_feature(enable = "avx512f")]
16099 #[cfg_attr(test, assert_instr(vptestnmq))]
16100 pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
16101 let and = _mm512_and_epi64(a, b);
16102 let zero = _mm512_setzero_si512();
16103 _mm512_cmpeq_epi64_mask(and, zero)
16104 }
16105
16106 /// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
16107 ///
16108 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_testn_epi64_mask&expand=5926)
16109 #[inline]
16110 #[target_feature(enable = "avx512f")]
16111 #[cfg_attr(test, assert_instr(vptestnmq))]
16112 pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
16113 let and = _mm512_and_epi64(a, b);
16114 let zero = _mm512_setzero_si512();
16115 _mm512_mask_cmpeq_epi64_mask(k, and, zero)
16116 }
16117
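// --- Illustrative usage sketch (not part of the upstream file) ---
// `test` sets mask bit i when `a[i] & b[i]` is non-zero, `testn` when it is
// zero, so the two results are complementary. The helper name
// `test_epi32_mask_example` and the constants are invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn test_epi32_mask_example() {
    let a = _mm512_setr_epi32(1, 2, 4, 8, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0);
    let b = _mm512_set1_epi32(1);
    // Only lanes 0 and 8..12 have a non-zero intersection with `b`.
    assert_eq!(_mm512_test_epi32_mask(a, b), 0b0000_1111_0000_0001);
    assert_eq!(_mm512_testn_epi32_mask(a, b), 0b1111_0000_1111_1110);
}
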
16118 /// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
16119 ///
16120 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_stream_ps&expand=5671)
16121 #[inline]
16122 #[target_feature(enable = "avx512f")]
16123 #[cfg_attr(test, assert_instr(vmovntps))]
16124 #[allow(clippy::cast_ptr_alignment)]
16125 pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
16126 intrinsics::nontemporal_store(mem_addr as *mut __m512, a);
16127 }
16128
16129 /// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
16130 ///
16131 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_stream_pd&expand=5667)
16132 #[inline]
16133 #[target_feature(enable = "avx512f")]
16134 #[cfg_attr(test, assert_instr(vmovntps))] //should be vmovntpd
16135 #[allow(clippy::cast_ptr_alignment)]
16136 pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
16137 intrinsics::nontemporal_store(mem_addr as *mut __m512d, a);
16138 }
16139
16140 /// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
16141 ///
16142 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_stream_si512&expand=5675)
16143 #[inline]
16144 #[target_feature(enable = "avx512f")]
16145 #[cfg_attr(test, assert_instr(vmovntps))] //should be vmovntdq
16146 #[allow(clippy::cast_ptr_alignment)]
16147 pub unsafe fn _mm512_stream_si512(mem_addr: *mut i64, a: __m512i) {
16148 intrinsics::nontemporal_store(mem_addr as *mut __m512i, a);
16149 }
16150
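// --- Illustrative usage sketch (not part of the upstream file) ---
// Non-temporal stores require a 64-byte aligned destination; an sfence makes
// the store visible before the buffer is read again. The helper name
// `stream_ps_example` and the `Aligned` wrapper are invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn stream_ps_example() {
    #[repr(align(64))]
    struct Aligned([f32; 16]);
    let mut out = Aligned([0.0; 16]);
    _mm512_stream_ps(out.0.as_mut_ptr(), _mm512_set1_ps(3.0));
    _mm_sfence();
    assert_eq!(out.0, [3.0; 16]);
}
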
16151 /// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
16152 ///
16153 /// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
16154 #[inline]
16155 #[target_feature(enable = "avx512f")]
16156 pub unsafe fn _mm512_set_ps(
16157 e0: f32,
16158 e1: f32,
16159 e2: f32,
16160 e3: f32,
16161 e4: f32,
16162 e5: f32,
16163 e6: f32,
16164 e7: f32,
16165 e8: f32,
16166 e9: f32,
16167 e10: f32,
16168 e11: f32,
16169 e12: f32,
16170 e13: f32,
16171 e14: f32,
16172 e15: f32,
16173 ) -> __m512 {
16174 _mm512_setr_ps(
16175 e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
16176 )
16177 }
16178
16179 /// Sets packed single-precision (32-bit) floating-point elements in `dst` with the
16180 /// supplied values in reverse order.
16181 ///
16182 /// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
16183 #[inline]
16184 #[target_feature(enable = "avx512f")]
16185 pub unsafe fn _mm512_setr_ps(
16186 e0: f32,
16187 e1: f32,
16188 e2: f32,
16189 e3: f32,
16190 e4: f32,
16191 e5: f32,
16192 e6: f32,
16193 e7: f32,
16194 e8: f32,
16195 e9: f32,
16196 e10: f32,
16197 e11: f32,
16198 e12: f32,
16199 e13: f32,
16200 e14: f32,
16201 e15: f32,
16202 ) -> __m512 {
16203 let r = f32x16::new(
16204 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
16205 );
16206 transmute(r)
16207 }
16208
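// --- Illustrative usage sketch (not part of the upstream file) ---
// `_mm512_set_ps` takes its arguments highest element first while
// `_mm512_setr_ps` takes them lowest element first, so the two calls below
// build the same vector. The helper name `set_ps_order_example` is invented.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set_ps_order_example() {
    let hi_first = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let lo_first = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    assert_eq!(_mm512_cmpeq_ps_mask(hi_first, lo_first), 0xffff);
}
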
16209 /// Broadcast 64-bit float `a` to all elements of `dst`.
16210 #[inline]
16211 #[target_feature(enable = "avx512f")]
16212 pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
16213 transmute(f64x8::splat(a))
16214 }
16215
16216 /// Broadcast 32-bit float `a` to all elements of `dst`.
16217 #[inline]
16218 #[target_feature(enable = "avx512f")]
16219 pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
16220 transmute(f32x16::splat(a))
16221 }
16222
16223 /// Sets packed 32-bit integers in `dst` with the supplied values.
16224 #[inline]
16225 #[target_feature(enable = "avx512f")]
16226 pub unsafe fn _mm512_set_epi32(
16227 e15: i32,
16228 e14: i32,
16229 e13: i32,
16230 e12: i32,
16231 e11: i32,
16232 e10: i32,
16233 e9: i32,
16234 e8: i32,
16235 e7: i32,
16236 e6: i32,
16237 e5: i32,
16238 e4: i32,
16239 e3: i32,
16240 e2: i32,
16241 e1: i32,
16242 e0: i32,
16243 ) -> __m512i {
16244 _mm512_setr_epi32(
16245 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
16246 )
16247 }
16248
16249 /// Broadcast 8-bit integer a to all elements of dst.
16250 ///
16251 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set1_epi8&expand=4972)
16252 #[inline]
16253 #[target_feature(enable = "avx512f")]
16254 pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
16255 transmute(i8x64::splat(a))
16256 }
16257
16258 /// Broadcast the low packed 16-bit integer from a to all elements of dst.
16259 ///
16260 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set1_epi16&expand=4944)
16261 #[inline]
16262 #[target_feature(enable = "avx512f")]
16263 pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i {
16264 transmute(i16x32::splat(a))
16265 }
16266
16267 /// Broadcast 32-bit integer `a` to all elements of `dst`.
16268 #[inline]
16269 #[target_feature(enable = "avx512f")]
16270 pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
16271 transmute(i32x16::splat(a))
16272 }
16273
16274 /// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16275 ///
16276 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_set1_epi32&expand=4951)
16277 #[inline]
16278 #[target_feature(enable = "avx512f")]
16279 #[cfg_attr(test, assert_instr(vpbroadcastd))]
16280 pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
16281 let r = _mm512_set1_epi32(a).as_i32x16();
16282 transmute(simd_select_bitmask(k, r, src.as_i32x16()))
16283 }
16284
16285 /// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16286 ///
16287 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_set1_epi32&expand=4952)
16288 #[inline]
16289 #[target_feature(enable = "avx512f")]
16290 #[cfg_attr(test, assert_instr(vpbroadcastd))]
16291 pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
16292 let r = _mm512_set1_epi32(a).as_i32x16();
16293 let zero = _mm512_setzero_si512().as_i32x16();
16294 transmute(simd_select_bitmask(k, r, zero))
16295 }
16296
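// --- Illustrative usage sketch (not part of the upstream file) ---
// A masked broadcast splats a constant into the selected lanes only, leaving
// the rest to `src` (writemask) or zero (zeromask). The helper name
// `mask_set1_epi32_example` and the constants are invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_set1_epi32_example() {
    let src = _mm512_set1_epi32(7);
    // Odd-numbered lanes receive 9, even-numbered lanes keep 7.
    let r = _mm512_mask_set1_epi32(src, 0b1010_1010_1010_1010, 9);
    let e = _mm512_setr_epi32(7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}
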
16297 /// Broadcast 64-bit integer `a` to all elements of `dst`.
16298 #[inline]
16299 #[target_feature(enable = "avx512f")]
16300 pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
16301 transmute(i64x8::splat(a))
16302 }
16303
16304 /// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16305 ///
16306 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_set1_epi64&expand=4959)
16307 #[inline]
16308 #[target_feature(enable = "avx512f")]
16309 #[cfg_attr(test, assert_instr(vpbroadcastq))]
16310 pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
16311 let r = _mm512_set1_epi64(a).as_i64x8();
16312 transmute(simd_select_bitmask(k, r, src.as_i64x8()))
16313 }
16314
16315 /// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16316 ///
16317 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_set1_epi64&expand=4960)
16318 #[inline]
16319 #[target_feature(enable = "avx512f")]
16320 #[cfg_attr(test, assert_instr(vpbroadcastq))]
16321 pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
16322 let r = _mm512_set1_epi64(a).as_i64x8();
16323 let zero = _mm512_setzero_si512().as_i64x8();
16324 transmute(simd_select_bitmask(k, r, zero))
16325 }
16326
16327 /// Set packed 64-bit integers in dst with the repeated 4 element sequence.
16328 ///
16329 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_epi64&expand=4983)
16330 #[inline]
16331 #[target_feature(enable = "avx512f")]
16332 pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
16333 let r = i64x8::new(a, b, c, d, a, b, c, d);
16334 transmute(r)
16335 }
16336
16337 /// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
16338 ///
16339 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_epi64&expand=5010)
16340 #[inline]
16341 #[target_feature(enable = "avx512f")]
16342 pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
16343 let r = i64x8::new(d, c, b, a, d, c, b, a);
16344 transmute(r)
16345 }
16346
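// --- Illustrative usage sketch (not part of the upstream file) ---
// `_mm512_set4_epi64` and `_mm512_setr4_epi64` take the same four values in
// opposite orders, so the two calls below build the same repeated pattern.
// The helper name `set4_epi64_example` is invented here.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set4_epi64_example() {
    let r = _mm512_set4_epi64(3, 2, 1, 0);
    let e = _mm512_setr4_epi64(0, 1, 2, 3);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
}
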
16347 /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
16348 ///
16349 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmplt_ps_mask&expand=1074)
16350 #[inline]
16351 #[target_feature(enable = "avx512f")]
16352 #[cfg_attr(test, assert_instr(vcmp))]
16353 pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16354 _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
16355 }
16356
16357 /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16358 ///
16359 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmplt_ps_mask&expand=1075)
16360 #[inline]
16361 #[target_feature(enable = "avx512f")]
16362 #[cfg_attr(test, assert_instr(vcmp))]
16363 pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16364 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LT_OS)
16365 }
16366
16367 /// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
16368 ///
16369 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpnlt_ps_mask&expand=1154)
16370 #[inline]
16371 #[target_feature(enable = "avx512f")]
16372 #[cfg_attr(test, assert_instr(vcmp))]
16373 pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16374 _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
16375 }
16376
16377 /// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16378 ///
16379 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpnlt_ps_mask&expand=1155)
16380 #[inline]
16381 #[target_feature(enable = "avx512f")]
16382 #[cfg_attr(test, assert_instr(vcmp))]
16383 pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16384 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLT_US)
16385 }
16386
16387 /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
16388 ///
16389 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmple_ps_mask&expand=1013)
16390 #[inline]
16391 #[target_feature(enable = "avx512f")]
16392 #[cfg_attr(test, assert_instr(vcmp))]
16393 pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16394 _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
16395 }
16396
16397 /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16398 ///
16399 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmple_ps_mask&expand=1014)
16400 #[inline]
16401 #[target_feature(enable = "avx512f")]
16402 #[cfg_attr(test, assert_instr(vcmp))]
16403 pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16404 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LE_OS)
16405 }
16406
16407 /// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
16408 ///
16409 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpnle_ps_mask&expand=1146)
16410 #[inline]
16411 #[target_feature(enable = "avx512f")]
16412 #[cfg_attr(test, assert_instr(vcmp))]
16413 pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16414 _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
16415 }
16416
16417 /// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16418 ///
16419 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpnle_ps_mask&expand=1147)
16420 #[inline]
16421 #[target_feature(enable = "avx512f")]
16422 #[cfg_attr(test, assert_instr(vcmp))]
16423 pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16424 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLE_US)
16425 }
16426
16427 /// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
16428 ///
16429 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpeq_ps_mask&expand=828)
16430 #[inline]
16431 #[target_feature(enable = "avx512f")]
16432 #[cfg_attr(test, assert_instr(vcmp))]
16433 pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16434 _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
16435 }
16436
16437 /// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16438 ///
16439 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpeq_ps_mask&expand=829)
16440 #[inline]
16441 #[target_feature(enable = "avx512f")]
16442 #[cfg_attr(test, assert_instr(vcmp))]
16443 pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16444 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_EQ_OQ)
16445 }
16446
16447 /// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
16448 ///
16449 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpneq_ps_mask&expand=1130)
16450 #[inline]
16451 #[target_feature(enable = "avx512f")]
16452 #[cfg_attr(test, assert_instr(vcmp))]
16453 pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16454 _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
16455 }
16456
16457 /// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16458 ///
16459 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpneq_ps_mask&expand=1131)
16460 #[inline]
16461 #[target_feature(enable = "avx512f")]
16462 #[cfg_attr(test, assert_instr(vcmp))]
16463 pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16464 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NEQ_UQ)
16465 }
16466
16467 /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
16468 ///
16469 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmp_ps_mask&expand=749)
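///
/// A usage sketch (not from upstream; assumes `avx512f`) using one of the `_CMP_*`
/// predicate constants from this crate:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(f32::NAN);
/// // _CMP_UNORD_Q is true whenever at least one operand is NaN
/// let k = _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q);
/// assert_eq!(k, 0xFFFF);
/// ```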
16470 #[inline]
16471 #[target_feature(enable = "avx512f")]
16472 #[rustc_args_required_const(2)]
16473 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16474 pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, imm8: i32) -> __mmask16 {
16475 let neg_one = -1;
16476 macro_rules! call {
16477 ($imm5:expr) => {
16478 vcmpps(
16479 a.as_f32x16(),
16480 b.as_f32x16(),
16481 $imm5,
16482 neg_one,
16483 _MM_FROUND_CUR_DIRECTION,
16484 )
16485 };
16486 }
16487 let r = constify_imm5!(imm8, call);
16488 transmute(r)
16489 }
16490
16491 /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16492 ///
16493 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmp_ps_mask&expand=750)
16494 #[inline]
16495 #[target_feature(enable = "avx512f")]
16496 #[rustc_args_required_const(3)]
16497 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16498 pub unsafe fn _mm512_mask_cmp_ps_mask(k1: __mmask16, a: __m512, b: __m512, imm8: i32) -> __mmask16 {
16499 macro_rules! call {
16500 ($imm5:expr) => {
16501 vcmpps(
16502 a.as_f32x16(),
16503 b.as_f32x16(),
16504 $imm5,
16505 k1 as i16,
16506 _MM_FROUND_CUR_DIRECTION,
16507 )
16508 };
16509 }
16510 let r = constify_imm5!(imm8, call);
16511 transmute(r)
16512 }
16513
16514 /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
16515 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16516 ///
16517 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmp_round_ps_mask&expand=753)
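///
/// A usage sketch (not from upstream; assumes `avx512f`), passing `_MM_FROUND_NO_EXC`
/// to suppress floating-point exception reporting:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// let k = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OS, _MM_FROUND_NO_EXC);
/// assert_eq!(k, 0xFFFF);
/// ```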
16518 #[inline]
16519 #[target_feature(enable = "avx512f")]
16520 #[rustc_args_required_const(2, 3)]
16521 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16522 pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, imm8: i32, sae: i32) -> __mmask16 {
16523 let neg_one = -1;
16524 macro_rules! call {
16525 ($imm5:expr, $imm4:expr) => {
16526 vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
16527 };
16528 }
16529 let r = constify_imm5_sae!(imm8, sae, call);
16530 transmute(r)
16531 }
16532
16533 /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
16534 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16535 ///
16536 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmp_round_ps_mask&expand=754)
16537 #[inline]
16538 #[target_feature(enable = "avx512f")]
16539 #[rustc_args_required_const(3, 4)]
16540 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16541 pub unsafe fn _mm512_mask_cmp_round_ps_mask(
16542 k1: __mmask16,
16543 a: __m512,
16544 b: __m512,
16545 imm8: i32,
16546 sae: i32,
16547 ) -> __mmask16 {
16548 macro_rules! call {
16549 ($imm5:expr, $imm4:expr) => {
16550 vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, k1 as i16, $imm4)
16551 };
16552 }
16553 let r = constify_imm5_sae!(imm8, sae, call);
16554 transmute(r)
16555 }
16556
16557 /// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
16558 ///
16559 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpord_ps_mask&expand=1162)
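///
/// A usage sketch (not from upstream; assumes `avx512f`):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(f32::NAN);
/// // no lane is "ordered" because b is NaN everywhere
/// let k = _mm512_cmpord_ps_mask(a, b);
/// assert_eq!(k, 0);
/// ```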
16560 #[inline]
16561 #[target_feature(enable = "avx512f")]
16562 #[cfg_attr(test, assert_instr(vcmp))]
16563 pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16564 _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
16565 }
16566
16567 /// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16568 ///
16569 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpord_ps_mask&expand=1163)
16570 #[inline]
16571 #[target_feature(enable = "avx512f")]
16572 #[cfg_attr(test, assert_instr(vcmp))]
16573 pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16574 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_ORD_Q)
16575 }
16576
16577 /// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
16578 ///
16579 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpunord_ps_mask&expand=1170)
16580 #[inline]
16581 #[target_feature(enable = "avx512f")]
16582 #[cfg_attr(test, assert_instr(vcmp))]
16583 pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
16584 _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
16585 }
16586
16587 /// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16588 ///
16589 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
16590 #[inline]
16591 #[target_feature(enable = "avx512f")]
16592 #[cfg_attr(test, assert_instr(vcmp))]
16593 pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
16594 _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_UNORD_Q)
16595 }
16596
16597 /// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
16598 ///
16599 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_pd_mask&expand=1071)
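///
/// A usage sketch (not from upstream; assumes `avx512f`); the result is an 8-bit
/// mask because `__m512d` holds 8 lanes:
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(2.0);
/// let k = _mm512_cmplt_pd_mask(a, b);
/// assert_eq!(k, 0xFF);
/// ```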
16600 #[inline]
16601 #[target_feature(enable = "avx512f")]
16602 #[cfg_attr(test, assert_instr(vcmp))]
16603 pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16604 _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
16605 }
16606
16607 /// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16608 ///
16609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_pd_mask&expand=1072)
16610 #[inline]
16611 #[target_feature(enable = "avx512f")]
16612 #[cfg_attr(test, assert_instr(vcmp))]
16613 pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16614 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LT_OS)
16615 }
16616
16617 /// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
16618 ///
16619 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnlt_pd_mask&expand=1151)
16620 #[inline]
16621 #[target_feature(enable = "avx512f")]
16622 #[cfg_attr(test, assert_instr(vcmp))]
16623 pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16624 _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
16625 }
16626
16627 /// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16628 ///
16629 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
16630 #[inline]
16631 #[target_feature(enable = "avx512f")]
16632 #[cfg_attr(test, assert_instr(vcmp))]
16633 pub unsafe fn _mm512_mask_cmpnlt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16634 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NLT_US)
16635 }
16636
16637 /// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
16638 ///
16639 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_pd_mask&expand=1010)
16640 #[inline]
16641 #[target_feature(enable = "avx512f")]
16642 #[cfg_attr(test, assert_instr(vcmp))]
16643 pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16644 _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
16645 }
16646
16647 /// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16648 ///
16649 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_pd_mask&expand=1011)
16650 #[inline]
16651 #[target_feature(enable = "avx512f")]
16652 #[cfg_attr(test, assert_instr(vcmp))]
16653 pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16654 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LE_OS)
16655 }
16656
16657 /// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
16658 ///
16659 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnle_pd_mask&expand=1143)
16660 #[inline]
16661 #[target_feature(enable = "avx512f")]
16662 #[cfg_attr(test, assert_instr(vcmp))]
16663 pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16664 _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
16665 }
16666
16667 /// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16668 ///
16669 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
16670 #[inline]
16671 #[target_feature(enable = "avx512f")]
16672 #[cfg_attr(test, assert_instr(vcmp))]
16673 pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16674 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NLE_US)
16675 }
16676
16677 /// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
16678 ///
16679 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_pd_mask&expand=822)
16680 #[inline]
16681 #[target_feature(enable = "avx512f")]
16682 #[cfg_attr(test, assert_instr(vcmp))]
16683 pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16684 _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
16685 }
16686
16687 /// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16688 ///
16689 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_pd_mask&expand=823)
16690 #[inline]
16691 #[target_feature(enable = "avx512f")]
16692 #[cfg_attr(test, assert_instr(vcmp))]
16693 pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16694 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_EQ_OQ)
16695 }
16696
16697 /// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
16698 ///
16699 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_pd_mask&expand=1127)
16700 #[inline]
16701 #[target_feature(enable = "avx512f")]
16702 #[cfg_attr(test, assert_instr(vcmp))]
16703 pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16704 _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
16705 }
16706
16707 /// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16708 ///
16709 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
16710 #[inline]
16711 #[target_feature(enable = "avx512f")]
16712 #[cfg_attr(test, assert_instr(vcmp))]
16713 pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16714 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NEQ_UQ)
16715 }
16716
16717 /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
16718 ///
16719 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_pd_mask&expand=741)
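///
/// A usage sketch (not from upstream; assumes `avx512f`):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm512_set1_pd(2.0);
/// let k = _mm512_cmp_pd_mask(a, b, _CMP_LE_OS);
/// assert_eq!(k, 0xFF);
/// ```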
16720 #[inline]
16721 #[target_feature(enable = "avx512f")]
16722 #[rustc_args_required_const(2)]
16723 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16724 pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
16725 let neg_one = -1;
16726 macro_rules! call {
16727 ($imm5:expr) => {
16728 vcmppd(
16729 a.as_f64x8(),
16730 b.as_f64x8(),
16731 $imm5,
16732 neg_one,
16733 _MM_FROUND_CUR_DIRECTION,
16734 )
16735 };
16736 }
16737 let r = constify_imm5!(imm8, call);
16738 transmute(r)
16739 }
16740
16741 /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16742 ///
16743 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_pd_mask&expand=742)
16744 #[inline]
16745 #[target_feature(enable = "avx512f")]
16746 #[rustc_args_required_const(3)]
16747 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16748 pub unsafe fn _mm512_mask_cmp_pd_mask(k1: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
16749 macro_rules! call {
16750 ($imm5:expr) => {
16751 vcmppd(
16752 a.as_f64x8(),
16753 b.as_f64x8(),
16754 $imm5,
16755 k1 as i8,
16756 _MM_FROUND_CUR_DIRECTION,
16757 )
16758 };
16759 }
16760 let r = constify_imm5!(imm8, call);
16761 transmute(r)
16762 }
16763
16764 /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
16765 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16766 ///
16767 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_pd_mask&expand=751)
16768 #[inline]
16769 #[target_feature(enable = "avx512f")]
16770 #[rustc_args_required_const(2, 3)]
16771 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16772 pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, imm8: i32, sae: i32) -> __mmask8 {
16773 let neg_one = -1;
16774 macro_rules! call {
16775 ($imm5:expr, $imm4:expr) => {
16776 vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
16777 };
16778 }
16779 let r = constify_imm5_sae!(imm8, sae, call);
16780 transmute(r)
16781 }
16782
16783 /// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
16784 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16785 ///
16786 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_pd_mask&expand=752)
16787 #[inline]
16788 #[target_feature(enable = "avx512f")]
16789 #[rustc_args_required_const(3, 4)]
16790 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16791 pub unsafe fn _mm512_mask_cmp_round_pd_mask(
16792 k1: __mmask8,
16793 a: __m512d,
16794 b: __m512d,
16795 imm8: i32,
16796 sae: i32,
16797 ) -> __mmask8 {
16798 macro_rules! call {
16799 ($imm5:expr, $imm4:expr) => {
16800 vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, k1 as i8, $imm4)
16801 };
16802 }
16803 let r = constify_imm5_sae!(imm8, sae, call);
16804 transmute(r)
16805 }
16806
16807 /// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
16808 ///
16809 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpord_pd_mask&expand=1159)
16810 #[inline]
16811 #[target_feature(enable = "avx512f")]
16812 #[cfg_attr(test, assert_instr(vcmp))]
16813 pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16814 _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
16815 }
16816
16817 /// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16818 ///
16819 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpord_pd_mask&expand=1160)
16820 #[inline]
16821 #[target_feature(enable = "avx512f")]
16822 #[cfg_attr(test, assert_instr(vcmp))]
16823 pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16824 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_ORD_Q)
16825 }
16826
16827 /// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
16828 ///
16829 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpunord_pd_mask&expand=1167)
16830 #[inline]
16831 #[target_feature(enable = "avx512f")]
16832 #[cfg_attr(test, assert_instr(vcmp))]
16833 pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
16834 _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
16835 }
16836
16837 /// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
16838 ///
16839 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
16840 #[inline]
16841 #[target_feature(enable = "avx512f")]
16842 #[cfg_attr(test, assert_instr(vcmp))]
16843 pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
16844 _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_UNORD_Q)
16845 }
16846
16847 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
16848 ///
16849 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=763)
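///
/// A usage sketch (not from upstream; assumes `avx512f` together with `sse`);
/// only bit 0 of the returned mask reflects the comparison of the lowest elements:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.0);
/// let k = _mm_cmp_ss_mask(a, b, _CMP_LT_OS);
/// assert_eq!(k & 1, 1);
/// ```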
16850 #[inline]
16851 #[target_feature(enable = "avx512f")]
16852 #[rustc_args_required_const(2)]
16853 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16854 pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, imm8: i32) -> __mmask8 {
16855 let neg_one = -1;
16856 macro_rules! call {
16857 ($imm5:expr) => {
16858 vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
16859 };
16860 }
16861 let r = constify_imm5!(imm8, call);
16862 transmute(r)
16863 }
16864
16865 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
16866 ///
16867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=764)
16868 #[inline]
16869 #[target_feature(enable = "avx512f")]
16870 #[rustc_args_required_const(3)]
16871 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16872 pub unsafe fn _mm_mask_cmp_ss_mask(k1: __mmask8, a: __m128, b: __m128, imm8: i32) -> __mmask8 {
16873 macro_rules! call {
16874 ($imm5:expr) => {
16875 vcmpss(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
16876 };
16877 }
16878 let r = constify_imm5!(imm8, call);
16879 transmute(r)
16880 }
16881
16882 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
16883 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16884 ///
16885 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=757)
16886 #[inline]
16887 #[target_feature(enable = "avx512f")]
16888 #[rustc_args_required_const(2, 3)]
16889 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16890 pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, imm8: i32, sae: i32) -> __mmask8 {
16891 let neg_one = -1;
16892 macro_rules! call {
16893 ($imm5:expr, $imm4:expr) => {
16894 vcmpss(a, b, $imm5, neg_one, $imm4)
16895 };
16896 }
16897 let r = constify_imm5_sae!(imm8, sae, call);
16898 transmute(r)
16899 }
16900
16901 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
16902 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16903 ///
16904 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=758)
16905 #[inline]
16906 #[target_feature(enable = "avx512f")]
16907 #[rustc_args_required_const(3, 4)]
16908 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16909 pub unsafe fn _mm_mask_cmp_round_ss_mask(
16910 k1: __mmask8,
16911 a: __m128,
16912 b: __m128,
16913 imm8: i32,
16914 sae: i32,
16915 ) -> __mmask8 {
16916 macro_rules! call {
16917 ($imm5:expr, $imm4:expr) => {
16918 vcmpss(a, b, $imm5, k1 as i8, $imm4)
16919 };
16920 }
16921 let r = constify_imm5_sae!(imm8, sae, call);
16922 transmute(r)
16923 }
16924
16925 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
16926 ///
16927 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=760)
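///
/// A usage sketch (not from upstream; assumes `avx512f` together with `sse2`):
///
/// ```ignore
/// let a = _mm_set_sd(1.0);
/// let b = _mm_set_sd(2.0);
/// let k = _mm_cmp_sd_mask(a, b, _CMP_LE_OS);
/// assert_eq!(k & 1, 1); // bit 0 reflects the lowest element
/// ```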
16928 #[inline]
16929 #[target_feature(enable = "avx512f")]
16930 #[rustc_args_required_const(2)]
16931 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16932 pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
16933 let neg_one = -1;
16934 macro_rules! call {
16935 ($imm5:expr) => {
16936 vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
16937 };
16938 }
16939 let r = constify_imm5!(imm8, call);
16940 transmute(r)
16941 }
16942
16943 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
16944 ///
16945 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=761)
16946 #[inline]
16947 #[target_feature(enable = "avx512f")]
16948 #[rustc_args_required_const(3)]
16949 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16950 pub unsafe fn _mm_mask_cmp_sd_mask(k1: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
16951 macro_rules! call {
16952 ($imm5:expr) => {
16953 vcmpsd(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
16954 };
16955 }
16956 let r = constify_imm5!(imm8, call);
16957 transmute(r)
16958 }
16959
16960 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
16961 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16962 ///
16963 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=755)
16964 #[inline]
16965 #[target_feature(enable = "avx512f")]
16966 #[rustc_args_required_const(2, 3)]
16967 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16968 pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __mmask8 {
16969 let neg_one = -1;
16970 macro_rules! call {
16971 ($imm5:expr, $imm4:expr) => {
16972 vcmpsd(a, b, $imm5, neg_one, $imm4)
16973 };
16974 }
16975 let r = constify_imm5_sae!(imm8, sae, call);
16976 transmute(r)
16977 }
16978
16979 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
16980 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16981 ///
16982 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=756)
16983 #[inline]
16984 #[target_feature(enable = "avx512f")]
16985 #[rustc_args_required_const(3, 4)]
16986 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16987 pub unsafe fn _mm_mask_cmp_round_sd_mask(
16988 k1: __mmask8,
16989 a: __m128d,
16990 b: __m128d,
16991 imm8: i32,
16992 sae: i32,
16993 ) -> __mmask8 {
16994 macro_rules! call {
16995 ($imm5:expr, $imm4:expr) => {
16996 vcmpsd(a, b, $imm5, k1 as i8, $imm4)
16997 };
16998 }
16999 let r = constify_imm5_sae!(imm8, sae, call);
17000 transmute(r)
17001 }
17002
17003 /// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
17004 ///
17005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu32_mask&expand=1056)
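///
/// A usage sketch (not from upstream; assumes `avx512f`), showing that the
/// comparison is unsigned even though the vector is built from `i32` values:
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1); // reinterpreted as u32::MAX
/// let b = _mm512_set1_epi32(1);
/// let k = _mm512_cmplt_epu32_mask(a, b);
/// assert_eq!(k, 0); // u32::MAX is not less than 1
/// ```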
17006 #[inline]
17007 #[target_feature(enable = "avx512f")]
17008 #[cfg_attr(test, assert_instr(vpcmp))]
17009 pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17010 simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
17011 }
17012
17013 /// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17014 ///
17015 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
17016 #[inline]
17017 #[target_feature(enable = "avx512f")]
17018 #[cfg_attr(test, assert_instr(vpcmp))]
17019 pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17020 _mm512_cmplt_epu32_mask(a, b) & k1
17021 }
17022
17023 /// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
17024 ///
17025 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu32_mask&expand=933)
17026 #[inline]
17027 #[target_feature(enable = "avx512f")]
17028 #[cfg_attr(test, assert_instr(vpcmp))]
17029 pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17030 simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
17031 }
17032
17033 /// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17034 ///
17035 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
17036 #[inline]
17037 #[target_feature(enable = "avx512f")]
17038 #[cfg_attr(test, assert_instr(vpcmp))]
17039 pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17040 _mm512_cmpgt_epu32_mask(a, b) & k1
17041 }
17042
17043 /// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
17044 ///
17045 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu32_mask&expand=995)
17046 #[inline]
17047 #[target_feature(enable = "avx512f")]
17048 #[cfg_attr(test, assert_instr(vpcmp))]
17049 pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17050 simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
17051 }
17052
17053 /// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17054 ///
17055 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu32_mask&expand=996)
17056 #[inline]
17057 #[target_feature(enable = "avx512f")]
17058 #[cfg_attr(test, assert_instr(vpcmp))]
17059 pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17060 _mm512_cmple_epu32_mask(a, b) & k1
17061 }
17062
17063 /// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
17064 ///
17065 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu32_mask&expand=873)
17066 #[inline]
17067 #[target_feature(enable = "avx512f")]
17068 #[cfg_attr(test, assert_instr(vpcmp))]
17069 pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17070 simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
17071 }
17072
17073 /// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17074 ///
17075 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu32_mask&expand=874)
17076 #[inline]
17077 #[target_feature(enable = "avx512f")]
17078 #[cfg_attr(test, assert_instr(vpcmp))]
17079 pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17080 _mm512_cmpge_epu32_mask(a, b) & k1
17081 }
17082
17083 /// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
17084 ///
17085 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu32_mask&expand=807)
17086 #[inline]
17087 #[target_feature(enable = "avx512f")]
17088 #[cfg_attr(test, assert_instr(vpcmp))]
17089 pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17090 simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
17091 }
17092
17093 /// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17094 ///
17095 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
17096 #[inline]
17097 #[target_feature(enable = "avx512f")]
17098 #[cfg_attr(test, assert_instr(vpcmp))]
17099 pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17100 _mm512_cmpeq_epu32_mask(a, b) & k1
17101 }
17102
17103 /// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
17104 ///
17105 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu32_mask&expand=1112)
17106 #[inline]
17107 #[target_feature(enable = "avx512f")]
17108 #[cfg_attr(test, assert_instr(vpcmp))]
17109 pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17110 simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
17111 }
17112
17113 /// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17114 ///
17115 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
17116 #[inline]
17117 #[target_feature(enable = "avx512f")]
17118 #[cfg_attr(test, assert_instr(vpcmp))]
17119 pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17120 _mm512_cmpneq_epu32_mask(a, b) & k1
17121 }
17122
17123 /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
17124 ///
17125 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu32_mask&expand=721)
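///
/// A usage sketch (not from upstream; assumes `avx512f` and the `_MM_CMPINT_*`
/// constants defined alongside `_MM_CMPINT_ENUM` in this module):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0);
/// let b = _mm512_set1_epi32(1);
/// let k = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
/// assert_eq!(k, 0xFFFF);
/// ```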
17126 #[inline]
17127 #[target_feature(enable = "avx512f")]
17128 #[rustc_args_required_const(2)]
17129 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17130 pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
17131 let neg_one = -1;
17132 macro_rules! call {
17133 ($imm3:expr) => {
17134 vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
17135 };
17136 }
17137 let r = constify_imm3!(imm8, call);
17138 transmute(r)
17139 }
17140
17141 /// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17142 ///
17143 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu32_mask&expand=722)
17144 #[inline]
17145 #[target_feature(enable = "avx512f")]
17146 #[rustc_args_required_const(3)]
17147 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17148 pub unsafe fn _mm512_mask_cmp_epu32_mask(
17149 k1: __mmask16,
17150 a: __m512i,
17151 b: __m512i,
17152 imm8: _MM_CMPINT_ENUM,
17153 ) -> __mmask16 {
17154 macro_rules! call {
17155 ($imm3:expr) => {
17156 vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, k1 as i16)
17157 };
17158 }
17159 let r = constify_imm3!(imm8, call);
17160 transmute(r)
17161 }
17162
17163 /// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
17164 ///
17165 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi32_mask&expand=1029)
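///
/// A usage sketch (not from upstream; assumes `avx512f`); contrast with the
/// unsigned `_mm512_cmplt_epu32_mask` above:
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(1);
/// let k = _mm512_cmplt_epi32_mask(a, b);
/// assert_eq!(k, 0xFFFF); // signed: -1 < 1 in every lane
/// ```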
17166 #[inline]
17167 #[target_feature(enable = "avx512f")]
17168 #[cfg_attr(test, assert_instr(vpcmp))]
17169 pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17170 simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
17171 }
17172
17173 /// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17174 ///
17175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
17176 #[inline]
17177 #[target_feature(enable = "avx512f")]
17178 #[cfg_attr(test, assert_instr(vpcmp))]
17179 pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17180 _mm512_cmplt_epi32_mask(a, b) & k1
17181 }
17182
17183 /// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
17184 ///
17185 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi32_mask&expand=905)
17186 #[inline]
17187 #[target_feature(enable = "avx512f")]
17188 #[cfg_attr(test, assert_instr(vpcmp))]
17189 pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17190 simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
17191 }
17192
17193 /// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17194 ///
17195 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
17196 #[inline]
17197 #[target_feature(enable = "avx512f")]
17198 #[cfg_attr(test, assert_instr(vpcmp))]
17199 pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17200 _mm512_cmpgt_epi32_mask(a, b) & k1
17201 }
17202
17203 /// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
17204 ///
17205 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi32_mask&expand=971)
17206 #[inline]
17207 #[target_feature(enable = "avx512f")]
17208 #[cfg_attr(test, assert_instr(vpcmp))]
17209 pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17210 simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
17211 }
17212
17213 /// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17214 ///
17215 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi32_mask&expand=972)
17216 #[inline]
17217 #[target_feature(enable = "avx512f")]
17218 #[cfg_attr(test, assert_instr(vpcmp))]
17219 pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17220 _mm512_cmple_epi32_mask(a, b) & k1
17221 }
17222
17223 /// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
17224 ///
17225 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi32_mask&expand=849)
17226 #[inline]
17227 #[target_feature(enable = "avx512f")]
17228 #[cfg_attr(test, assert_instr(vpcmp))]
17229 pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17230 simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
17231 }
17232
17233 /// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17234 ///
17235 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi32_mask&expand=850)
17236 #[inline]
17237 #[target_feature(enable = "avx512f")]
17238 #[cfg_attr(test, assert_instr(vpcmp))]
17239 pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17240 _mm512_cmpge_epi32_mask(a, b) & k1
17241 }
17242
17243 /// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
17244 ///
17245 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi32_mask&expand=779)
17246 #[inline]
17247 #[target_feature(enable = "avx512f")]
17248 #[cfg_attr(test, assert_instr(vpcmp))]
17249 pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17250 simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
17251 }
17252
17253 /// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17254 ///
17255 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
17256 #[inline]
17257 #[target_feature(enable = "avx512f")]
17258 #[cfg_attr(test, assert_instr(vpcmp))]
17259 pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17260 _mm512_cmpeq_epi32_mask(a, b) & k1
17261 }
17262
17263 /// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
17264 ///
17265 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi32_mask&expand=1088)
17266 #[inline]
17267 #[target_feature(enable = "avx512f")]
17268 #[cfg_attr(test, assert_instr(vpcmp))]
17269 pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
17270 simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
17271 }
17272
17273 /// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17274 ///
17275 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
17276 #[inline]
17277 #[target_feature(enable = "avx512f")]
17278 #[cfg_attr(test, assert_instr(vpcmp))]
17279 pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
17280 _mm512_cmpneq_epi32_mask(a, b) & k1
17281 }
17282
17283 /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
17284 ///
17285 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi32_mask&expand=697)
17286 #[inline]
17287 #[target_feature(enable = "avx512f")]
17288 #[rustc_args_required_const(2)]
17289 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17290 pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
17291 let neg_one = -1;
17292 macro_rules! call {
17293 ($imm3:expr) => {
17294 vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
17295 };
17296 }
17297 let r = constify_imm3!(imm8, call);
17298 transmute(r)
17299 }
17300
17301 /// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17302 ///
17303 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi32_mask&expand=698)
17304 #[inline]
17305 #[target_feature(enable = "avx512f")]
17306 #[rustc_args_required_const(3)]
17307 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17308 pub unsafe fn _mm512_mask_cmp_epi32_mask(
17309 k1: __mmask16,
17310 a: __m512i,
17311 b: __m512i,
17312 imm8: _MM_CMPINT_ENUM,
17313 ) -> __mmask16 {
17314 macro_rules! call {
17315 ($imm3:expr) => {
17316 vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, k1 as i16)
17317 };
17318 }
17319 let r = constify_imm3!(imm8, call);
17320 transmute(r)
17321 }
17322
17323 /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
17324 ///
17325 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu64_mask&expand=1062)
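///
/// A usage sketch (not from upstream; assumes `avx512f`):
///
/// ```ignore
/// let a = _mm512_set1_epi64(2);
/// let b = _mm512_set1_epi64(3);
/// let k = _mm512_cmplt_epu64_mask(a, b);
/// assert_eq!(k, 0xFF); // 8 lanes of u64, all satisfying a < b
/// ```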
17326 #[inline]
17327 #[target_feature(enable = "avx512f")]
17328 #[cfg_attr(test, assert_instr(vpcmp))]
17329 pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17330 simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
17331 }
17332
17333 /// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17334 ///
17335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
17336 #[inline]
17337 #[target_feature(enable = "avx512f")]
17338 #[cfg_attr(test, assert_instr(vpcmp))]
17339 pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17340 _mm512_cmplt_epu64_mask(a, b) & k1
17341 }
17342
17343 /// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
17344 ///
17345 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu64_mask&expand=939)
17346 #[inline]
17347 #[target_feature(enable = "avx512f")]
17348 #[cfg_attr(test, assert_instr(vpcmp))]
17349 pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17350 simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
17351 }
17352
17353 /// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17354 ///
17355 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
17356 #[inline]
17357 #[target_feature(enable = "avx512f")]
17358 #[cfg_attr(test, assert_instr(vpcmp))]
17359 pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17360 _mm512_cmpgt_epu64_mask(a, b) & k1
17361 }
17362
17363 /// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
17364 ///
17365 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu64_mask&expand=1001)
17366 #[inline]
17367 #[target_feature(enable = "avx512f")]
17368 #[cfg_attr(test, assert_instr(vpcmp))]
17369 pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17370 simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
17371 }
17372
17373 /// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17374 ///
17375 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu64_mask&expand=1002)
17376 #[inline]
17377 #[target_feature(enable = "avx512f")]
17378 #[cfg_attr(test, assert_instr(vpcmp))]
17379 pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17380 _mm512_cmple_epu64_mask(a, b) & k1
17381 }
17382
17383 /// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
17384 ///
17385 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu64_mask&expand=879)
17386 #[inline]
17387 #[target_feature(enable = "avx512f")]
17388 #[cfg_attr(test, assert_instr(vpcmp))]
17389 pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17390 simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
17391 }
17392
17393 /// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17394 ///
17395 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu64_mask&expand=880)
17396 #[inline]
17397 #[target_feature(enable = "avx512f")]
17398 #[cfg_attr(test, assert_instr(vpcmp))]
17399 pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17400 _mm512_cmpge_epu64_mask(a, b) & k1
17401 }
17402
17403 /// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
17404 ///
17405 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu64_mask&expand=813)
17406 #[inline]
17407 #[target_feature(enable = "avx512f")]
17408 #[cfg_attr(test, assert_instr(vpcmp))]
17409 pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17410 simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
17411 }
17412
17413 /// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17414 ///
17415 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
17416 #[inline]
17417 #[target_feature(enable = "avx512f")]
17418 #[cfg_attr(test, assert_instr(vpcmp))]
17419 pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17420 _mm512_cmpeq_epu64_mask(a, b) & k1
17421 }
17422
17423 /// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
17424 ///
17425 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu64_mask&expand=1118)
17426 #[inline]
17427 #[target_feature(enable = "avx512f")]
17428 #[cfg_attr(test, assert_instr(vpcmp))]
17429 pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17430 simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
17431 }
17432
17433 /// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17434 ///
17435 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
17436 #[inline]
17437 #[target_feature(enable = "avx512f")]
17438 #[cfg_attr(test, assert_instr(vpcmp))]
17439 pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17440 _mm512_cmpneq_epu64_mask(a, b) & k1
17441 }
17442
17443 /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
17444 ///
17445 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu64_mask&expand=727)
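///
/// A usage sketch (not from upstream; assumes `avx512f` and the `_MM_CMPINT_*`
/// constants from this module):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let b = _mm512_set1_epi64(1);
/// let k = _mm512_cmp_epu64_mask(a, b, _MM_CMPINT_EQ);
/// assert_eq!(k, 0xFF);
/// ```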
17446 #[inline]
17447 #[target_feature(enable = "avx512f")]
17448 #[rustc_args_required_const(2)]
17449 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17450 pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
17451 let neg_one = -1;
17452 macro_rules! call {
17453 ($imm3:expr) => {
17454 vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
17455 };
17456 }
17457 let r = constify_imm3!(imm8, call);
17458 transmute(r)
17459 }
17460
17461 /// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17462 ///
17463 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu64_mask&expand=728)
17464 #[inline]
17465 #[target_feature(enable = "avx512f")]
17466 #[rustc_args_required_const(3)]
17467 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17468 pub unsafe fn _mm512_mask_cmp_epu64_mask(
17469 k1: __mmask8,
17470 a: __m512i,
17471 b: __m512i,
17472 imm8: _MM_CMPINT_ENUM,
17473 ) -> __mmask8 {
17474 macro_rules! call {
17475 ($imm3:expr) => {
17476 vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, k1 as i8)
17477 };
17478 }
17479 let r = constify_imm3!(imm8, call);
17480 transmute(r)
17481 }
17482
17483 /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
17484 ///
17485 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi64_mask&expand=1037)
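///
/// A usage sketch (not from upstream; assumes `avx512f`):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1);
/// let b = _mm512_set1_epi64(0);
/// let k = _mm512_cmplt_epi64_mask(a, b);
/// assert_eq!(k, 0xFF); // signed: -1 < 0 in every lane
/// ```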
17486 #[inline]
17487 #[target_feature(enable = "avx512f")]
17488 #[cfg_attr(test, assert_instr(vpcmp))]
17489 pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17490 simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
17491 }
17492
17493 /// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17494 ///
17495 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
17496 #[inline]
17497 #[target_feature(enable = "avx512f")]
17498 #[cfg_attr(test, assert_instr(vpcmp))]
17499 pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17500 _mm512_cmplt_epi64_mask(a, b) & k1
17501 }
17502
17503 /// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
17504 ///
17505 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi64_mask&expand=913)
17506 #[inline]
17507 #[target_feature(enable = "avx512f")]
17508 #[cfg_attr(test, assert_instr(vpcmp))]
17509 pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17510 simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
17511 }
17512
17513 /// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17514 ///
17515 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
17516 #[inline]
17517 #[target_feature(enable = "avx512f")]
17518 #[cfg_attr(test, assert_instr(vpcmp))]
17519 pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17520 _mm512_cmpgt_epi64_mask(a, b) & k1
17521 }
17522
17523 /// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
17524 ///
17525 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi64_mask&expand=977)
17526 #[inline]
17527 #[target_feature(enable = "avx512f")]
17528 #[cfg_attr(test, assert_instr(vpcmp))]
17529 pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17530 simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
17531 }
17532
17533 /// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17534 ///
17535 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi64_mask&expand=978)
17536 #[inline]
17537 #[target_feature(enable = "avx512f")]
17538 #[cfg_attr(test, assert_instr(vpcmp))]
17539 pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17540 _mm512_cmple_epi64_mask(a, b) & k1
17541 }
17542
17543 /// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
17544 ///
17545 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi64_mask&expand=855)
17546 #[inline]
17547 #[target_feature(enable = "avx512f")]
17548 #[cfg_attr(test, assert_instr(vpcmp))]
17549 pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17550 simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
17551 }
17552
17553 /// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17554 ///
17555 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi64_mask&expand=856)
17556 #[inline]
17557 #[target_feature(enable = "avx512f")]
17558 #[cfg_attr(test, assert_instr(vpcmp))]
17559 pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17560 _mm512_cmpge_epi64_mask(a, b) & k1
17561 }
17562
17563 /// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
17564 ///
17565 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi64_mask&expand=787)
17566 #[inline]
17567 #[target_feature(enable = "avx512f")]
17568 #[cfg_attr(test, assert_instr(vpcmp))]
17569 pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17570 simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
17571 }
17572
17573 /// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17574 ///
17575 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
17576 #[inline]
17577 #[target_feature(enable = "avx512f")]
17578 #[cfg_attr(test, assert_instr(vpcmp))]
17579 pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17580 _mm512_cmpeq_epi64_mask(a, b) & k1
17581 }
17582
17583 /// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
17584 ///
17585 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi64_mask&expand=1094)
17586 #[inline]
17587 #[target_feature(enable = "avx512f")]
17588 #[cfg_attr(test, assert_instr(vpcmp))]
17589 pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
17590 simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
17591 }
17592
17593 /// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17594 ///
17595 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
17596 #[inline]
17597 #[target_feature(enable = "avx512f")]
17598 #[cfg_attr(test, assert_instr(vpcmp))]
17599 pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
17600 _mm512_cmpneq_epi64_mask(a, b) & k1
17601 }
17602
17603 /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
17604 ///
17605 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi64_mask&expand=703)
17606 #[inline]
17607 #[target_feature(enable = "avx512f")]
17608 #[rustc_args_required_const(2)]
17609 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17610 pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
17611 let neg_one = -1;
17612 macro_rules! call {
17613 ($imm3:expr) => {
17614 vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
17615 };
17616 }
17617 let r = constify_imm3!(imm8, call);
17618 transmute(r)
17619 }
17620
17621 /// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
17622 ///
17623 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi64_mask&expand=704)
17624 #[inline]
17625 #[target_feature(enable = "avx512f")]
17626 #[rustc_args_required_const(3)]
17627 #[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
17628 pub unsafe fn _mm512_mask_cmp_epi64_mask(
17629 k1: __mmask8,
17630 a: __m512i,
17631 b: __m512i,
17632 imm8: _MM_CMPINT_ENUM,
17633 ) -> __mmask8 {
17634 macro_rules! call {
17635 ($imm3:expr) => {
17636 vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, k1 as i8)
17637 };
17638 }
17639 let r = constify_imm3!(imm8, call);
17640 transmute(r)
17641 }
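// Editorial usage sketch (not part of the upstream source): `_mm512_cmp_epi64_mask` with
// `_MM_CMPINT_LT` is equivalent to the dedicated `_mm512_cmplt_epi64_mask`, while the epu64
// variant interprets the same bits as unsigned. Assumes the `_MM_CMPINT_*` constants defined
// elsewhere in this module; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_epi64_mask_example() {
    let a = _mm512_set1_epi64(-1);
    let b = _mm512_set1_epi64(0);
    // Signed: -1 < 0 in every lane.
    assert_eq!(
        _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LT),
        _mm512_cmplt_epi64_mask(a, b)
    );
    assert_eq!(_mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LT), 0b1111_1111);
    // Unsigned: the same bit pattern is u64::MAX, which is never less than 0.
    assert_eq!(_mm512_cmp_epu64_mask(a, b, _MM_CMPINT_LT), 0);
}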
17642
17643 /// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
17644 ///
17645 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_epi32&expand=4556)
17646 #[inline]
17647 #[target_feature(enable = "avx512f")]
17648 pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
17649 simd_reduce_add_unordered(a.as_i32x16())
17650 }
17651
17652 /// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
17653 ///
17654 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_epi32&expand=4555)
17655 #[inline]
17656 #[target_feature(enable = "avx512f")]
17657 pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
17658 simd_reduce_add_unordered(simd_select_bitmask(
17659 k,
17660 a.as_i32x16(),
17661 _mm512_setzero_si512().as_i32x16(),
17662 ))
17663 }
17664
17665 /// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
17666 ///
17667 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_epi64&expand=4558)
17668 #[inline]
17669 #[target_feature(enable = "avx512f")]
17670 pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
17671 simd_reduce_add_unordered(a.as_i64x8())
17672 }
17673
17674 /// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
17675 ///
17676 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_epi64&expand=4557)
17677 #[inline]
17678 #[target_feature(enable = "avx512f")]
17679 pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
17680 simd_reduce_add_unordered(simd_select_bitmask(
17681 k,
17682 a.as_i64x8(),
17683 _mm512_setzero_si512().as_i64x8(),
17684 ))
17685 }
17686
17687 /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
17688 ///
17689 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_ps&expand=4562)
17690 #[inline]
17691 #[target_feature(enable = "avx512f")]
17692 pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
17693 simd_reduce_add_unordered(a.as_f32x16())
17694 }
17695
17696 /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
17697 ///
17698 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_ps&expand=4561)
17699 #[inline]
17700 #[target_feature(enable = "avx512f")]
17701 pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
17702 simd_reduce_add_unordered(simd_select_bitmask(
17703 k,
17704 a.as_f32x16(),
17705 _mm512_setzero_ps().as_f32x16(),
17706 ))
17707 }
17708
17709 /// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
17710 ///
17711 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_pd&expand=4560)
17712 #[inline]
17713 #[target_feature(enable = "avx512f")]
17714 pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
17715 simd_reduce_add_unordered(a.as_f64x8())
17716 }
17717
17718 /// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
17719 ///
17720 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_pd&expand=4559)
17721 #[inline]
17722 #[target_feature(enable = "avx512f")]
17723 pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
17724 simd_reduce_add_unordered(simd_select_bitmask(
17725 k,
17726 a.as_f64x8(),
17727 _mm512_setzero_pd().as_f64x8(),
17728 ))
17729 }
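// Editorial usage sketch (not part of the upstream source): the masked add-reductions replace
// inactive lanes with 0, the additive identity, before summing. Requires AVX-512F at runtime;
// the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn reduce_add_example() {
    let ones = _mm512_set1_epi32(1);
    assert_eq!(_mm512_reduce_add_epi32(ones), 16);
    // Only the low 8 lanes are active, so the masked sum is 8.
    assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_1111_1111, ones), 8);
    assert_eq!(_mm512_reduce_add_ps(_mm512_set1_ps(0.5)), 8.0);
}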
17730
17731 /// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
17732 ///
17733 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_epi32&expand=4600)
17734 #[inline]
17735 #[target_feature(enable = "avx512f")]
17736 pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
17737 simd_reduce_mul_unordered(a.as_i32x16())
17738 }
17739
17740 /// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
17741 ///
17742 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_epi32&expand=4599)
17743 #[inline]
17744 #[target_feature(enable = "avx512f")]
17745 pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
17746 simd_reduce_mul_unordered(simd_select_bitmask(
17747 k,
17748 a.as_i32x16(),
17749 _mm512_set1_epi32(1).as_i32x16(),
17750 ))
17751 }
17752
17753 /// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
17754 ///
17755 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_epi64&expand=4602)
17756 #[inline]
17757 #[target_feature(enable = "avx512f")]
17758 pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
17759 simd_reduce_mul_unordered(a.as_i64x8())
17760 }
17761
17762 /// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
17763 ///
17764 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_epi64&expand=4601)
17765 #[inline]
17766 #[target_feature(enable = "avx512f")]
17767 pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
17768 simd_reduce_mul_unordered(simd_select_bitmask(
17769 k,
17770 a.as_i64x8(),
17771 _mm512_set1_epi64(1).as_i64x8(),
17772 ))
17773 }
17774
17775 /// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
17776 ///
17777 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_ps&expand=4606)
17778 #[inline]
17779 #[target_feature(enable = "avx512f")]
17780 pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
17781 simd_reduce_mul_unordered(a.as_f32x16())
17782 }
17783
17784 /// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
17785 ///
17786 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_ps&expand=4605)
17787 #[inline]
17788 #[target_feature(enable = "avx512f")]
17789 pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
17790 simd_reduce_mul_unordered(simd_select_bitmask(
17791 k,
17792 a.as_f32x16(),
17793 _mm512_set1_ps(1.).as_f32x16(),
17794 ))
17795 }
17796
17797 /// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
17798 ///
17799 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_pd&expand=4604)
17800 #[inline]
17801 #[target_feature(enable = "avx512f")]
17802 pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
17803 simd_reduce_mul_unordered(a.as_f64x8())
17804 }
17805
17806 /// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
17807 ///
17808 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_pd&expand=4603)
17809 #[inline]
17810 #[target_feature(enable = "avx512f")]
17811 pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
17812 simd_reduce_mul_unordered(simd_select_bitmask(
17813 k,
17814 a.as_f64x8(),
17815 _mm512_set1_pd(1.).as_f64x8(),
17816 ))
17817 }
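// Editorial usage sketch (not part of the upstream source): the masked mul-reductions replace
// inactive lanes with 1, the multiplicative identity. Requires AVX-512F at runtime; the function
// name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn reduce_mul_example() {
    let twos = _mm512_set1_epi32(2);
    assert_eq!(_mm512_reduce_mul_epi32(twos), 1 << 16);
    // Only 4 lanes are active, so the masked product is 2^4.
    assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_1111, twos), 16);
    assert_eq!(_mm512_reduce_mul_pd(_mm512_set1_pd(2.0)), 256.0);
}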
17818
17819 /// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
17820 ///
17821 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epi32&expand=4576)
17822 #[inline]
17823 #[target_feature(enable = "avx512f")]
17824 pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
17825 simd_reduce_max(a.as_i32x16())
17826 }
17827
17828 /// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
17829 ///
17830 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epi32&expand=4575)
17831 #[inline]
17832 #[target_feature(enable = "avx512f")]
17833 pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
17834 simd_reduce_max(simd_select_bitmask(
17835 k,
17836 a.as_i32x16(),
17837 _mm512_set1_epi32(i32::MIN).as_i32x16(), // identity element for signed max
17838 ))
17839 }
17840
17841 /// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
17842 ///
17843 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epi64&expand=4578)
17844 #[inline]
17845 #[target_feature(enable = "avx512f")]
17846 pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
17847 simd_reduce_max(a.as_i64x8())
17848 }
17849
17850 /// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
17851 ///
17852 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epi64&expand=4577)
17853 #[inline]
17854 #[target_feature(enable = "avx512f")]
17855 pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
17856 simd_reduce_max(simd_select_bitmask(
17857 k,
17858 a.as_i64x8(),
17859 _mm512_set1_epi64(i64::MIN).as_i64x8(), // identity element for signed max
17860 ))
17861 }
17862
17863 /// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
17864 ///
17865 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epu32&expand=4580)
17866 #[inline]
17867 #[target_feature(enable = "avx512f")]
17868 pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
17869 simd_reduce_max(a.as_u32x16())
17870 }
17871
17872 /// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
17873 ///
17874 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epu32&expand=4579)
17875 #[inline]
17876 #[target_feature(enable = "avx512f")]
17877 pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
17878 simd_reduce_max(simd_select_bitmask(
17879 k,
17880 a.as_u32x16(),
17881 _mm512_setzero_si512().as_u32x16(), // 0 is the identity element for unsigned max
17882 ))
17883 }
17884
17885 /// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
17886 ///
17887 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epu64&expand=4582)
17888 #[inline]
17889 #[target_feature(enable = "avx512f")]
17890 pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
17891 simd_reduce_max(a.as_u64x8())
17892 }
17893
17894 /// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
17895 ///
17896 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epu64&expand=4581)
17897 #[inline]
17898 #[target_feature(enable = "avx512f")]
17899 pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
17900 simd_reduce_max(simd_select_bitmask(
17901 k,
17902 a.as_u64x8(),
17903 _mm512_set1_epi64(0).as_u64x8(),
17904 ))
17905 }
17906
17907 /// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
17908 ///
17909 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_ps&expand=4586)
17910 #[inline]
17911 #[target_feature(enable = "avx512f")]
17912 pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 {
17913 simd_reduce_max(a.as_f32x16())
17914 }
17915
17916 /// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
17917 ///
17918 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_ps&expand=4585)
17919 #[inline]
17920 #[target_feature(enable = "avx512f")]
17921 pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
17922 simd_reduce_max(simd_select_bitmask(
17923 k,
17924 a.as_f32x16(),
17925 _mm512_set1_ps(f32::MIN).as_f32x16(), // smallest finite f32, identity element for max
17926 ))
17927 }
17928
17929 /// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
17930 ///
17931 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_pd&expand=4584)
17932 #[inline]
17933 #[target_feature(enable = "avx512f")]
17934 pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
17935 simd_reduce_max(a.as_f64x8())
17936 }
17937
17938 /// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
17939 ///
17940 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_pd&expand=4583)
17941 #[inline]
17942 #[target_feature(enable = "avx512f")]
17943 pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
17944 simd_reduce_max(simd_select_bitmask(
17945 k,
17946 a.as_f64x8(),
17947 _mm512_set1_pd(f64::MIN).as_f64x8(), // smallest finite f64, identity element for max
17948 ))
17949 }
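// Editorial usage sketch (not part of the upstream source): the masked max-reductions fill
// inactive lanes with a value that cannot win the maximum (i32::MIN, i64::MIN, f32::MIN, ...),
// otherwise an all-negative input would wrongly reduce to the fill value. Requires AVX-512F at
// runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn reduce_max_example() {
    let a = _mm512_set1_epi32(-5);
    assert_eq!(_mm512_reduce_max_epi32(a), -5);
    // Inactive lanes hold i32::MIN, so the masked maximum is still -5.
    assert_eq!(_mm512_mask_reduce_max_epi32(0b0000_0000_0000_0011, a), -5);
    // Unsigned max of an all-ones vector is u32::MAX.
    assert_eq!(_mm512_reduce_max_epu32(_mm512_set1_epi32(-1)), u32::MAX);
}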
17950
17951 /// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
17952 ///
17953 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epi32&expand=4588)
17954 #[inline]
17955 #[target_feature(enable = "avx512f")]
17956 pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
17957 simd_reduce_min(a.as_i32x16())
17958 }
17959
17960 /// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
17961 ///
17962 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epi32&expand=4587)
17963 #[inline]
17964 #[target_feature(enable = "avx512f")]
17965 pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
17966 simd_reduce_min(simd_select_bitmask(
17967 k,
17968 a.as_i32x16(),
17969 _mm512_set1_epi32(i32::MAX).as_i32x16(), // identity element for signed min
17970 ))
17971 }
17972
17973 /// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
17974 ///
17975 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epi64&expand=4590)
17976 #[inline]
17977 #[target_feature(enable = "avx512f")]
17978 pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
17979 simd_reduce_min(a.as_i64x8())
17980 }
17981
17982 /// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
17983 ///
17984 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epi64&expand=4589)
17985 #[inline]
17986 #[target_feature(enable = "avx512f")]
17987 pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
17988 simd_reduce_min(simd_select_bitmask(
17989 k,
17990 a.as_i64x8(),
17991 _mm512_set1_epi64(i64::MAX).as_i64x8(), // identity element for signed min
17992 ))
17993 }
17994
17995 /// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
17996 ///
17997 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epu32&expand=4592)
17998 #[inline]
17999 #[target_feature(enable = "avx512f")]
18000 pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
18001 simd_reduce_min(a.as_u32x16())
18002 }
18003
18004 /// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
18005 ///
18006 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epu32&expand=4591)
18007 #[inline]
18008 #[target_feature(enable = "avx512f")]
18009 pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
18010 simd_reduce_min(simd_select_bitmask(
18011 k,
18012 a.as_u32x16(),
18013 _mm512_set1_epi32(-1).as_u32x16(), // all-ones (u32::MAX), the identity element for unsigned min
18014 ))
18015 }
18016
18017 /// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
18018 ///
18019 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epu64&expand=4594)
18020 #[inline]
18021 #[target_feature(enable = "avx512f")]
18022 pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
18023 simd_reduce_min(a.as_u64x8())
18024 }
18025
18026 /// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
18027 ///
18028 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epu64&expand=4593)
18029 #[inline]
18030 #[target_feature(enable = "avx512f")]
18031 pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
18032 simd_reduce_min(simd_select_bitmask(
18033 k,
18034 a.as_u64x8(),
18035 _mm512_set1_epi64(-1).as_u64x8(), // all-ones (u64::MAX), the identity element for unsigned min
18036 ))
18037 }
18038
18039 /// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
18040 ///
18041 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_ps&expand=4598)
18042 #[inline]
18043 #[target_feature(enable = "avx512f")]
18044 pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 {
18045 simd_reduce_min(a.as_f32x16())
18046 }
18047
18048 /// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
18049 ///
18050 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_ps&expand=4597)
18051 #[inline]
18052 #[target_feature(enable = "avx512f")]
18053 pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
18054 simd_reduce_min(simd_select_bitmask(
18055 k,
18056 a.as_f32x16(),
18057 _mm512_set1_ps(f32::MAX).as_f32x16(), // largest finite f32, identity element for min
18058 ))
18059 }
18060
18061 /// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
18062 ///
18063 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_pd&expand=4596)
18064 #[inline]
18065 #[target_feature(enable = "avx512f")]
18066 pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
18067 simd_reduce_min(a.as_f64x8())
18068 }
18069
18070 /// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
18071 ///
18072 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_pd&expand=4595)
18073 #[inline]
18074 #[target_feature(enable = "avx512f")]
18075 pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
18076 simd_reduce_min(simd_select_bitmask(
18077 k,
18078 a.as_f64x8(),
18079 _mm512_set1_pd(f64::MAX).as_f64x8(), // largest finite f64, identity element for min
18080 ))
18081 }
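// Editorial usage sketch (not part of the upstream source): the masked min-reductions mirror the
// max case, filling inactive lanes with the largest representable value so they cannot win the
// minimum. Requires AVX-512F at runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn reduce_min_example() {
    let a = _mm512_set1_epi32(7);
    assert_eq!(_mm512_reduce_min_epi32(a), 7);
    // Inactive lanes hold i32::MAX, so the masked minimum is still 7.
    assert_eq!(_mm512_mask_reduce_min_epi32(0b0000_0000_0000_0001, a), 7);
    assert_eq!(_mm512_reduce_min_ps(_mm512_set1_ps(0.5)), 0.5);
}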
18082
18083 /// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
18084 ///
18085 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_and_epi32&expand=4564)
18086 #[inline]
18087 #[target_feature(enable = "avx512f")]
18088 pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
18089 simd_reduce_and(a.as_i32x16())
18090 }
18091
18092 /// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
18093 ///
18094 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_and_epi32&expand=4563)
18095 #[inline]
18096 #[target_feature(enable = "avx512f")]
18097 pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
18098 simd_reduce_and(simd_select_bitmask(
18099 k,
18100 a.as_i32x16(),
18101 // All-ones is the identity element for bitwise AND; the previous 16-bit pattern (0xFFFF)
18102 // would have cleared the upper bits of the result whenever any lane was inactive.
18103 _mm512_set1_epi32(-1).as_i32x16(),
18120 ))
18121 }
18122
18123 /// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
18124 ///
18125 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_and_epi64&expand=4566)
18126 #[inline]
18127 #[target_feature(enable = "avx512f")]
18128 pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
18129 simd_reduce_and(a.as_i64x8())
18130 }
18131
18132 /// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
18133 ///
18134 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_and_epi64&expand=4565)
18135 #[inline]
18136 #[target_feature(enable = "avx512f")]
18137 pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
18138 simd_reduce_and(simd_select_bitmask(
18139 k,
18140 a.as_i64x8(),
18141 // All-ones is the identity element for bitwise AND (the previous 0xFF fill would clear bits 8..64 of the result).
18142 _mm512_set1_epi64(-1).as_i64x8(),
18143 ))
18144 }
18145
18146 /// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
18147 ///
18148 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_or_epi32&expand=4608)
18149 #[inline]
18150 #[target_feature(enable = "avx512f")]
18151 pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
18152 simd_reduce_or(a.as_i32x16())
18153 }
18154
18155 /// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
18156 ///
18157 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_or_epi32&expand=4607)
18158 #[inline]
18159 #[target_feature(enable = "avx512f")]
18160 pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
18161 simd_reduce_or(simd_select_bitmask(
18162 k,
18163 a.as_i32x16(),
18164 _mm512_setzero_si512().as_i32x16(),
18165 ))
18166 }
18167
18168 /// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
18169 ///
18170 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_or_epi64&expand=4610)
18171 #[inline]
18172 #[target_feature(enable = "avx512f")]
18173 pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
18174 simd_reduce_or(a.as_i64x8())
18175 }
18176
18177 /// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
18178 ///
18179 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_or_epi64&expand=4609)
18180 #[inline]
18181 #[target_feature(enable = "avx512f")]
18182 pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
18183 simd_reduce_or(simd_select_bitmask(
18184 k,
18185 a.as_i64x8(),
18186 _mm512_setzero_si512().as_i64x8(),
18187 ))
18188 }
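// Editorial usage sketch (not part of the upstream source): the identity for a masked AND
// reduction is all-ones, while for OR it is zero, so inactive lanes never affect the result.
// Requires AVX-512F at runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn reduce_and_or_example() {
    let a = _mm512_set1_epi32(0b1010);
    assert_eq!(_mm512_reduce_and_epi32(a), 0b1010);
    // Inactive lanes hold -1 (all bits set), so they do not clear any bit of the AND.
    assert_eq!(_mm512_mask_reduce_and_epi32(0b0000_0000_0000_1111, a), 0b1010);
    // With no active lanes the OR reduction collapses to its identity, 0.
    assert_eq!(_mm512_mask_reduce_or_epi32(0, a), 0);
    assert_eq!(_mm512_reduce_or_epi64(_mm512_set1_epi64(1 << 40)), 1 << 40);
}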
18189
18190 /// Returns vector of type `__m512d` with undefined elements.
18191 ///
18192 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
18193 #[inline]
18194 #[target_feature(enable = "avx512f")]
18195 // This intrinsic has no corresponding instruction.
18196 pub unsafe fn _mm512_undefined_pd() -> __m512d {
18197 _mm512_set1_pd(0.0)
18198 }
18199
18200 /// Returns vector of type `__m512` with undefined elements.
18201 ///
18202 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
18203 #[inline]
18204 #[target_feature(enable = "avx512f")]
18205 // This intrinsic has no corresponding instruction.
18206 pub unsafe fn _mm512_undefined_ps() -> __m512 {
18207 _mm512_set1_ps(0.0)
18208 }
18209
18210 /// Returns vector of type `__m512i` with undefined elements.
18211 ///
18212 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_undefined_epi32&expand=5995)
18213 #[inline]
18214 #[target_feature(enable = "avx512f")]
18215 // This intrinsic has no corresponding instruction.
18216 pub unsafe fn _mm512_undefined_epi32() -> __m512i {
18217 _mm512_set1_epi32(0)
18218 }
18219
18220 /// Returns vector of type `__m512` with undefined elements.
18221 ///
18222 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_undefined&expand=5994)
18223 #[inline]
18224 #[target_feature(enable = "avx512f")]
18225 // This intrinsic has no corresponding instruction.
18226 pub unsafe fn _mm512_undefined() -> __m512 {
18227 _mm512_set1_ps(0.0)
18228 }
18229
18230 /// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
18231 ///
18232 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_loadu_epi32&expand=3377)
18233 #[inline]
18234 #[target_feature(enable = "avx512f")]
18235 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
18236 pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
18237 ptr::read_unaligned(mem_addr as *const __m512i)
18238 }
18239
18240 /// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
18241 ///
18242 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_storeu_epi32&expand=5628)
18243 #[inline]
18244 #[target_feature(enable = "avx512f")]
18245 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
18246 pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
18247 ptr::write_unaligned(mem_addr as *mut __m512i, a);
18248 }
18249
18250 /// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
18251 ///
18252 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_loadu_epi64&expand=3386)
18253 #[inline]
18254 #[target_feature(enable = "avx512f")]
18255 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
18256 pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
18257 ptr::read_unaligned(mem_addr as *const __m512i)
18258 }
18259
18260 /// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
18261 ///
18262 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_storeu_epi64&expand=5634)
18263 #[inline]
18264 #[target_feature(enable = "avx512f")]
18265 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
18266 pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
18267 ptr::write_unaligned(mem_addr as *mut __m512i, a);
18268 }
18269
18270 /// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
18271 ///
18272 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_loadu_si512&expand=3420)
18273 #[inline]
18274 #[target_feature(enable = "avx512f")]
18275 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
18276 pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
18277 ptr::read_unaligned(mem_addr as *const __m512i)
18278 }
18279
18280 /// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
18281 ///
18282 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_storeu_si512&expand=5657)
18283 #[inline]
18284 #[target_feature(enable = "avx512f")]
18285 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
18286 pub unsafe fn _mm512_storeu_si512(mem_addr: *mut i32, a: __m512i) {
18287 ptr::write_unaligned(mem_addr as *mut __m512i, a);
18288 }
18289
18290 /// Loads 512-bits (composed of 8 packed double-precision (64-bit)
18291 /// floating-point elements) from memory into result.
18292 /// `mem_addr` does not need to be aligned on any particular boundary.
18293 ///
18294 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
18295 #[inline]
18296 #[target_feature(enable = "avx512f")]
18297 #[cfg_attr(test, assert_instr(vmovups))]
18298 pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
18299 ptr::read_unaligned(mem_addr as *const __m512d)
18300 }
18301
18302 /// Stores 512-bits (composed of 8 packed double-precision (64-bit)
18303 /// floating-point elements) from `a` into memory.
18304 /// `mem_addr` does not need to be aligned on any particular boundary.
18305 ///
18306 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
18307 #[inline]
18308 #[target_feature(enable = "avx512f")]
18309 #[cfg_attr(test, assert_instr(vmovups))]
18310 pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
18311 ptr::write_unaligned(mem_addr as *mut __m512d, a);
18312 }
18313
18314 /// Loads 512-bits (composed of 16 packed single-precision (32-bit)
18315 /// floating-point elements) from memory into result.
18316 /// `mem_addr` does not need to be aligned on any particular boundary.
18317 ///
18318 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
18319 #[inline]
18320 #[target_feature(enable = "avx512f")]
18321 #[cfg_attr(test, assert_instr(vmovups))]
18322 pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
18323 ptr::read_unaligned(mem_addr as *const __m512)
18324 }
18325
18326 /// Stores 512-bits (composed of 16 packed single-precision (32-bit)
18327 /// floating-point elements) from `a` into memory.
18328 /// `mem_addr` does not need to be aligned on any particular boundary.
18329 ///
18330 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
18331 #[inline]
18332 #[target_feature(enable = "avx512f")]
18333 #[cfg_attr(test, assert_instr(vmovups))]
18334 #[stable(feature = "simd_x86", since = "1.27.0")]
18335 pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
18336 ptr::write_unaligned(mem_addr as *mut __m512, a);
18337 }
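// Editorial usage sketch (not part of the upstream source): a round trip through the unaligned
// load/store intrinsics. Plain arrays and slices need no special alignment here. Requires
// AVX-512F at runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn loadu_storeu_example() {
    let src = [7i32; 16];
    let v = _mm512_loadu_epi32(src.as_ptr());
    let mut dst = [0i32; 16];
    _mm512_storeu_epi32(dst.as_mut_ptr(), v);
    assert_eq!(dst, src);
}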
18338
18339 /// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18340 ///
18341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_si512&expand=3345)
18342 #[inline]
18343 #[target_feature(enable = "avx512f")]
18344 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
18345 pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
18346 ptr::read(mem_addr as *const __m512i)
18347 }
18348
18349 /// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18350 ///
18351 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_si512&expand=5598)
18352 #[inline]
18353 #[target_feature(enable = "avx512f")]
18354 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
18355 pub unsafe fn _mm512_store_si512(mem_addr: *mut i32, a: __m512i) {
18356 ptr::write(mem_addr as *mut __m512i, a);
18357 }
18358
18359 /// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18360 ///
18361 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_epi32&expand=3304)
18362 #[inline]
18363 #[target_feature(enable = "avx512f")]
18364 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
18365 pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
18366 ptr::read(mem_addr as *const __m512i)
18367 }
18368
18369 /// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18370 ///
18371 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_epi32&expand=5569)
18372 #[inline]
18373 #[target_feature(enable = "avx512f")]
18374 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
18375 pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
18376 ptr::write(mem_addr as *mut __m512i, a);
18377 }
18378
18379 /// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18380 ///
18381 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_epi64&expand=3313)
18382 #[inline]
18383 #[target_feature(enable = "avx512f")]
18384 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
18385 pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
18386 ptr::read(mem_addr as *const __m512i)
18387 }
18388
18389 /// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18390 ///
18391 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_epi64&expand=5575)
18392 #[inline]
18393 #[target_feature(enable = "avx512f")]
18394 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
18395 pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
18396 ptr::write(mem_addr as *mut __m512i, a);
18397 }
18398
18399 /// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18400 ///
18401 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_ps&expand=3336)
18402 #[inline]
18403 #[target_feature(enable = "avx512f")]
18404 #[cfg_attr(test, assert_instr(vmovaps))]
18405 pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
18406 ptr::read(mem_addr as *const __m512)
18407 }
18408
18409 /// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18410 ///
18411 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_ps&expand=5592)
18412 #[inline]
18413 #[target_feature(enable = "avx512f")]
18414 #[cfg_attr(test, assert_instr(vmovaps))]
18415 pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
18416 ptr::write(mem_addr as *mut __m512, a);
18417 }
18418
18419 /// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18420 ///
18421 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_pd&expand=3326)
18422 #[inline]
18423 #[target_feature(enable = "avx512f")]
18424 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
18425 pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
18426 ptr::read(mem_addr as *const __m512d)
18427 }
18428
18429 /// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
18430 ///
18431 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_pd&expand=5585)
18432 #[inline]
18433 #[target_feature(enable = "avx512f")]
18434 #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
18435 pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
18436 ptr::write(mem_addr as *mut __m512d, a);
18437 }
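// Editorial usage sketch (not part of the upstream source): the aligned load/store intrinsics
// require 64-byte alignment, which `#[repr(align(64))]` guarantees for the wrapper below.
// Requires AVX-512F at runtime; the type and function names are illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn aligned_load_store_example() {
    #[repr(align(64))]
    struct Aligned64([f32; 16]);
    let src = Aligned64([1.0; 16]);
    let v = _mm512_load_ps(src.0.as_ptr());
    let mut dst = Aligned64([0.0; 16]);
    _mm512_store_ps(dst.0.as_mut_ptr(), v);
    assert_eq!(dst.0, src.0);
}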
18438
18439 /// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
18440 ///
18441 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_pd&expand=5002)
18442 #[inline]
18443 #[target_feature(enable = "avx512f")]
18444 pub unsafe fn _mm512_setr_pd(
18445 e0: f64,
18446 e1: f64,
18447 e2: f64,
18448 e3: f64,
18449 e4: f64,
18450 e5: f64,
18451 e6: f64,
18452 e7: f64,
18453 ) -> __m512d {
18454 let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
18455 transmute(r)
18456 }
18457
18458 /// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
18459 ///
18460 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_pd&expand=4924)
18461 #[inline]
18462 #[target_feature(enable = "avx512f")]
18463 pub unsafe fn _mm512_set_pd(
18464 e0: f64,
18465 e1: f64,
18466 e2: f64,
18467 e3: f64,
18468 e4: f64,
18469 e5: f64,
18470 e6: f64,
18471 e7: f64,
18472 ) -> __m512d {
18473 _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
18474 }
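// Editorial usage sketch (not part of the upstream source): `_mm512_set_pd` takes its arguments
// from the highest element down to element 0, while `_mm512_setr_pd` takes them in memory order.
// Requires AVX-512F at runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set_pd_order_example() {
    let hi_to_lo = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
    let lo_to_hi = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let mut a = [0f64; 8];
    let mut b = [0f64; 8];
    _mm512_storeu_pd(a.as_mut_ptr(), hi_to_lo);
    _mm512_storeu_pd(b.as_mut_ptr(), lo_to_hi);
    assert_eq!(a, b);
    assert_eq!(a[0], 0.0); // element 0 is the lowest lane
}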
18475
18476 /// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18477 ///
18478 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_move_ss&expand=3832)
18479 #[inline]
18480 #[target_feature(enable = "avx512f")]
18481 #[cfg_attr(test, assert_instr(vmovss))]
18482 pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18483 let extractsrc: f32 = simd_extract(src, 0);
18484 let mut mov: f32 = extractsrc;
18485 if (k & 0b00000001) != 0 {
18486 mov = simd_extract(b, 0);
18487 }
18488 let r = simd_insert(a, 0, mov);
18489 transmute(r)
18490 }
18491
18492 /// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18493 ///
18494 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_move_ss&expand=3833)
18495 #[inline]
18496 #[target_feature(enable = "avx512f")]
18497 #[cfg_attr(test, assert_instr(vmovss))]
18498 pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18499 let mut mov: f32 = 0.;
18500 if (k & 0b00000001) != 0 {
18501 mov = simd_extract(b, 0);
18502 }
18503 let r = simd_insert(a, 0, mov);
18504 transmute(r)
18505 }
18506
18507 /// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18508 ///
18509 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_move_sd&expand=3829)
18510 #[inline]
18511 #[target_feature(enable = "avx512f")]
18512 #[cfg_attr(test, assert_instr(vmovsd))]
18513 pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18514 let extractsrc: f64 = simd_extract(src, 0);
18515 let mut mov: f64 = extractsrc;
18516 if (k & 0b00000001) != 0 {
18517 mov = simd_extract(b, 0);
18518 }
18519 let r = simd_insert(a, 0, mov);
18520 transmute(r)
18521 }
18522
18523 /// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18524 ///
18525 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_move_sd&expand=3830)
18526 #[inline]
18527 #[target_feature(enable = "avx512f")]
18528 #[cfg_attr(test, assert_instr(vmovsd))]
18529 pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18530 let mut mov: f64 = 0.;
18531 if (k & 0b00000001) != 0 {
18532 mov = simd_extract(b, 0);
18533 }
18534 let r = simd_insert(a, 0, mov);
18535 transmute(r)
18536 }
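// Editorial usage sketch (not part of the upstream source): only mask bit 0 matters for the
// scalar moves; the writemask form falls back to src's lower element, the zeromask form to 0.
// Requires AVX-512F at runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_move_ss_example() {
    let src = _mm_set_ss(9.0);
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    // Bit 0 set: the lower lane is taken from b.
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 2.0);
    // Bit 0 clear: the writemask form keeps src's lower lane, the zeromask form zeroes it.
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), 9.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_move_ss(0b0, a, b)), 0.0);
}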
18537
18538 /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18539 ///
18540 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_ss&expand=159)
18541 #[inline]
18542 #[target_feature(enable = "avx512f")]
18543 #[cfg_attr(test, assert_instr(vaddss))]
18544 pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18545 let extractsrc: f32 = simd_extract(src, 0);
18546 let mut add: f32 = extractsrc;
18547 if (k & 0b00000001) != 0 {
18548 let extracta: f32 = simd_extract(a, 0);
18549 let extractb: f32 = simd_extract(b, 0);
18550 add = extracta + extractb;
18551 }
18552 let r = simd_insert(a, 0, add);
18553 transmute(r)
18554 }
18555
18556 /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18557 ///
18558 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_add_ss&expand=160)
18559 #[inline]
18560 #[target_feature(enable = "avx512f")]
18561 #[cfg_attr(test, assert_instr(vaddss))]
18562 pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18563 let mut add: f32 = 0.;
18564 if (k & 0b00000001) != 0 {
18565 let extracta: f32 = simd_extract(a, 0);
18566 let extractb: f32 = simd_extract(b, 0);
18567 add = extracta + extractb;
18568 }
18569 let r = simd_insert(a, 0, add);
18570 transmute(r)
18571 }
18572
18573 /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18574 ///
18575 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_sd&expand=155)
18576 #[inline]
18577 #[target_feature(enable = "avx512f")]
18578 #[cfg_attr(test, assert_instr(vaddsd))]
18579 pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18580 let extractsrc: f64 = simd_extract(src, 0);
18581 let mut add: f64 = extractsrc;
18582 if (k & 0b00000001) != 0 {
18583 let extracta: f64 = simd_extract(a, 0);
18584 let extractb: f64 = simd_extract(b, 0);
18585 add = extracta + extractb;
18586 }
18587 let r = simd_insert(a, 0, add);
18588 transmute(r)
18589 }
18590
18591 /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18592 ///
18593 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_add_sd&expand=156)
18594 #[inline]
18595 #[target_feature(enable = "avx512f")]
18596 #[cfg_attr(test, assert_instr(vaddsd))]
18597 pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18598 let mut add: f64 = 0.;
18599 if (k & 0b00000001) != 0 {
18600 let extracta: f64 = simd_extract(a, 0);
18601 let extractb: f64 = simd_extract(b, 0);
18602 add = extracta + extractb;
18603 }
18604 let r = simd_insert(a, 0, add);
18605 transmute(r)
18606 }
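// Editorial usage sketch (not part of the upstream source): the masked scalar arithmetic follows
// the same pattern as the scalar moves, but computes `a0 + b0` when mask bit 0 is set.
// Requires AVX-512F at runtime; the function name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_add_ss_example() {
    let src = _mm_set_ss(9.0);
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    // Bit 0 set: the lower lane is 1.0 + 2.0.
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 3.0);
    // Bit 0 clear: copy from src (writemask) or zero (zeromask).
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), 9.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
}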
18607
18608 /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18609 ///
18610 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_ss&expand=5750)
18611 #[inline]
18612 #[target_feature(enable = "avx512f")]
18613 #[cfg_attr(test, assert_instr(vsubss))]
18614 pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18615 let extractsrc: f32 = simd_extract(src, 0);
18616 let mut add: f32 = extractsrc;
18617 if (k & 0b00000001) != 0 {
18618 let extracta: f32 = simd_extract(a, 0);
18619 let extractb: f32 = simd_extract(b, 0);
18620 add = extracta - extractb;
18621 }
18622 let r = simd_insert(a, 0, add);
18623 transmute(r)
18624 }
18625
18626 /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18627 ///
18628 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_ss&expand=5751)
18629 #[inline]
18630 #[target_feature(enable = "avx512f")]
18631 #[cfg_attr(test, assert_instr(vsubss))]
18632 pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18633 let mut add: f32 = 0.;
18634 if (k & 0b00000001) != 0 {
18635 let extracta: f32 = simd_extract(a, 0);
18636 let extractb: f32 = simd_extract(b, 0);
18637 add = extracta - extractb;
18638 }
18639 let r = simd_insert(a, 0, add);
18640 transmute(r)
18641 }
18642
18643 /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18644 ///
18645 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_sd&expand=5746)
18646 #[inline]
18647 #[target_feature(enable = "avx512f")]
18648 #[cfg_attr(test, assert_instr(vsubsd))]
18649 pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18650 let extractsrc: f64 = simd_extract(src, 0);
18651 let mut add: f64 = extractsrc;
18652 if (k & 0b00000001) != 0 {
18653 let extracta: f64 = simd_extract(a, 0);
18654 let extractb: f64 = simd_extract(b, 0);
18655 add = extracta - extractb;
18656 }
18657 let r = simd_insert(a, 0, add);
18658 transmute(r)
18659 }
18660
18661 /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18662 ///
18663 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_sd&expand=5747)
18664 #[inline]
18665 #[target_feature(enable = "avx512f")]
18666 #[cfg_attr(test, assert_instr(vsubsd))]
18667 pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18668 let mut sub: f64 = 0.;
18669 if (k & 0b00000001) != 0 {
18670 let extracta: f64 = simd_extract(a, 0);
18671 let extractb: f64 = simd_extract(b, 0);
18672 sub = extracta - extractb;
18673 }
18674 let r = simd_insert(a, 0, sub);
18675 transmute(r)
18676 }
18677
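// Editorial usage sketch (not part of the upstream source): a minimal,
// test-style illustration of the writemask/zeromask behaviour of the masked
// scalar subtractions above, assuming an AVX-512F-capable CPU at test time.
// The module and function names below are hypothetical.
#[cfg(test)]
mod scalar_sub_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn writemask_vs_zeromask() {
        let src = _mm_set_ss(42.);
        let a = _mm_set_ss(7.);
        let b = _mm_set_ss(2.);
        // Mask bit 0 set: lane 0 holds a - b.
        assert_eq!(_mm_cvtss_f32(_mm_mask_sub_ss(src, 0b1, a, b)), 5.);
        // Mask bit 0 clear: the writemask form copies lane 0 from src,
        // while the zeromask form zeroes it.
        assert_eq!(_mm_cvtss_f32(_mm_mask_sub_ss(src, 0b0, a, b)), 42.);
        assert_eq!(_mm_cvtss_f32(_mm_maskz_sub_ss(0b0, a, b)), 0.);
    }
}
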
18678 /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18679 ///
18680 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_ss&expand=3950)
18681 #[inline]
18682 #[target_feature(enable = "avx512f")]
18683 #[cfg_attr(test, assert_instr(vmulss))]
18684 pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18685 let extractsrc: f32 = simd_extract(src, 0);
18686 let mut mul: f32 = extractsrc;
18687 if (k & 0b00000001) != 0 {
18688 let extracta: f32 = simd_extract(a, 0);
18689 let extractb: f32 = simd_extract(b, 0);
18690 mul = extracta * extractb;
18691 }
18692 let r = simd_insert(a, 0, mul);
18693 transmute(r)
18694 }
18695
18696 /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18697 ///
18698 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_ss&expand=3951)
18699 #[inline]
18700 #[target_feature(enable = "avx512f")]
18701 #[cfg_attr(test, assert_instr(vmulss))]
18702 pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18703 let mut mul: f32 = 0.;
18704 if (k & 0b00000001) != 0 {
18705 let extracta: f32 = simd_extract(a, 0);
18706 let extractb: f32 = simd_extract(b, 0);
18707 mul = extracta * extractb;
18708 }
18709 let r = simd_insert(a, 0, mul);
18710 transmute(r)
18711 }
18712
18713 /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18714 ///
18715 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_sd&expand=3947)
18716 #[inline]
18717 #[target_feature(enable = "avx512f")]
18718 #[cfg_attr(test, assert_instr(vmulsd))]
18719 pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18720 let extractsrc: f64 = simd_extract(src, 0);
18721 let mut mul: f64 = extractsrc;
18722 if (k & 0b00000001) != 0 {
18723 let extracta: f64 = simd_extract(a, 0);
18724 let extractb: f64 = simd_extract(b, 0);
18725 mul = extracta * extractb;
18726 }
18727 let r = simd_insert(a, 0, mul);
18728 transmute(r)
18729 }
18730
18731 /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18732 ///
18733 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_sd&expand=3948)
18734 #[inline]
18735 #[target_feature(enable = "avx512f")]
18736 #[cfg_attr(test, assert_instr(vmulsd))]
18737 pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18738 let mut mul: f64 = 0.;
18739 if (k & 0b00000001) != 0 {
18740 let extracta: f64 = simd_extract(a, 0);
18741 let extractb: f64 = simd_extract(b, 0);
18742 mul = extracta * extractb;
18743 }
18744 let r = simd_insert(a, 0, mul);
18745 transmute(r)
18746 }
18747
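// Editorial usage sketch (not part of the upstream source): the double-precision
// masked multiplies above follow the same lane-0 convention; names below are
// hypothetical and the sketch assumes an AVX-512F-capable CPU at test time.
#[cfg(test)]
mod scalar_mul_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn writemask_vs_zeromask() {
        let src = _mm_set_sd(10.);
        let a = _mm_set_sd(3.);
        let b = _mm_set_sd(4.);
        // Mask bit 0 set: lane 0 holds a * b.
        assert_eq!(_mm_cvtsd_f64(_mm_mask_mul_sd(src, 0b1, a, b)), 12.);
        // Mask bit 0 clear: src is copied through (writemask) or zeroed (zeromask).
        assert_eq!(_mm_cvtsd_f64(_mm_mask_mul_sd(src, 0b0, a, b)), 10.);
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_mul_sd(0b0, a, b)), 0.);
    }
}
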
18748 /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18749 ///
18750 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_ss&expand=2181)
18751 #[inline]
18752 #[target_feature(enable = "avx512f")]
18753 #[cfg_attr(test, assert_instr(vdivss))]
18754 pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18755 let extractsrc: f32 = simd_extract(src, 0);
18756 let mut div: f32 = extractsrc;
18757 if (k & 0b00000001) != 0 {
18758 let extracta: f32 = simd_extract(a, 0);
18759 let extractb: f32 = simd_extract(b, 0);
18760 div = extracta / extractb;
18761 }
18762 let r = simd_insert(a, 0, div);
18763 transmute(r)
18764 }
18765
18766 /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18767 ///
18768 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_ss&expand=2182)
18769 #[inline]
18770 #[target_feature(enable = "avx512f")]
18771 #[cfg_attr(test, assert_instr(vdivss))]
18772 pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18773 let mut div: f32 = 0.;
18774 if (k & 0b00000001) != 0 {
18775 let extracta: f32 = simd_extract(a, 0);
18776 let extractb: f32 = simd_extract(b, 0);
18777 div = extracta / extractb;
18778 }
18779 let r = simd_insert(a, 0, div);
18780 transmute(r)
18781 }
18782
18783 /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18784 ///
18785 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_sd&expand=2178)
18786 #[inline]
18787 #[target_feature(enable = "avx512f")]
18788 #[cfg_attr(test, assert_instr(vdivsd))]
18789 pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18790 let extractsrc: f64 = simd_extract(src, 0);
18791 let mut div: f64 = extractsrc;
18792 if (k & 0b00000001) != 0 {
18793 let extracta: f64 = simd_extract(a, 0);
18794 let extractb: f64 = simd_extract(b, 0);
18795 div = extracta / extractb;
18796 }
18797 let r = simd_insert(a, 0, div);
18798 transmute(r)
18799 }
18800
18801 /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18802 ///
18803 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_sd&expand=2179)
18804 #[inline]
18805 #[target_feature(enable = "avx512f")]
18806 #[cfg_attr(test, assert_instr(vdivsd))]
18807 pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18808 let mut div: f64 = 0.;
18809 if (k & 0b00000001) != 0 {
18810 let extracta: f64 = simd_extract(a, 0);
18811 let extractb: f64 = simd_extract(b, 0);
18812 div = extracta / extractb;
18813 }
18814 let r = simd_insert(a, 0, div);
18815 transmute(r)
18816 }
18817
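// Editorial usage sketch (not part of the upstream source): the dividend comes
// from a and the divisor from b in the masked scalar divisions above; names
// below are hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_div_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn lane0_division() {
        let src = _mm_set_ss(1.);
        let a = _mm_set_ss(9.);
        let b = _mm_set_ss(4.);
        // Mask bit 0 set: lane 0 holds a / b = 2.25 in both forms.
        assert_eq!(_mm_cvtss_f32(_mm_mask_div_ss(src, 0b1, a, b)), 2.25);
        assert_eq!(_mm_cvtss_f32(_mm_maskz_div_ss(0b1, a, b)), 2.25);
        // Mask bit 0 clear: the writemask form falls back to src.
        assert_eq!(_mm_cvtss_f32(_mm_mask_div_ss(src, 0b0, a, b)), 1.);
    }
}
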
18818 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18819 ///
18820 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_ss&expand=3672)
18821 #[inline]
18822 #[target_feature(enable = "avx512f")]
18823 #[cfg_attr(test, assert_instr(vmaxss))]
18824 pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18825 transmute(vmaxss(
18826 a.as_f32x4(),
18827 b.as_f32x4(),
18828 src.as_f32x4(),
18829 k,
18830 _MM_FROUND_CUR_DIRECTION,
18831 ))
18832 }
18833
18834 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18835 ///
18836 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_ss&expand=3673)
18837 #[inline]
18838 #[target_feature(enable = "avx512f")]
18839 #[cfg_attr(test, assert_instr(vmaxss))]
18840 pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18841 transmute(vmaxss(
18842 a.as_f32x4(),
18843 b.as_f32x4(),
18844 _mm_setzero_ps().as_f32x4(),
18845 k,
18846 _MM_FROUND_CUR_DIRECTION,
18847 ))
18848 }
18849
18850 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18851 ///
18852 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_sd&expand=3669)
18853 #[inline]
18854 #[target_feature(enable = "avx512f")]
18855 #[cfg_attr(test, assert_instr(vmaxsd))]
18856 pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18857 transmute(vmaxsd(
18858 a.as_f64x2(),
18859 b.as_f64x2(),
18860 src.as_f64x2(),
18861 k,
18862 _MM_FROUND_CUR_DIRECTION,
18863 ))
18864 }
18865
18866 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18867 ///
18868 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_sd&expand=3670)
18869 #[inline]
18870 #[target_feature(enable = "avx512f")]
18871 #[cfg_attr(test, assert_instr(vmaxsd))]
18872 pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18873 transmute(vmaxsd(
18874 a.as_f64x2(),
18875 b.as_f64x2(),
18876 _mm_setzero_pd().as_f64x2(),
18877 k,
18878 _MM_FROUND_CUR_DIRECTION,
18879 ))
18880 }
18881
18882 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18883 ///
18884 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_ss&expand=3786)
18885 #[inline]
18886 #[target_feature(enable = "avx512f")]
18887 #[cfg_attr(test, assert_instr(vminss))]
18888 pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18889 transmute(vminss(
18890 a.as_f32x4(),
18891 b.as_f32x4(),
18892 src.as_f32x4(),
18893 k,
18894 _MM_FROUND_CUR_DIRECTION,
18895 ))
18896 }
18897
18898 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18899 ///
18900 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_ss&expand=3787)
18901 #[inline]
18902 #[target_feature(enable = "avx512f")]
18903 #[cfg_attr(test, assert_instr(vminss))]
18904 pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18905 transmute(vminss(
18906 a.as_f32x4(),
18907 b.as_f32x4(),
18908 _mm_setzero_ps().as_f32x4(),
18909 k,
18910 _MM_FROUND_CUR_DIRECTION,
18911 ))
18912 }
18913
18914 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18915 ///
18916 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_sd&expand=3783)
18917 #[inline]
18918 #[target_feature(enable = "avx512f")]
18919 #[cfg_attr(test, assert_instr(vminsd))]
18920 pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18921 transmute(vminsd(
18922 a.as_f64x2(),
18923 b.as_f64x2(),
18924 src.as_f64x2(),
18925 k,
18926 _MM_FROUND_CUR_DIRECTION,
18927 ))
18928 }
18929
18930 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18931 ///
18932 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_sd&expand=3784)
18933 #[inline]
18934 #[target_feature(enable = "avx512f")]
18935 #[cfg_attr(test, assert_instr(vminsd))]
18936 pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18937 transmute(vminsd(
18938 a.as_f64x2(),
18939 b.as_f64x2(),
18940 _mm_setzero_pd().as_f64x2(),
18941 k,
18942 _MM_FROUND_CUR_DIRECTION,
18943 ))
18944 }
18945
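// Editorial usage sketch (not part of the upstream source): the masked scalar
// max/min intrinsics above compare only lane 0 of a and b; names below are
// hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_minmax_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn lane0_max_and_min() {
        let src = _mm_set_ss(0.5);
        let a = _mm_set_ss(3.);
        let b = _mm_set_ss(8.);
        // Mask bit 0 set: lane 0 holds max(a, b) or min(a, b).
        assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 8.);
        assert_eq!(_mm_cvtss_f32(_mm_mask_min_ss(src, 0b1, a, b)), 3.);
        // Mask bit 0 clear: lane 0 comes from src (writemask) or is zeroed (zeromask).
        assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), 0.5);
        assert_eq!(_mm_cvtss_f32(_mm_maskz_min_ss(0b0, a, b)), 0.);
    }
}
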
18946 /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18947 ///
18948 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_ss&expand=5387)
18949 #[inline]
18950 #[target_feature(enable = "avx512f")]
18951 #[cfg_attr(test, assert_instr(vsqrtss))]
18952 pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
18953 transmute(vsqrtss(
18954 a.as_f32x4(),
18955 b.as_f32x4(),
18956 src.as_f32x4(),
18957 k,
18958 _MM_FROUND_CUR_DIRECTION,
18959 ))
18960 }
18961
18962 /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
18963 ///
18964 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_ss&expand=5388)
18965 #[inline]
18966 #[target_feature(enable = "avx512f")]
18967 #[cfg_attr(test, assert_instr(vsqrtss))]
18968 pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
18969 transmute(vsqrtss(
18970 a.as_f32x4(),
18971 b.as_f32x4(),
18972 _mm_setzero_ps().as_f32x4(),
18973 k,
18974 _MM_FROUND_CUR_DIRECTION,
18975 ))
18976 }
18977
18978 /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18979 ///
18980 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_sd&expand=5384)
18981 #[inline]
18982 #[target_feature(enable = "avx512f")]
18983 #[cfg_attr(test, assert_instr(vsqrtsd))]
18984 pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
18985 transmute(vsqrtsd(
18986 a.as_f64x2(),
18987 b.as_f64x2(),
18988 src.as_f64x2(),
18989 k,
18990 _MM_FROUND_CUR_DIRECTION,
18991 ))
18992 }
18993
18994 /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
18995 ///
18996 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_sd&expand=5385)
18997 #[inline]
18998 #[target_feature(enable = "avx512f")]
18999 #[cfg_attr(test, assert_instr(vsqrtsd))]
19000 pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19001 transmute(vsqrtsd(
19002 a.as_f64x2(),
19003 b.as_f64x2(),
19004 _mm_setzero_pd().as_f64x2(),
19005 k,
19006 _MM_FROUND_CUR_DIRECTION,
19007 ))
19008 }
19009
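// Editorial usage sketch (not part of the upstream source): the masked scalar
// square roots above take the operand from lane 0 of b, not a; names below are
// hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_sqrt_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn sqrt_of_lane0_of_b() {
        let src = _mm_set_sd(-1.);
        let a = _mm_set_sd(100.);
        let b = _mm_set_sd(9.);
        // Mask bit 0 set: lane 0 holds sqrt(b) = 3, regardless of a's lane 0.
        assert_eq!(_mm_cvtsd_f64(_mm_mask_sqrt_sd(src, 0b1, a, b)), 3.);
        // Mask bit 0 clear: src is copied through (writemask) or zeroed (zeromask).
        assert_eq!(_mm_cvtsd_f64(_mm_mask_sqrt_sd(src, 0b0, a, b)), -1.);
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_sqrt_sd(0b0, a, b)), 0.);
    }
}
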
19010 /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
19011 ///
19012 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rsqrt14_ss&expand=4825)
19013 #[inline]
19014 #[target_feature(enable = "avx512f")]
19015 #[cfg_attr(test, assert_instr(vrsqrt14ss))]
19016 pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
19017 transmute(vrsqrt14ss(
19018 a.as_f32x4(),
19019 b.as_f32x4(),
19020 _mm_setzero_ps().as_f32x4(),
19021 0b1,
19022 ))
19023 }
19024
19025 /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
19026 ///
19027 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rsqrt14_ss&expand=4823)
19028 #[inline]
19029 #[target_feature(enable = "avx512f")]
19030 #[cfg_attr(test, assert_instr(vrsqrt14ss))]
19031 pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
19032 transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
19033 }
19034
19035 /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
19036 ///
19037 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rsqrt14_ss&expand=4824)
19038 #[inline]
19039 #[target_feature(enable = "avx512f")]
19040 #[cfg_attr(test, assert_instr(vrsqrt14ss))]
19041 pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
19042 transmute(vrsqrt14ss(
19043 a.as_f32x4(),
19044 b.as_f32x4(),
19045 _mm_setzero_ps().as_f32x4(),
19046 k,
19047 ))
19048 }
19049
19050 /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
19051 ///
19052 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rsqrt14_sd&expand=4822)
19053 #[inline]
19054 #[target_feature(enable = "avx512f")]
19055 #[cfg_attr(test, assert_instr(vrsqrt14sd))]
19056 pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
19057 transmute(vrsqrt14sd(
19058 a.as_f64x2(),
19059 b.as_f64x2(),
19060 _mm_setzero_pd().as_f64x2(),
19061 0b1,
19062 ))
19063 }
19064
19065 /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
19066 ///
19067 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rsqrt14_sd&expand=4820)
19068 #[inline]
19069 #[target_feature(enable = "avx512f")]
19070 #[cfg_attr(test, assert_instr(vrsqrt14sd))]
19071 pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19072 transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
19073 }
19074
19075 /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
19076 ///
19077 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rsqrt14_sd&expand=4821)
19078 #[inline]
19079 #[target_feature(enable = "avx512f")]
19080 #[cfg_attr(test, assert_instr(vrsqrt14sd))]
19081 pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19082 transmute(vrsqrt14sd(
19083 a.as_f64x2(),
19084 b.as_f64x2(),
19085 _mm_setzero_pd().as_f64x2(),
19086 k,
19087 ))
19088 }
19089
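// Editorial usage sketch (not part of the upstream source): vrsqrt14 is an
// approximation, so the sketch only checks a loose bound derived from the
// documented 2^-14 relative error; names below are hypothetical and an
// AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_rsqrt14_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn approximate_inverse_sqrt() {
        let a = _mm_set_ss(10.);
        let b = _mm_set_ss(4.);
        // Lane 0 approximates 1 / sqrt(4) = 0.5 to within 2^-14 relative error.
        let r = _mm_cvtss_f32(_mm_rsqrt14_ss(a, b));
        assert!(r > 0.4999 && r < 0.5001);
        // The zeromask form with mask bit 0 clear yields exactly 0 in lane 0.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_rsqrt14_ss(0b0, a, b)), 0.);
    }
}
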
19090 /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
19091 ///
19092 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rcp14_ss&expand=4508)
19093 #[inline]
19094 #[target_feature(enable = "avx512f")]
19095 #[cfg_attr(test, assert_instr(vrcp14ss))]
19096 pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
19097 transmute(vrcp14ss(
19098 a.as_f32x4(),
19099 b.as_f32x4(),
19100 _mm_setzero_ps().as_f32x4(),
19101 0b1,
19102 ))
19103 }
19104
19105 /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
19106 ///
19107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rcp14_ss&expand=4506)
19108 #[inline]
19109 #[target_feature(enable = "avx512f")]
19110 #[cfg_attr(test, assert_instr(vrcp14ss))]
19111 pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
19112 transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
19113 }
19114
19115 /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
19116 ///
19117 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rcp14_ss&expand=4507)
19118 #[inline]
19119 #[target_feature(enable = "avx512f")]
19120 #[cfg_attr(test, assert_instr(vrcp14ss))]
19121 pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
19122 transmute(vrcp14ss(
19123 a.as_f32x4(),
19124 b.as_f32x4(),
19125 _mm_setzero_ps().as_f32x4(),
19126 k,
19127 ))
19128 }
19129
19130 /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
19131 ///
19132 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rcp14_sd&expand=4505)
19133 #[inline]
19134 #[target_feature(enable = "avx512f")]
19135 #[cfg_attr(test, assert_instr(vrcp14sd))]
19136 pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
19137 transmute(vrcp14sd(
19138 a.as_f64x2(),
19139 b.as_f64x2(),
19140 _mm_setzero_pd().as_f64x2(),
19141 0b1,
19142 ))
19143 }
19144
19145 /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
19146 ///
19147 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rcp14_sd&expand=4503)
19148 #[inline]
19149 #[target_feature(enable = "avx512f")]
19150 #[cfg_attr(test, assert_instr(vrcp14sd))]
19151 pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19152 transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
19153 }
19154
19155 /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
19156 ///
19157 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rcp14_sd&expand=4504)
19158 #[inline]
19159 #[target_feature(enable = "avx512f")]
19160 #[cfg_attr(test, assert_instr(vrcp14sd))]
19161 pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19162 transmute(vrcp14sd(
19163 a.as_f64x2(),
19164 b.as_f64x2(),
19165 _mm_setzero_pd().as_f64x2(),
19166 k,
19167 ))
19168 }
19169
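// Editorial usage sketch (not part of the upstream source): like vrsqrt14,
// vrcp14 is approximate, so only a loose bound is checked; names below are
// hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_rcp14_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn approximate_reciprocal() {
        let a = _mm_set_sd(7.);
        let b = _mm_set_sd(8.);
        // Lane 0 approximates 1 / 8 = 0.125 to within 2^-14 relative error.
        let r = _mm_cvtsd_f64(_mm_rcp14_sd(a, b));
        assert!(r > 0.1249 && r < 0.1251);
        // Writemask form with mask bit 0 clear copies lane 0 from src.
        let src = _mm_set_sd(5.);
        assert_eq!(_mm_cvtsd_f64(_mm_mask_rcp14_sd(src, 0b0, a, b)), 5.);
    }
}
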
19170 /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
19171 ///
19172 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_ss&expand=2862)
19173 #[inline]
19174 #[target_feature(enable = "avx512f")]
19175 #[cfg_attr(test, assert_instr(vgetexpss))]
19176 pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
19177 transmute(vgetexpss(
19178 a.as_f32x4(),
19179 b.as_f32x4(),
19180 _mm_setzero_ps().as_f32x4(),
19181 0b1,
19182 _MM_FROUND_NO_EXC,
19183 ))
19184 }
19185
19186 /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
19187 ///
19188 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_ss&expand=2863)
19189 #[inline]
19190 #[target_feature(enable = "avx512f")]
19191 #[cfg_attr(test, assert_instr(vgetexpss))]
19192 pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
19193 transmute(vgetexpss(
19194 a.as_f32x4(),
19195 b.as_f32x4(),
19196 src.as_f32x4(),
19197 k,
19198 _MM_FROUND_NO_EXC,
19199 ))
19200 }
19201
19202 /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
19203 ///
19204 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_ss&expand=2864)
19205 #[inline]
19206 #[target_feature(enable = "avx512f")]
19207 #[cfg_attr(test, assert_instr(vgetexpss))]
19208 pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
19209 transmute(vgetexpss(
19210 a.as_f32x4(),
19211 b.as_f32x4(),
19212 _mm_setzero_ps().as_f32x4(),
19213 k,
19214 _MM_FROUND_NO_EXC,
19215 ))
19216 }
19217
19218 /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
19219 ///
19220 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_sd&expand=2859)
19221 #[inline]
19222 #[target_feature(enable = "avx512f")]
19223 #[cfg_attr(test, assert_instr(vgetexpsd))]
19224 pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
19225 transmute(vgetexpsd(
19226 a.as_f64x2(),
19227 b.as_f64x2(),
19228 _mm_setzero_pd().as_f64x2(),
19229 0b1,
19230 _MM_FROUND_NO_EXC,
19231 ))
19232 }
19233
19234 /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
19235 ///
19236 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_sd&expand=2860)
19237 #[inline]
19238 #[target_feature(enable = "avx512f")]
19239 #[cfg_attr(test, assert_instr(vgetexpsd))]
19240 pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19241 transmute(vgetexpsd(
19242 a.as_f64x2(),
19243 b.as_f64x2(),
19244 src.as_f64x2(),
19245 k,
19246 _MM_FROUND_NO_EXC,
19247 ))
19248 }
19249
19250 /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
19251 ///
19252 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_sd&expand=2861)
19253 #[inline]
19254 #[target_feature(enable = "avx512f")]
19255 #[cfg_attr(test, assert_instr(vgetexpsd))]
19256 pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19257 transmute(vgetexpsd(
19258 a.as_f64x2(),
19259 b.as_f64x2(),
19260 _mm_setzero_pd().as_f64x2(),
19261 k,
19262 _MM_FROUND_NO_EXC,
19263 ))
19264 }
19265
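// Editorial usage sketch (not part of the upstream source): getexp returns
// floor(log2(|x|)) of lane 0 of b as a floating-point value; names below are
// hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_getexp_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn exponent_of_lane0_of_b() {
        let a = _mm_set_ss(0.);
        let b = _mm_set_ss(20.);
        // floor(log2(20)) = 4, delivered as 4.0f32 in lane 0.
        assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), 4.);
        // Zeromask form with mask bit 0 clear zeroes lane 0.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_getexp_ss(0b0, a, b)), 0.);
    }
}
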
19266 /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19267 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19268 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19269 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19270 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19271 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19272 /// The sign is determined by sc which can take the following values:\
19273 /// _MM_MANT_SIGN_src // sign = sign(src)\
19274 /// _MM_MANT_SIGN_zero // sign = 0\
19275 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19276 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19277 ///
19278 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_ss&expand=2898)
19279 #[inline]
19280 #[target_feature(enable = "avx512f")]
19281 #[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
19282 #[rustc_args_required_const(2, 3)]
19283 pub unsafe fn _mm_getmant_ss(
19284 a: __m128,
19285 b: __m128,
19286 norm: _MM_MANTISSA_NORM_ENUM,
19287 sign: _MM_MANTISSA_SIGN_ENUM,
19288 ) -> __m128 {
19289 macro_rules! call {
19290 ($imm4_1:expr, $imm2:expr) => {
19291 vgetmantss(
19292 a.as_f32x4(),
19293 b.as_f32x4(),
19294 $imm2 << 2 | $imm4_1,
19295 _mm_setzero_ps().as_f32x4(),
19296 0b1,
19297 _MM_FROUND_CUR_DIRECTION,
19298 )
19299 };
19300 }
19301 let r = constify_imm4_mantissas!(norm, sign, call);
19302 transmute(r)
19303 }
19304
19305 /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19306 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19307 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19308 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19309 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19310 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19311 /// The sign is determined by sc which can take the following values:\
19312 /// _MM_MANT_SIGN_src // sign = sign(src)\
19313 /// _MM_MANT_SIGN_zero // sign = 0\
19314 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19315 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19316 ///
19317 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_ss&expand=2899)
19318 #[inline]
19319 #[target_feature(enable = "avx512f")]
19320 #[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
19321 #[rustc_args_required_const(4, 5)]
19322 pub unsafe fn _mm_mask_getmant_ss(
19323 src: __m128,
19324 k: __mmask8,
19325 a: __m128,
19326 b: __m128,
19327 norm: _MM_MANTISSA_NORM_ENUM,
19328 sign: _MM_MANTISSA_SIGN_ENUM,
19329 ) -> __m128 {
19330 macro_rules! call {
19331 ($imm4_1:expr, $imm2:expr) => {
19332 vgetmantss(
19333 a.as_f32x4(),
19334 b.as_f32x4(),
19335 $imm2 << 2 | $imm4_1,
19336 src.as_f32x4(),
19337 k,
19338 _MM_FROUND_CUR_DIRECTION,
19339 )
19340 };
19341 }
19342 let r = constify_imm4_mantissas!(norm, sign, call);
19343 transmute(r)
19344 }
19345
19346 /// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19347 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19348 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19349 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19350 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19351 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19352 /// The sign is determined by sc which can take the following values:\
19353 /// _MM_MANT_SIGN_src // sign = sign(src)\
19354 /// _MM_MANT_SIGN_zero // sign = 0\
19355 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19356 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19357 ///
19358 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_ss&expand=2900)
19359 #[inline]
19360 #[target_feature(enable = "avx512f")]
19361 #[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
19362 #[rustc_args_required_const(3, 4)]
19363 pub unsafe fn _mm_maskz_getmant_ss(
19364 k: __mmask8,
19365 a: __m128,
19366 b: __m128,
19367 norm: _MM_MANTISSA_NORM_ENUM,
19368 sign: _MM_MANTISSA_SIGN_ENUM,
19369 ) -> __m128 {
19370 macro_rules! call {
19371 ($imm4_1:expr, $imm2:expr) => {
19372 vgetmantss(
19373 a.as_f32x4(),
19374 b.as_f32x4(),
19375 $imm2 << 2 | $imm4_1,
19376 _mm_setzero_ps().as_f32x4(),
19377 k,
19378 _MM_FROUND_CUR_DIRECTION,
19379 )
19380 };
19381 }
19382 let r = constify_imm4_mantissas!(norm, sign, call);
19383 transmute(r)
19384 }
19385
19386 /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19387 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19388 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19389 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19390 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19391 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19392 /// The sign is determined by sc which can take the following values:\
19393 /// _MM_MANT_SIGN_src // sign = sign(src)\
19394 /// _MM_MANT_SIGN_zero // sign = 0\
19395 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19396 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19397 ///
19398 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_sd&expand=2895)
19399 #[inline]
19400 #[target_feature(enable = "avx512f")]
19401 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
19402 #[rustc_args_required_const(2, 3)]
19403 pub unsafe fn _mm_getmant_sd(
19404 a: __m128d,
19405 b: __m128d,
19406 norm: _MM_MANTISSA_NORM_ENUM,
19407 sign: _MM_MANTISSA_SIGN_ENUM,
19408 ) -> __m128d {
19409 macro_rules! call {
19410 ($imm4_1:expr, $imm2:expr) => {
19411 vgetmantsd(
19412 a.as_f64x2(),
19413 b.as_f64x2(),
19414 $imm2 << 2 | $imm4_1,
19415 _mm_setzero_pd().as_f64x2(),
19416 0b1,
19417 _MM_FROUND_CUR_DIRECTION,
19418 )
19419 };
19420 }
19421 let r = constify_imm4_mantissas!(norm, sign, call);
19422 transmute(r)
19423 }
19424
19425 /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19426 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19427 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19428 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19429 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19430 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19431 /// The sign is determined by sc which can take the following values:\
19432 /// _MM_MANT_SIGN_src // sign = sign(src)\
19433 /// _MM_MANT_SIGN_zero // sign = 0\
19434 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19435 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19436 ///
19437 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_sd&expand=2896)
19438 #[inline]
19439 #[target_feature(enable = "avx512f")]
19440 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
19441 #[rustc_args_required_const(4, 5)]
19442 pub unsafe fn _mm_mask_getmant_sd(
19443 src: __m128d,
19444 k: __mmask8,
19445 a: __m128d,
19446 b: __m128d,
19447 norm: _MM_MANTISSA_NORM_ENUM,
19448 sign: _MM_MANTISSA_SIGN_ENUM,
19449 ) -> __m128d {
19450 macro_rules! call {
19451 ($imm4_1:expr, $imm2:expr) => {
19452 vgetmantsd(
19453 a.as_f64x2(),
19454 b.as_f64x2(),
19455 $imm2 << 2 | $imm4_1,
19456 src.as_f64x2(),
19457 k,
19458 _MM_FROUND_CUR_DIRECTION,
19459 )
19460 };
19461 }
19462 let r = constify_imm4_mantissas!(norm, sign, call);
19463 transmute(r)
19464 }
19465
19466 /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19467 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19468 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19469 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19470 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19471 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19472 /// The sign is determined by sc which can take the following values:\
19473 /// _MM_MANT_SIGN_src // sign = sign(src)\
19474 /// _MM_MANT_SIGN_zero // sign = 0\
19475 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19476 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19477 ///
19478 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_sd&expand=2897)
19479 #[inline]
19480 #[target_feature(enable = "avx512f")]
19481 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
19482 #[rustc_args_required_const(3, 4)]
19483 pub unsafe fn _mm_maskz_getmant_sd(
19484 k: __mmask8,
19485 a: __m128d,
19486 b: __m128d,
19487 norm: _MM_MANTISSA_NORM_ENUM,
19488 sign: _MM_MANTISSA_SIGN_ENUM,
19489 ) -> __m128d {
19490 macro_rules! call {
19491 ($imm4_1:expr, $imm2:expr) => {
19492 vgetmantsd(
19493 a.as_f64x2(),
19494 b.as_f64x2(),
19495 $imm2 << 2 | $imm4_1,
19496 _mm_setzero_pd().as_f64x2(),
19497 k,
19498 _MM_FROUND_CUR_DIRECTION,
19499 )
19500 };
19501 }
19502 let r = constify_imm4_mantissas!(norm, sign, call);
19503 transmute(r)
19504 }
19505
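// Editorial usage sketch (not part of the upstream source): getmant splits off
// the normalized mantissa of lane 0 of b. The constant spellings
// _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_SRC are assumed to match this crate's
// _MM_MANTISSA_NORM_ENUM / _MM_MANTISSA_SIGN_ENUM definitions; an AVX-512F
// CPU is assumed at test time.
#[cfg(test)]
mod scalar_getmant_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mantissa_of_lane0_of_b() {
        let a = _mm_set_ss(0.);
        let b = _mm_set_ss(10.);
        // 10.0 = 1.25 * 2^3, so normalizing to the interval [1, 2) yields 1.25.
        let r = _mm_getmant_ss(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        assert_eq!(_mm_cvtss_f32(r), 1.25);
    }
}
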
19506 /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
19507 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
19508 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
19509 /// _MM_FROUND_TO_NEG_INF // round down\
19510 /// _MM_FROUND_TO_POS_INF // round up\
19511 /// _MM_FROUND_TO_ZERO // truncate\
19512 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
19513 ///
19514 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_ss&expand=4802)
19515 #[inline]
19516 #[target_feature(enable = "avx512f")]
19517 #[cfg_attr(test, assert_instr(vrndscaless, imm8 = 255))]
19518 #[rustc_args_required_const(2)]
19519 pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128, imm8: i32) -> __m128 {
19520 let a = a.as_f32x4();
19521 let b = b.as_f32x4();
19522 let zero = _mm_setzero_ps().as_f32x4();
19523 macro_rules! call {
19524 ($imm8:expr) => {
19525 vrndscaless(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
19526 };
19527 }
19528 let r = constify_imm8_sae!(imm8, call);
19529 transmute(r)
19530 }
19531
19532 /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
19533 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
19534 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
19535 /// _MM_FROUND_TO_NEG_INF // round down\
19536 /// _MM_FROUND_TO_POS_INF // round up\
19537 /// _MM_FROUND_TO_ZERO // truncate\
19538 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
19539 ///
19540 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_ss&expand=4800)
19541 #[inline]
19542 #[target_feature(enable = "avx512f")]
19543 #[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
19544 #[rustc_args_required_const(4)]
19545 pub unsafe fn _mm_mask_roundscale_ss(
19546 src: __m128,
19547 k: __mmask8,
19548 a: __m128,
19549 b: __m128,
19550 imm8: i32,
19551 ) -> __m128 {
19552 let a = a.as_f32x4();
19553 let b = b.as_f32x4();
19554 let src = src.as_f32x4();
19555 macro_rules! call {
19556 ($imm8:expr) => {
19557 vrndscaless(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
19558 };
19559 }
19560 let r = constify_imm8_sae!(imm8, call);
19561 transmute(r)
19562 }
19563
19564 /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
19565 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
19566 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
19567 /// _MM_FROUND_TO_NEG_INF // round down\
19568 /// _MM_FROUND_TO_POS_INF // round up\
19569 /// _MM_FROUND_TO_ZERO // truncate\
19570 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
19571 ///
19572 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_ss&expand=4801)
19573 #[inline]
19574 #[target_feature(enable = "avx512f")]
19575 #[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
19576 #[rustc_args_required_const(3)]
19577 pub unsafe fn _mm_maskz_roundscale_ss(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 {
19578 let a = a.as_f32x4();
19579 let b = b.as_f32x4();
19580 let zero = _mm_setzero_ps().as_f32x4();
19581 macro_rules! call {
19582 ($imm8:expr) => {
19583 vrndscaless(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
19584 };
19585 }
19586 let r = constify_imm8_sae!(imm8, call);
19587 transmute(r)
19588 }
19589
19590 /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
19591 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
19592 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
19593 /// _MM_FROUND_TO_NEG_INF // round down\
19594 /// _MM_FROUND_TO_POS_INF // round up\
19595 /// _MM_FROUND_TO_ZERO // truncate\
19596 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
19597 ///
19598 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_sd&expand=4799)
19599 #[inline]
19600 #[target_feature(enable = "avx512f")]
19601 #[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 255))]
19602 #[rustc_args_required_const(2)]
19603 pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
19604 let a = a.as_f64x2();
19605 let b = b.as_f64x2();
19606 let zero = _mm_setzero_pd().as_f64x2();
19607 macro_rules! call {
19608 ($imm8:expr) => {
19609 vrndscalesd(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
19610 };
19611 }
19612 let r = constify_imm8_sae!(imm8, call);
19613 transmute(r)
19614 }
19615
19616 /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
19617 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
19618 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
19619 /// _MM_FROUND_TO_NEG_INF // round down\
19620 /// _MM_FROUND_TO_POS_INF // round up\
19621 /// _MM_FROUND_TO_ZERO // truncate\
19622 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
19623 ///
19624 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_sd&expand=4797)
19625 #[inline]
19626 #[target_feature(enable = "avx512f")]
19627 #[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
19628 #[rustc_args_required_const(4)]
19629 pub unsafe fn _mm_mask_roundscale_sd(
19630 src: __m128d,
19631 k: __mmask8,
19632 a: __m128d,
19633 b: __m128d,
19634 imm8: i32,
19635 ) -> __m128d {
19636 let a = a.as_f64x2();
19637 let b = b.as_f64x2();
19638 let src = src.as_f64x2();
19639 macro_rules! call {
19640 ($imm8:expr) => {
19641 vrndscalesd(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
19642 };
19643 }
19644 let r = constify_imm8_sae!(imm8, call);
19645 transmute(r)
19646 }
19647
19648 /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
19649 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
19650 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
19651 /// _MM_FROUND_TO_NEG_INF // round down\
19652 /// _MM_FROUND_TO_POS_INF // round up\
19653 /// _MM_FROUND_TO_ZERO // truncate\
19654 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
19655 ///
19656 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_sd&expand=4798)
19657 #[inline]
19658 #[target_feature(enable = "avx512f")]
19659 #[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
19660 #[rustc_args_required_const(3)]
19661 pub unsafe fn _mm_maskz_roundscale_sd(k: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __m128d {
19662 let a = a.as_f64x2();
19663 let b = b.as_f64x2();
19664 let zero = _mm_setzero_pd().as_f64x2();
19665 macro_rules! call {
19666 ($imm8:expr) => {
19667 vrndscalesd(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
19668 };
19669 }
19670 let r = constify_imm8_sae!(imm8, call);
19671 transmute(r)
19672 }
19673
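// Editorial usage sketch (not part of the upstream source): roundscale keeps
// imm8[7:4] fraction bits, so imm8 = 0 rounds lane 0 of b to a whole number;
// names below are hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_roundscale_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn round_lane0_of_b() {
        let a = _mm_set_ss(0.);
        let b = _mm_set_ss(1.1);
        // imm8 = 0: round to nearest with zero fraction bits, so 1.1 -> 1.0.
        assert_eq!(_mm_cvtss_f32(_mm_roundscale_ss(a, b, 0)), 1.0);
        // Writemask form with mask bit 0 clear copies lane 0 from src.
        let src = _mm_set_ss(7.);
        assert_eq!(_mm_cvtss_f32(_mm_mask_roundscale_ss(src, 0b0, a, b, 0)), 7.);
    }
}
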
19674 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
19675 ///
19676 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_ss&expand=4901)
19677 #[inline]
19678 #[target_feature(enable = "avx512f")]
19679 #[cfg_attr(test, assert_instr(vscalefss))]
19680 pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
19681 transmute(vscalefss(
19682 a.as_f32x4(),
19683 b.as_f32x4(),
19684 _mm_setzero_ps().as_f32x4(),
19685 0b11111111,
19686 _MM_FROUND_CUR_DIRECTION,
19687 ))
19688 }
19689
19690 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19691 ///
19692 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_ss&expand=4899)
19693 #[inline]
19694 #[target_feature(enable = "avx512f")]
19695 #[cfg_attr(test, assert_instr(vscalefss))]
19696 pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
19697 transmute(vscalefss(
19698 a.as_f32x4(),
19699 b.as_f32x4(),
19700 src.as_f32x4(),
19701 k,
19702 _MM_FROUND_CUR_DIRECTION,
19703 ))
19704 }
19705
19706 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19707 ///
19708 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_ss&expand=4900)
19709 #[inline]
19710 #[target_feature(enable = "avx512f")]
19711 #[cfg_attr(test, assert_instr(vscalefss))]
19712 pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
19713 transmute(vscalefss(
19714 a.as_f32x4(),
19715 b.as_f32x4(),
19716 _mm_setzero_ps().as_f32x4(),
19717 k,
19718 _MM_FROUND_CUR_DIRECTION,
19719 ))
19720 }
19721
19722 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
19723 ///
19724 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_sd&expand=4898)
19725 #[inline]
19726 #[target_feature(enable = "avx512f")]
19727 #[cfg_attr(test, assert_instr(vscalefsd))]
19728 pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
19729 transmute(vscalefsd(
19730 a.as_f64x2(),
19731 b.as_f64x2(),
19732 _mm_setzero_pd().as_f64x2(),
19733 0b11111111,
19734 _MM_FROUND_CUR_DIRECTION,
19735 ))
19736 }
19737
19738 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
19739 ///
19740 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_sd&expand=4896)
19741 #[inline]
19742 #[target_feature(enable = "avx512f")]
19743 #[cfg_attr(test, assert_instr(vscalefsd))]
19744 pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19745 transmute(vscalefsd(
19746 a.as_f64x2(),
19747 b.as_f64x2(),
19748 src.as_f64x2(),
19749 k,
19750 _MM_FROUND_CUR_DIRECTION,
19751 ))
19752 }
19753
19754 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
19755 ///
19756 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_sd&expand=4897)
19757 #[inline]
19758 #[target_feature(enable = "avx512f")]
19759 #[cfg_attr(test, assert_instr(vscalefsd))]
19760 pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
19761 transmute(vscalefsd(
19762 a.as_f64x2(),
19763 b.as_f64x2(),
19764 _mm_setzero_pd().as_f64x2(),
19765 k,
19766 _MM_FROUND_CUR_DIRECTION,
19767 ))
19768 }
19769
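// Editorial usage sketch (not part of the upstream source): per Intel's
// operation description, scalef computes a * 2^floor(b) in lane 0; names below
// are hypothetical and an AVX-512F CPU is assumed at test time.
#[cfg(test)]
mod scalar_scalef_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn scale_lane0_by_power_of_two() {
        let a = _mm_set_ss(3.);
        let b = _mm_set_ss(2.);
        // Lane 0: 3 * 2^2 = 12.
        assert_eq!(_mm_cvtss_f32(_mm_scalef_ss(a, b)), 12.);
        // Zeromask form with mask bit 0 clear zeroes lane 0.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_scalef_ss(0b0, a, b)), 0.);
    }
}
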
19770 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19771 ///
19772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_ss&expand=2582)
19773 #[inline]
19774 #[target_feature(enable = "avx512f")]
19775 #[cfg_attr(test, assert_instr(vfmadd213ss))]
19776 pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
19777 let mut fmadd: f32 = simd_extract(a, 0);
19778 if (k & 0b00000001) != 0 {
19779 let extractb: f32 = simd_extract(b, 0);
19780 let extractc: f32 = simd_extract(c, 0);
19781 fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19782 }
19783 let r = simd_insert(a, 0, fmadd);
19784 transmute(r)
19785 }
19786
19787 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19788 ///
19789 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_ss&expand=2584)
19790 #[inline]
19791 #[target_feature(enable = "avx512f")]
19792 #[cfg_attr(test, assert_instr(vfmadd213ss))]
19793 pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
19794 let mut fmadd: f32 = 0.;
19795 if (k & 0b00000001) != 0 {
19796 let extracta: f32 = simd_extract(a, 0);
19797 let extractb: f32 = simd_extract(b, 0);
19798 let extractc: f32 = simd_extract(c, 0);
19799 fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19800 }
19801 let r = simd_insert(a, 0, fmadd);
19802 transmute(r)
19803 }
19804
19805 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
19806 ///
19807 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_ss&expand=2583)
19808 #[inline]
19809 #[target_feature(enable = "avx512f")]
19810 #[cfg_attr(test, assert_instr(vfmadd213ss))]
19811 pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
19812 let mut fmadd: f32 = simd_extract(c, 0);
19813 if (k & 0b00000001) != 0 {
19814 let extracta: f32 = simd_extract(a, 0);
19815 let extractb: f32 = simd_extract(b, 0);
19816 fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
19817 }
19818 let r = simd_insert(c, 0, fmadd);
19819 transmute(r)
19820 }
19821
19822 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
19823 ///
19824 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_sd&expand=2578)
19825 #[inline]
19826 #[target_feature(enable = "avx512f")]
19827 #[cfg_attr(test, assert_instr(vfmadd213sd))]
19828 pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
19829 let mut fmadd: f64 = simd_extract(a, 0);
19830 if (k & 0b00000001) != 0 {
19831 let extractb: f64 = simd_extract(b, 0);
19832 let extractc: f64 = simd_extract(c, 0);
19833 fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19834 }
19835 let r = simd_insert(a, 0, fmadd);
19836 transmute(r)
19837 }
19838
19839 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
19840 ///
19841 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_sd&expand=2580)
19842 #[inline]
19843 #[target_feature(enable = "avx512f")]
19844 #[cfg_attr(test, assert_instr(vfmadd213sd))]
19845 pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
19846 let mut fmadd: f64 = 0.;
19847 if (k & 0b00000001) != 0 {
19848 let extracta: f64 = simd_extract(a, 0);
19849 let extractb: f64 = simd_extract(b, 0);
19850 let extractc: f64 = simd_extract(c, 0);
19851 fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19852 }
19853 let r = simd_insert(a, 0, fmadd);
19854 transmute(r)
19855 }
19856
19857 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
19858 ///
19859 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_sd&expand=2579)
19860 #[inline]
19861 #[target_feature(enable = "avx512f")]
19862 #[cfg_attr(test, assert_instr(vfmadd213sd))]
19863 pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
19864 let mut fmadd: f64 = simd_extract(c, 0);
19865 if (k & 0b00000001) != 0 {
19866 let extracta: f64 = simd_extract(a, 0);
19867 let extractb: f64 = simd_extract(b, 0);
19868 fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
19869 }
19870 let r = simd_insert(c, 0, fmadd);
19871 transmute(r)
19872 }
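
// Illustrative sketch (not upstream code) contrasting the three masked fmadd_sd variants
// documented above; the helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_fmadd_sd_sketch() {
    let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(4.0));
    // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0 for every variant.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fmadd_sd(a, 1, b, c)), 10.0);
    // Mask bit 0 clear: `mask` keeps a, `maskz` zeroes, `mask3` keeps c.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fmadd_sd(a, 0, b, c)), 2.0);
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_fmadd_sd(0, a, b, c)), 0.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask3_fmadd_sd(a, b, c, 0)), 4.0);
}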
19873
19874 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19875 ///
19876 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_ss&expand=2668)
19877 #[inline]
19878 #[target_feature(enable = "avx512f")]
19879 #[cfg_attr(test, assert_instr(vfmsub213ss))]
19880 pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
19881 let mut fmsub: f32 = simd_extract(a, 0);
19882 if (k & 0b00000001) != 0 {
19883 let extractb: f32 = simd_extract(b, 0);
19884 let extractc: f32 = simd_extract(c, 0);
19885 let extractc = -extractc;
19886 fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19887 }
19888 let r = simd_insert(a, 0, fmsub);
19889 transmute(r)
19890 }
19891
19892 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19893 ///
19894 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_ss&expand=2670)
19895 #[inline]
19896 #[target_feature(enable = "avx512f")]
19897 #[cfg_attr(test, assert_instr(vfmsub213ss))]
19898 pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
19899 let mut fmsub: f32 = 0.;
19900 if (k & 0b00000001) != 0 {
19901 let extracta: f32 = simd_extract(a, 0);
19902 let extractb: f32 = simd_extract(b, 0);
19903 let extractc: f32 = simd_extract(c, 0);
19904 let extractc = -extractc;
19905 fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19906 }
19907 let r = simd_insert(a, 0, fmsub);
19908 transmute(r)
19909 }
19910
19911 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
19912 ///
19913 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_ss&expand=2669)
19914 #[inline]
19915 #[target_feature(enable = "avx512f")]
19916 #[cfg_attr(test, assert_instr(vfmsub213ss))]
19917 pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
19918 let mut fmsub: f32 = simd_extract(c, 0);
19919 if (k & 0b00000001) != 0 {
19920 let extracta: f32 = simd_extract(a, 0);
19921 let extractb: f32 = simd_extract(b, 0);
19922 let extractc = -fmsub;
19923 fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19924 }
19925 let r = simd_insert(c, 0, fmsub);
19926 transmute(r)
19927 }
19928
19929 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
19930 ///
19931 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_sd&expand=2664)
19932 #[inline]
19933 #[target_feature(enable = "avx512f")]
19934 #[cfg_attr(test, assert_instr(vfmsub213sd))]
19935 pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
19936 let mut fmsub: f64 = simd_extract(a, 0);
19937 if (k & 0b00000001) != 0 {
19938 let extractb: f64 = simd_extract(b, 0);
19939 let extractc: f64 = simd_extract(c, 0);
19940 let extractc = -extractc;
19941 fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19942 }
19943 let r = simd_insert(a, 0, fmsub);
19944 transmute(r)
19945 }
19946
19947 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
19948 ///
19949 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_sd&expand=2666)
19950 #[inline]
19951 #[target_feature(enable = "avx512f")]
19952 #[cfg_attr(test, assert_instr(vfmsub213sd))]
19953 pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
19954 let mut fmsub: f64 = 0.;
19955 if (k & 0b00000001) != 0 {
19956 let extracta: f64 = simd_extract(a, 0);
19957 let extractb: f64 = simd_extract(b, 0);
19958 let extractc: f64 = simd_extract(c, 0);
19959 let extractc = -extractc;
19960 fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19961 }
19962 let r = simd_insert(a, 0, fmsub);
19963 transmute(r)
19964 }
19965
19966 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
19967 ///
19968 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_sd&expand=2665)
19969 #[inline]
19970 #[target_feature(enable = "avx512f")]
19971 #[cfg_attr(test, assert_instr(vfmsub213sd))]
19972 pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
19973 let mut fmsub: f64 = simd_extract(c, 0);
19974 if (k & 0b00000001) != 0 {
19975 let extracta: f64 = simd_extract(a, 0);
19976 let extractb: f64 = simd_extract(b, 0);
19977 let extractc = -fmsub;
19978 fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19979 }
19980 let r = simd_insert(c, 0, fmsub);
19981 transmute(r)
19982 }
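
// Illustrative sketch (not upstream code): as the bodies above show, fmsub is expressed as
// a fused multiply-add with a negated c, i.e. a*b - c == fma(a, b, -c). The helper name
// and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_fmsub_sd_sketch() {
    let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(4.0));
    // Mask bit 0 set: lower lane = 2.0 * 3.0 - 4.0 = 2.0
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fmsub_sd(a, 1, b, c)), 2.0);
    // Mask bit 0 clear with the mask3 variant: lower lane is copied from c.
    assert_eq!(_mm_cvtsd_f64(_mm_mask3_fmsub_sd(a, b, c, 0)), 4.0);
}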
19983
19984 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
19985 ///
19986 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_ss&expand=2748)
19987 #[inline]
19988 #[target_feature(enable = "avx512f")]
19989 #[cfg_attr(test, assert_instr(vfnmadd213ss))]
19990 pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
19991 let mut fnmadd: f32 = simd_extract(a, 0);
19992 if (k & 0b00000001) != 0 {
19993 let extracta = -fnmadd;
19994 let extractb: f32 = simd_extract(b, 0);
19995 let extractc: f32 = simd_extract(c, 0);
19996 fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
19997 }
19998 let r = simd_insert(a, 0, fnmadd);
19999 transmute(r)
20000 }
20001
20002 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
20003 ///
20004 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_ss&expand=2750)
20005 #[inline]
20006 #[target_feature(enable = "avx512f")]
20007 #[cfg_attr(test, assert_instr(vfnmadd213ss))]
20008 pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
20009 let mut fnmadd: f32 = 0.;
20010 if (k & 0b00000001) != 0 {
20011 let extracta: f32 = simd_extract(a, 0);
20012 let extracta = -extracta;
20013 let extractb: f32 = simd_extract(b, 0);
20014 let extractc: f32 = simd_extract(c, 0);
20015 fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20016 }
20017 let r = simd_insert(a, 0, fnmadd);
20018 transmute(r)
20019 }
20020
20021 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
20022 ///
20023 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_ss&expand=2749)
20024 #[inline]
20025 #[target_feature(enable = "avx512f")]
20026 #[cfg_attr(test, assert_instr(vfnmadd213ss))]
20027 pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
20028 let mut fnmadd: f32 = simd_extract(c, 0);
20029 if (k & 0b00000001) != 0 {
20030 let extracta: f32 = simd_extract(a, 0);
20031 let extracta = -extracta;
20032 let extractb: f32 = simd_extract(b, 0);
20033 fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
20034 }
20035 let r = simd_insert(c, 0, fnmadd);
20036 transmute(r)
20037 }
20038
20039 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
20040 ///
20041 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_sd&expand=2744)
20042 #[inline]
20043 #[target_feature(enable = "avx512f")]
20044 #[cfg_attr(test, assert_instr(vfnmadd213sd))]
20045 pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
20046 let mut fnmadd: f64 = simd_extract(a, 0);
20047 if (k & 0b00000001) != 0 {
20048 let extracta = -fnmadd;
20049 let extractb: f64 = simd_extract(b, 0);
20050 let extractc: f64 = simd_extract(c, 0);
20051 fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20052 }
20053 let r = simd_insert(a, 0, fnmadd);
20054 transmute(r)
20055 }
20056
20057 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
20058 ///
20059 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_sd&expand=2746)
20060 #[inline]
20061 #[target_feature(enable = "avx512f")]
20062 #[cfg_attr(test, assert_instr(vfnmadd213sd))]
20063 pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
20064 let mut fnmadd: f64 = 0.;
20065 if (k & 0b00000001) != 0 {
20066 let extracta: f64 = simd_extract(a, 0);
20067 let extracta = -extracta;
20068 let extractb: f64 = simd_extract(b, 0);
20069 let extractc: f64 = simd_extract(c, 0);
20070 fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20071 }
20072 let r = simd_insert(a, 0, fnmadd);
20073 transmute(r)
20074 }
20075
20076 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
20077 ///
20078 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_sd&expand=2745)
20079 #[inline]
20080 #[target_feature(enable = "avx512f")]
20081 #[cfg_attr(test, assert_instr(vfnmadd213sd))]
20082 pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
20083 let mut fnmadd: f64 = simd_extract(c, 0);
20084 if (k & 0b00000001) != 0 {
20085 let extracta: f64 = simd_extract(a, 0);
20086 let extracta = -extracta;
20087 let extractb: f64 = simd_extract(b, 0);
20088 fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
20089 }
20090 let r = simd_insert(c, 0, fnmadd);
20091 transmute(r)
20092 }
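
// Illustrative sketch (not upstream code): fnmadd negates the product before the add, so
// the lower lane becomes -(a*b) + c. The helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_fnmadd_sd_sketch() {
    let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(4.0));
    // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 4.0 = -2.0
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmadd_sd(a, 1, b, c)), -2.0);
    // Zeromask with bit 0 clear: lower lane is zeroed.
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_fnmadd_sd(0, a, b, c)), 0.0);
}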
20093
20094 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
20095 ///
20096 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_ss&expand=2796)
20097 #[inline]
20098 #[target_feature(enable = "avx512f")]
20099 #[cfg_attr(test, assert_instr(vfnmsub213ss))]
20100 pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
20101 let mut fnmsub: f32 = simd_extract(a, 0);
20102 if (k & 0b00000001) != 0 {
20103 let extracta = -fnmsub;
20104 let extractb: f32 = simd_extract(b, 0);
20105 let extractc: f32 = simd_extract(c, 0);
20106 let extractc = -extractc;
20107 fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20108 }
20109 let r = simd_insert(a, 0, fnmsub);
20110 transmute(r)
20111 }
20112
20113 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
20114 ///
20115 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_ss&expand=2798)
20116 #[inline]
20117 #[target_feature(enable = "avx512f")]
20118 #[cfg_attr(test, assert_instr(vfnmsub213ss))]
20119 pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
20120 let mut fnmsub: f32 = 0.;
20121 if (k & 0b00000001) != 0 {
20122 let extracta: f32 = simd_extract(a, 0);
20123 let extracta = -extracta;
20124 let extractb: f32 = simd_extract(b, 0);
20125 let extractc: f32 = simd_extract(c, 0);
20126 let extractc = -extractc;
20127 fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20128 }
20129 let r = simd_insert(a, 0, fnmsub);
20130 transmute(r)
20131 }
20132
20133 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
20134 ///
20135 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_ss&expand=2797)
20136 #[inline]
20137 #[target_feature(enable = "avx512f")]
20138 #[cfg_attr(test, assert_instr(vfnmsub213ss))]
20139 pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
20140 let mut fnmsub: f32 = simd_extract(c, 0);
20141 if (k & 0b00000001) != 0 {
20142 let extracta: f32 = simd_extract(a, 0);
20143 let extracta = -extracta;
20144 let extractb: f32 = simd_extract(b, 0);
20145 let extractc = -fnmsub;
20146 fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20147 }
20148 let r = simd_insert(c, 0, fnmsub);
20149 transmute(r)
20150 }
20151
20152 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
20153 ///
20154 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_sd&expand=2792)
20155 #[inline]
20156 #[target_feature(enable = "avx512f")]
20157 #[cfg_attr(test, assert_instr(vfnmsub213sd))]
20158 pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
20159 let mut fnmsub: f64 = simd_extract(a, 0);
20160 if (k & 0b00000001) != 0 {
20161 let extracta = -fnmsub;
20162 let extractb: f64 = simd_extract(b, 0);
20163 let extractc: f64 = simd_extract(c, 0);
20164 let extractc = -extractc;
20165 fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20166 }
20167 let r = simd_insert(a, 0, fnmsub);
20168 transmute(r)
20169 }
20170
20171 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
20172 ///
20173 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_sd&expand=2794)
20174 #[inline]
20175 #[target_feature(enable = "avx512f")]
20176 #[cfg_attr(test, assert_instr(vfnmsub213sd))]
20177 pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
20178 let mut fnmsub: f64 = 0.;
20179 if (k & 0b00000001) != 0 {
20180 let extracta: f64 = simd_extract(a, 0);
20181 let extracta = -extracta;
20182 let extractb: f64 = simd_extract(b, 0);
20183 let extractc: f64 = simd_extract(c, 0);
20184 let extractc = -extractc;
20185 fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20186 }
20187 let r = simd_insert(a, 0, fnmsub);
20188 transmute(r)
20189 }
20190
20191 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
20192 ///
20193 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_sd&expand=2793)
20194 #[inline]
20195 #[target_feature(enable = "avx512f")]
20196 #[cfg_attr(test, assert_instr(vfnmsub213sd))]
20197 pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
20198 let mut fnmsub: f64 = simd_extract(c, 0);
20199 if (k & 0b00000001) != 0 {
20200 let extracta: f64 = simd_extract(a, 0);
20201 let extracta = -extracta;
20202 let extractb: f64 = simd_extract(b, 0);
20203 let extractc = -fnmsub;
20204 fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
20205 }
20206 let r = simd_insert(c, 0, fnmsub);
20207 transmute(r)
20208 }
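
// Illustrative sketch (not upstream code): fnmsub negates both the product and c, so the
// lower lane becomes -(a*b) - c. The helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_fnmsub_sd_sketch() {
    let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(4.0));
    // Mask bit 0 set: lower lane = -(2.0 * 3.0) - 4.0 = -10.0
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmsub_sd(a, 1, b, c)), -10.0);
    // Writemask with bit 0 clear: lower lane is copied from a.
    assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmsub_sd(a, 0, b, c)), 2.0);
}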
20209
20210 /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
20211 ///
20212 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20213 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20214 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20215 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20216 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20217 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20218 ///
20219 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_ss&expand=151)
20220 #[inline]
20221 #[target_feature(enable = "avx512f")]
20222 #[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
20223 #[rustc_args_required_const(2)]
20224 pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
20225 macro_rules! call {
20226 ($imm4:expr) => {
20227 vaddss(
20228 a.as_f32x4(),
20229 b.as_f32x4(),
20230 _mm_setzero_ps().as_f32x4(),
20231 0b1,
20232 $imm4,
20233 )
20234 };
20235 }
20236 transmute(constify_imm4_round!(rounding, call))
20237 }
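
// Illustrative sketch (not upstream code): the `rounding` argument must be a constant
// expression, typically one of the `_MM_FROUND_*` combinations listed above. For exactly
// representable inputs every mode yields the same sum. The helper name and values are
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_add_round_ss_sketch() {
    let a = _mm_set_ss(1.5);
    let b = _mm_set_ss(2.25);
    let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtss_f32(r), 3.75);
    let r = _mm_add_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtss_f32(r), 3.75);
}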
20238
20239 /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20240 ///
20241 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20242 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20243 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20244 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20245 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20246 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20247 ///
20248 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_ss&expand=152)
20249 #[inline]
20250 #[target_feature(enable = "avx512f")]
20251 #[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
20252 #[rustc_args_required_const(4)]
20253 pub unsafe fn _mm_mask_add_round_ss(
20254 src: __m128,
20255 k: __mmask8,
20256 a: __m128,
20257 b: __m128,
20258 rounding: i32,
20259 ) -> __m128 {
20260 macro_rules! call {
20261 ($imm4:expr) => {
20262 vaddss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
20263 };
20264 }
20265 transmute(constify_imm4_round!(rounding, call))
20266 }
20267
20268 /// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20269 ///
20270 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20271 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20272 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20273 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20274 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20275 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20276 ///
20277 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_ss&expand=153)
20278 #[inline]
20279 #[target_feature(enable = "avx512f")]
20280 #[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
20281 #[rustc_args_required_const(3)]
20282 pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
20283 macro_rules! call {
20284 ($imm4:expr) => {
20285 vaddss(
20286 a.as_f32x4(),
20287 b.as_f32x4(),
20288 _mm_setzero_ps().as_f32x4(),
20289 k,
20290 $imm4,
20291 )
20292 };
20293 }
20294 transmute(constify_imm4_round!(rounding, call))
20295 }
20296
20297 /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
20298 ///
20299 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20300 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20301 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20302 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20303 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20304 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20305 ///
20306 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sd&expand=148)
20307 #[inline]
20308 #[target_feature(enable = "avx512f")]
20309 #[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
20310 #[rustc_args_required_const(2)]
20311 pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
20312 macro_rules! call {
20313 ($imm4:expr) => {
20314 vaddsd(
20315 a.as_f64x2(),
20316 b.as_f64x2(),
20317 _mm_setzero_pd().as_f64x2(),
20318 0b1,
20319 $imm4,
20320 )
20321 };
20322 }
20323 transmute(constify_imm4_round!(rounding, call))
20324 }
20325
20326 /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20327 ///
20328 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20329 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20330 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20331 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20332 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20333 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20334 ///
20335 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_Sd&expand=149)
20336 #[inline]
20337 #[target_feature(enable = "avx512f")]
20338 #[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
20339 #[rustc_args_required_const(4)]
20340 pub unsafe fn _mm_mask_add_round_sd(
20341 src: __m128d,
20342 k: __mmask8,
20343 a: __m128d,
20344 b: __m128d,
20345 rounding: i32,
20346 ) -> __m128d {
20347 macro_rules! call {
20348 ($imm4:expr) => {
20349 vaddsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
20350 };
20351 }
20352 transmute(constify_imm4_round!(rounding, call))
20353 }
20354
20355 /// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20356 ///
20357 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20358 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20359 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20360 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20361 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20362 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20363 ///
20364 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sd&expand=150)
20365 #[inline]
20366 #[target_feature(enable = "avx512f")]
20367 #[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
20368 #[rustc_args_required_const(3)]
20369 pub unsafe fn _mm_maskz_add_round_sd(
20370 k: __mmask8,
20371 a: __m128d,
20372 b: __m128d,
20373 rounding: i32,
20374 ) -> __m128d {
20375 macro_rules! call {
20376 ($imm4:expr) => {
20377 vaddsd(
20378 a.as_f64x2(),
20379 b.as_f64x2(),
20380 _mm_setzero_pd().as_f64x2(),
20381 k,
20382 $imm4,
20383 )
20384 };
20385 }
20386 transmute(constify_imm4_round!(rounding, call))
20387 }
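
// Illustrative sketch (not upstream code): the masked rounding variants combine the
// constant rounding mode with the usual writemask/zeromask selection on lane 0. The
// helper name and values are hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_add_round_sd_sketch() {
    let src = _mm_set_sd(9.0);
    let (a, b) = (_mm_set_sd(1.0), _mm_set_sd(2.0));
    // Mask bit 0 clear: the writemask variant keeps src, the zeromask variant yields 0.
    let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtsd_f64(r), 9.0);
    let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtsd_f64(r), 0.0);
    // Mask bit 0 set: lower lane = 1.0 + 2.0 = 3.0.
    let r = _mm_mask_add_round_sd(src, 1, a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtsd_f64(r), 3.0);
}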
20388
20389 /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
20390 ///
20391 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20392 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20393 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20394 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20395 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20396 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20397 ///
20398 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_ss&expand=5745)
20399 #[inline]
20400 #[target_feature(enable = "avx512f")]
20401 #[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
20402 #[rustc_args_required_const(2)]
20403 pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
20404 macro_rules! call {
20405 ($imm4:expr) => {
20406 vsubss(
20407 a.as_f32x4(),
20408 b.as_f32x4(),
20409 _mm_setzero_ps().as_f32x4(),
20410 0b1,
20411 $imm4,
20412 )
20413 };
20414 }
20415 transmute(constify_imm4_round!(rounding, call))
20416 }
20417
20418 /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20419 ///
20420 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20421 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20422 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20423 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20424 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20425 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20426 ///
20427 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_ss&expand=5743)
20428 #[inline]
20429 #[target_feature(enable = "avx512f")]
20430 #[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
20431 #[rustc_args_required_const(4)]
20432 pub unsafe fn _mm_mask_sub_round_ss(
20433 src: __m128,
20434 k: __mmask8,
20435 a: __m128,
20436 b: __m128,
20437 rounding: i32,
20438 ) -> __m128 {
20439 macro_rules! call {
20440 ($imm4:expr) => {
20441 vsubss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
20442 };
20443 }
20444 transmute(constify_imm4_round!(rounding, call))
20445 }
20446
20447 /// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20448 ///
20449 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20450 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20451 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20452 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20453 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20454 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20455 ///
20456 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_ss&expand=5744)
20457 #[inline]
20458 #[target_feature(enable = "avx512f")]
20459 #[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
20460 #[rustc_args_required_const(3)]
20461 pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
20462 macro_rules! call {
20463 ($imm4:expr) => {
20464 vsubss(
20465 a.as_f32x4(),
20466 b.as_f32x4(),
20467 _mm_setzero_ps().as_f32x4(),
20468 k,
20469 $imm4,
20470 )
20471 };
20472 }
20473 transmute(constify_imm4_round!(rounding, call))
20474 }
20475
20476 /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
20477 ///
20478 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20479 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20480 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20481 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20482 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20483 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20484 ///
20485 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_sd&expand=5742)
20486 #[inline]
20487 #[target_feature(enable = "avx512f")]
20488 #[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
20489 #[rustc_args_required_const(2)]
20490 pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
20491 macro_rules! call {
20492 ($imm4:expr) => {
20493 vsubsd(
20494 a.as_f64x2(),
20495 b.as_f64x2(),
20496 _mm_setzero_pd().as_f64x2(),
20497 0b1,
20498 $imm4,
20499 )
20500 };
20501 }
20502 transmute(constify_imm4_round!(rounding, call))
20503 }
20504
20505 /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20506 ///
20507 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20508 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20509 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20510 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20511 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20512 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20513 ///
20514 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_sd&expand=5740)
20515 #[inline]
20516 #[target_feature(enable = "avx512f")]
20517 #[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
20518 #[rustc_args_required_const(4)]
20519 pub unsafe fn _mm_mask_sub_round_sd(
20520 src: __m128d,
20521 k: __mmask8,
20522 a: __m128d,
20523 b: __m128d,
20524 rounding: i32,
20525 ) -> __m128d {
20526 macro_rules! call {
20527 ($imm4:expr) => {
20528 vsubsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
20529 };
20530 }
20531 transmute(constify_imm4_round!(rounding, call))
20532 }
20533
20534 /// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20535 ///
20536 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20537 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20538 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20539 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20540 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20541 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20542 ///
20543 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_sd&expand=5741)
20544 #[inline]
20545 #[target_feature(enable = "avx512f")]
20546 #[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
20547 #[rustc_args_required_const(3)]
20548 pub unsafe fn _mm_maskz_sub_round_sd(
20549 k: __mmask8,
20550 a: __m128d,
20551 b: __m128d,
20552 rounding: i32,
20553 ) -> __m128d {
20554 macro_rules! call {
20555 ($imm4:expr) => {
20556 vsubsd(
20557 a.as_f64x2(),
20558 b.as_f64x2(),
20559 _mm_setzero_pd().as_f64x2(),
20560 k,
20561 $imm4,
20562 )
20563 };
20564 }
20565 transmute(constify_imm4_round!(rounding, call))
20566 }
20567
20568 /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
20569 ///
20570 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20571 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20572 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20573 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20574 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20575 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20576 ///
20577 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_ss&expand=3946)
20578 #[inline]
20579 #[target_feature(enable = "avx512f")]
20580 #[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
20581 #[rustc_args_required_const(2)]
20582 pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
20583 macro_rules! call {
20584 ($imm4:expr) => {
20585 vmulss(
20586 a.as_f32x4(),
20587 b.as_f32x4(),
20588 _mm_setzero_ps().as_f32x4(),
20589 0b1,
20590 $imm4,
20591 )
20592 };
20593 }
20594 transmute(constify_imm4_round!(rounding, call))
20595 }
20596
20597 /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20598 ///
20599 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20600 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20601 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20602 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20603 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20604 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20605 ///
20606 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_ss&expand=3944)
20607 #[inline]
20608 #[target_feature(enable = "avx512f")]
20609 #[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
20610 #[rustc_args_required_const(4)]
20611 pub unsafe fn _mm_mask_mul_round_ss(
20612 src: __m128,
20613 k: __mmask8,
20614 a: __m128,
20615 b: __m128,
20616 rounding: i32,
20617 ) -> __m128 {
20618 macro_rules! call {
20619 ($imm4:expr) => {
20620 vmulss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
20621 };
20622 }
20623 transmute(constify_imm4_round!(rounding, call))
20624 }
20625
20626 /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20627 ///
20628 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20629 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20630 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20631 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20632 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20633 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20634 ///
20635 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_ss&expand=3945)
20636 #[inline]
20637 #[target_feature(enable = "avx512f")]
20638 #[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
20639 #[rustc_args_required_const(3)]
20640 pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
20641 macro_rules! call {
20642 ($imm4:expr) => {
20643 vmulss(
20644 a.as_f32x4(),
20645 b.as_f32x4(),
20646 _mm_setzero_ps().as_f32x4(),
20647 k,
20648 $imm4,
20649 )
20650 };
20651 }
20652 transmute(constify_imm4_round!(rounding, call))
20653 }
20654
20655 /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
20656 ///
20657 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20658 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20659 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20660 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20661 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20662 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20663 ///
20664 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_sd&expand=3943)
20665 #[inline]
20666 #[target_feature(enable = "avx512f")]
20667 #[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
20668 #[rustc_args_required_const(2)]
20669 pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
20670 macro_rules! call {
20671 ($imm4:expr) => {
20672 vmulsd(
20673 a.as_f64x2(),
20674 b.as_f64x2(),
20675 _mm_setzero_pd().as_f64x2(),
20676 0b1,
20677 $imm4,
20678 )
20679 };
20680 }
20681 transmute(constify_imm4_round!(rounding, call))
20682 }
20683
20684 /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20685 ///
20686 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20687 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20688 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20689 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20690 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20691 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20692 ///
20693 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_sd&expand=3941)
20694 #[inline]
20695 #[target_feature(enable = "avx512f")]
20696 #[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
20697 #[rustc_args_required_const(4)]
20698 pub unsafe fn _mm_mask_mul_round_sd(
20699 src: __m128d,
20700 k: __mmask8,
20701 a: __m128d,
20702 b: __m128d,
20703 rounding: i32,
20704 ) -> __m128d {
20705 macro_rules! call {
20706 ($imm4:expr) => {
20707 vmulsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
20708 };
20709 }
20710 transmute(constify_imm4_round!(rounding, call))
20711 }
20712
20713 /// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20714 ///
20715 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20716 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20717 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20718 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20719 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20720 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20721 ///
20722 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_sd&expand=3942)
20723 #[inline]
20724 #[target_feature(enable = "avx512f")]
20725 #[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
20726 #[rustc_args_required_const(3)]
20727 pub unsafe fn _mm_maskz_mul_round_sd(
20728 k: __mmask8,
20729 a: __m128d,
20730 b: __m128d,
20731 rounding: i32,
20732 ) -> __m128d {
20733 macro_rules! call {
20734 ($imm4:expr) => {
20735 vmulsd(
20736 a.as_f64x2(),
20737 b.as_f64x2(),
20738 _mm_setzero_pd().as_f64x2(),
20739 k,
20740 $imm4,
20741 )
20742 };
20743 }
20744 transmute(constify_imm4_round!(rounding, call))
20745 }
20746
20747 /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
20748 ///
20749 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20750 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20751 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20752 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20753 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20754 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20755 ///
20756 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_ss&expand=2174)
20757 #[inline]
20758 #[target_feature(enable = "avx512f")]
20759 #[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
20760 #[rustc_args_required_const(2)]
20761 pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
20762 macro_rules! call {
20763 ($imm4:expr) => {
20764 vdivss(
20765 a.as_f32x4(),
20766 b.as_f32x4(),
20767 _mm_setzero_ps().as_f32x4(),
20768 0b1,
20769 $imm4,
20770 )
20771 };
20772 }
20773 transmute(constify_imm4_round!(rounding, call))
20774 }
20775
20776 /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20777 ///
20778 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20779 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20780 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20781 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20782 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20783 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20784 ///
20785 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_ss&expand=2175)
20786 #[inline]
20787 #[target_feature(enable = "avx512f")]
20788 #[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
20789 #[rustc_args_required_const(4)]
20790 pub unsafe fn _mm_mask_div_round_ss(
20791 src: __m128,
20792 k: __mmask8,
20793 a: __m128,
20794 b: __m128,
20795 rounding: i32,
20796 ) -> __m128 {
20797 macro_rules! call {
20798 ($imm4:expr) => {
20799 vdivss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
20800 };
20801 }
20802 transmute(constify_imm4_round!(rounding, call))
20803 }
20804
20805 /// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20806 ///
20807 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20808 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20809 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20810 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20811 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20812 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20813 ///
20814 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_ss&expand=2176)
20815 #[inline]
20816 #[target_feature(enable = "avx512f")]
20817 #[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
20818 #[rustc_args_required_const(3)]
20819 pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
20820 macro_rules! call {
20821 ($imm4:expr) => {
20822 vdivss(
20823 a.as_f32x4(),
20824 b.as_f32x4(),
20825 _mm_setzero_ps().as_f32x4(),
20826 k,
20827 $imm4,
20828 )
20829 };
20830 }
20831 transmute(constify_imm4_round!(rounding, call))
20832 }
20833
20834 /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
20835 ///
20836 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20837 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20838 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20839 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20840 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20841 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20842 ///
20843 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_sd&expand=2171)
20844 #[inline]
20845 #[target_feature(enable = "avx512f")]
20846 #[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
20847 #[rustc_args_required_const(2)]
20848 pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
20849 macro_rules! call {
20850 ($imm4:expr) => {
20851 vdivsd(
20852 a.as_f64x2(),
20853 b.as_f64x2(),
20854 _mm_setzero_pd().as_f64x2(),
20855 0b1,
20856 $imm4,
20857 )
20858 };
20859 }
20860 transmute(constify_imm4_round!(rounding, call))
20861 }
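// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. With an exact quotient the four directed
// rounding modes all give the same value, so the rounding argument here only
// selects the mode and suppresses exceptions.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_div_round_sd() {
//     let a = _mm_set_pd(10.0, 9.0); // lane 0 = 9.0, lane 1 = 10.0
//     let b = _mm_set_pd(1.0, 4.0);  // lane 0 = 4.0
//     // lane 0 = 9.0 / 4.0 = 2.25, lane 1 copied from `a` (10.0)
//     let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//     assert_eq!(_mm_cvtsd_f64(r), 2.25);
// }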
20862
20863 /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20864 ///
20865 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20866 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20867 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20868 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20869 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20870 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20871 ///
20872 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_sd&expand=2172)
20873 #[inline]
20874 #[target_feature(enable = "avx512f")]
20875 #[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
20876 #[rustc_args_required_const(4)]
20877 pub unsafe fn _mm_mask_div_round_sd(
20878 src: __m128d,
20879 k: __mmask8,
20880 a: __m128d,
20881 b: __m128d,
20882 rounding: i32,
20883 ) -> __m128d {
20884 macro_rules! call {
20885 ($imm4:expr) => {
20886 vdivsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
20887 };
20888 }
20889 transmute(constify_imm4_round!(rounding, call))
20890 }
20891
20892 /// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
20893 ///
20894 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20895 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20896 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20897 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20898 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20899 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20900 ///
20901 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_sd&expand=2173)
20902 #[inline]
20903 #[target_feature(enable = "avx512f")]
20904 #[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
20905 #[rustc_args_required_const(3)]
20906 pub unsafe fn _mm_maskz_div_round_sd(
20907 k: __mmask8,
20908 a: __m128d,
20909 b: __m128d,
20910 rounding: i32,
20911 ) -> __m128d {
20912 macro_rules! call {
20913 ($imm4:expr) => {
20914 vdivsd(
20915 a.as_f64x2(),
20916 b.as_f64x2(),
20917 _mm_setzero_pd().as_f64x2(),
20918 k,
20919 $imm4,
20920 )
20921 };
20922 }
20923 transmute(constify_imm4_round!(rounding, call))
20924 }
20925
20926 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
20927 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
20928 ///
20929 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_ss&expand=3668)
20930 #[inline]
20931 #[target_feature(enable = "avx512f")]
20932 #[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
20933 #[rustc_args_required_const(2)]
20934 pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
20935 macro_rules! call {
20936 ($imm4:expr) => {
20937 vmaxss(
20938 a.as_f32x4(),
20939 b.as_f32x4(),
20940 _mm_setzero_ps().as_f32x4(),
20941 0b1,
20942 $imm4,
20943 )
20944 };
20945 }
20946 transmute(constify_imm4_sae!(sae, call))
20947 }
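// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. For max/min the last argument is `sae`, not a
// rounding mode: _MM_FROUND_CUR_DIRECTION keeps normal exception reporting and
// _MM_FROUND_NO_EXC suppresses it; the numeric result is the same either way.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_max_round_ss() {
//     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lane 0 = 1.0
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 7.0); // lane 0 = 7.0
//     // lane 0 = max(1.0, 7.0) = 7.0, lanes 1..3 copied from `a`
//     let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtss_f32(r), 7.0);
// }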
20948
20949 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20950 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
20951 ///
20952 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_round_ss&expand=3672)
20953 #[inline]
20954 #[target_feature(enable = "avx512f")]
20955 #[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
20956 #[rustc_args_required_const(4)]
20957 pub unsafe fn _mm_mask_max_round_ss(
20958 src: __m128,
20959 k: __mmask8,
20960 a: __m128,
20961 b: __m128,
20962 sae: i32,
20963 ) -> __m128 {
20964 macro_rules! call {
20965 ($imm4:expr) => {
20966 vmaxss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
20967 };
20968 }
20969 transmute(constify_imm4_sae!(sae, call))
20970 }
20971
20972 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
20973 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
20974 ///
20975 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_round_ss&expand=3667)
20976 #[inline]
20977 #[target_feature(enable = "avx512f")]
20978 #[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
20979 #[rustc_args_required_const(3)]
20980 pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
20981 macro_rules! call {
20982 ($imm4:expr) => {
20983 vmaxss(
20984 a.as_f32x4(),
20985 b.as_f32x4(),
20986 _mm_setzero_ps().as_f32x4(),
20987 k,
20988 $imm4,
20989 )
20990 };
20991 }
20992 transmute(constify_imm4_sae!(sae, call))
20993 }
20994
20995 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
20996 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
20997 ///
20998 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_sd&expand=3665)
20999 #[inline]
21000 #[target_feature(enable = "avx512f")]
21001 #[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
21002 #[rustc_args_required_const(2)]
21003 pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
21004 macro_rules! call {
21005 ($imm4:expr) => {
21006 vmaxsd(
21007 a.as_f64x2(),
21008 b.as_f64x2(),
21009 _mm_setzero_pd().as_f64x2(),
21010 0b1,
21011 $imm4,
21012 )
21013 };
21014 }
21015 transmute(constify_imm4_sae!(sae, call))
21016 }
21017
21018 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21019 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21020 ///
21021 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_round_sd&expand=3663)
21022 #[inline]
21023 #[target_feature(enable = "avx512f")]
21024 #[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
21025 #[rustc_args_required_const(4)]
21026 pub unsafe fn _mm_mask_max_round_sd(
21027 src: __m128d,
21028 k: __mmask8,
21029 a: __m128d,
21030 b: __m128d,
21031 sae: i32,
21032 ) -> __m128d {
21033 macro_rules! call {
21034 ($imm4:expr) => {
21035 vmaxsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
21036 };
21037 }
21038 transmute(constify_imm4_sae!(sae, call))
21039 }
21040
21041 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21042 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21043 ///
21044 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_round_sd&expand=3670)
21045 #[inline]
21046 #[target_feature(enable = "avx512f")]
21047 #[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
21048 #[rustc_args_required_const(3)]
21049 pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
21050 macro_rules! call {
21051 ($imm4:expr) => {
21052 vmaxsd(
21053 a.as_f64x2(),
21054 b.as_f64x2(),
21055 _mm_setzero_pd().as_f64x2(),
21056 k,
21057 $imm4,
21058 )
21059 };
21060 }
21061 transmute(constify_imm4_sae!(sae, call))
21062 }
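// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. Demonstrates the zeromask: when bit 0 of `k`
// is clear the lower lane becomes 0.0 instead of max(a0, b0).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_maskz_max_round_sd() {
//     let a = _mm_set_pd(5.0, -1.0); // lane 0 = -1.0
//     let b = _mm_set_pd(0.0, 3.0);  // lane 0 = 3.0
//     let r = _mm_maskz_max_round_sd(0b1, a, b, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtsd_f64(r), 3.0); // max(-1.0, 3.0)
//     let r = _mm_maskz_max_round_sd(0b0, a, b, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtsd_f64(r), 0.0); // zeroed out
// }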
21063
21064 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
21065 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21066 ///
21067 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_ss&expand=3782)
21068 #[inline]
21069 #[target_feature(enable = "avx512f")]
21070 #[cfg_attr(test, assert_instr(vminss, sae = 8))]
21071 #[rustc_args_required_const(2)]
21072 pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
21073 macro_rules! call {
21074 ($imm4:expr) => {
21075 vminss(
21076 a.as_f32x4(),
21077 b.as_f32x4(),
21078 _mm_setzero_ps().as_f32x4(),
21079 0b1,
21080 $imm4,
21081 )
21082 };
21083 }
21084 transmute(constify_imm4_sae!(sae, call))
21085 }
21086
21087 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21088 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21089 ///
21090 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_ss&expand=3780)
21091 #[inline]
21092 #[target_feature(enable = "avx512f")]
21093 #[cfg_attr(test, assert_instr(vminss, sae = 8))]
21094 #[rustc_args_required_const(4)]
21095 pub unsafe fn _mm_mask_min_round_ss(
21096 src: __m128,
21097 k: __mmask8,
21098 a: __m128,
21099 b: __m128,
21100 sae: i32,
21101 ) -> __m128 {
21102 macro_rules! call {
21103 ($imm4:expr) => {
21104 vminss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
21105 };
21106 }
21107 transmute(constify_imm4_sae!(sae, call))
21108 }
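// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. Demonstrates the writemask: with bit 0 clear
// the lower lane is taken from `src` rather than from min(a0, b0).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_mask_min_round_ss() {
//     let src = _mm_set_ps(0.0, 0.0, 0.0, 42.0); // lane 0 fallback = 42.0
//     let a = _mm_set_ps(9.0, 8.0, 7.0, 2.0);    // lane 0 = 2.0
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 5.0);    // lane 0 = 5.0
//     let r = _mm_mask_min_round_ss(src, 0b1, a, b, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtss_f32(r), 2.0); // min(2.0, 5.0)
//     let r = _mm_mask_min_round_ss(src, 0b0, a, b, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtss_f32(r), 42.0); // copied from src
// }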
21109
21110 /// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21111 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21112 ///
21113 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_ss&expand=3781)
21114 #[inline]
21115 #[target_feature(enable = "avx512f")]
21116 #[cfg_attr(test, assert_instr(vminss, sae = 8))]
21117 #[rustc_args_required_const(3)]
21118 pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
21119 macro_rules! call {
21120 ($imm4:expr) => {
21121 vminss(
21122 a.as_f32x4(),
21123 b.as_f32x4(),
21124 _mm_setzero_ps().as_f32x4(),
21125 k,
21126 $imm4,
21127 )
21128 };
21129 }
21130 transmute(constify_imm4_sae!(sae, call))
21131 }
21132
21133 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
21134 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21135 ///
21136 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_sd&expand=3779)
21137 #[inline]
21138 #[target_feature(enable = "avx512f")]
21139 #[cfg_attr(test, assert_instr(vminsd, sae = 8))]
21140 #[rustc_args_required_const(2)]
21141 pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
21142 macro_rules! call {
21143 ($imm4:expr) => {
21144 vminsd(
21145 a.as_f64x2(),
21146 b.as_f64x2(),
21147 _mm_setzero_pd().as_f64x2(),
21148 0b1,
21149 $imm4,
21150 )
21151 };
21152 }
21153 transmute(constify_imm4_sae!(sae, call))
21154 }
21155
21156 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21157 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21158 ///
21159 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_sd&expand=3777)
21160 #[inline]
21161 #[target_feature(enable = "avx512f")]
21162 #[cfg_attr(test, assert_instr(vminsd, sae = 8))]
21163 #[rustc_args_required_const(4)]
21164 pub unsafe fn _mm_mask_min_round_sd(
21165 src: __m128d,
21166 k: __mmask8,
21167 a: __m128d,
21168 b: __m128d,
21169 sae: i32,
21170 ) -> __m128d {
21171 macro_rules! call {
21172 ($imm4:expr) => {
21173 vminsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
21174 };
21175 }
21176 transmute(constify_imm4_sae!(sae, call))
21177 }
21178
21179 /// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21180 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21181 ///
21182 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_sd&expand=3778)
21183 #[inline]
21184 #[target_feature(enable = "avx512f")]
21185 #[cfg_attr(test, assert_instr(vminsd, sae = 8))]
21186 #[rustc_args_required_const(3)]
21187 pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
21188 macro_rules! call {
21189 ($imm4:expr) => {
21190 vminsd(
21191 a.as_f64x2(),
21192 b.as_f64x2(),
21193 _mm_setzero_pd().as_f64x2(),
21194 k,
21195 $imm4,
21196 )
21197 };
21198 }
21199 transmute(constify_imm4_sae!(sae, call))
21200 }
21201
21202 /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
21203 ///
21204 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21205 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21206 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21207 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21208 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21209 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21210 ///
21211 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_ss&expand=5383)
21212 #[inline]
21213 #[target_feature(enable = "avx512f")]
21214 #[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
21215 #[rustc_args_required_const(2)]
21216 pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
21217 macro_rules! call {
21218 ($imm4:expr) => {
21219 vsqrtss(
21220 a.as_f32x4(),
21221 b.as_f32x4(),
21222 _mm_setzero_ps().as_f32x4(),
21223 0b1,
21224 $imm4,
21225 )
21226 };
21227 }
21228 transmute(constify_imm4_round!(rounding, call))
21229 }
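// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. Note that the square root is taken from the
// lower element of `b`, while the upper three lanes are copied from `a`.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_sqrt_round_ss() {
//     let a = _mm_set_ps(3.0, 2.0, 1.0, 100.0); // lane 0 of `a` is not used
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 9.0);   // lane 0 = 9.0
//     let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//     assert_eq!(_mm_cvtss_f32(r), 3.0); // sqrt(9.0)
// }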
21230
21231 /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21232 ///
21233 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21234 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21235 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21236 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21237 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21238 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21239 ///
21240 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_ss&expand=5381)
21241 #[inline]
21242 #[target_feature(enable = "avx512f")]
21243 #[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
21244 #[rustc_args_required_const(4)]
21245 pub unsafe fn _mm_mask_sqrt_round_ss(
21246 src: __m128,
21247 k: __mmask8,
21248 a: __m128,
21249 b: __m128,
21250 rounding: i32,
21251 ) -> __m128 {
21252 macro_rules! call {
21253 ($imm4:expr) => {
21254 vsqrtss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
21255 };
21256 }
21257 transmute(constify_imm4_round!(rounding, call))
21258 }
21259
21260 /// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21261 ///
21262 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21263 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21264 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21265 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21266 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21267 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21268 ///
21269 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_ss&expand=5382)
21270 #[inline]
21271 #[target_feature(enable = "avx512f")]
21272 #[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
21273 #[rustc_args_required_const(3)]
21274 pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
21275 macro_rules! call {
21276 ($imm4:expr) => {
21277 vsqrtss(
21278 a.as_f32x4(),
21279 b.as_f32x4(),
21280 _mm_setzero_ps().as_f32x4(),
21281 k,
21282 $imm4,
21283 )
21284 };
21285 }
21286 transmute(constify_imm4_round!(rounding, call))
21287 }
21288
21289 /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
21290 ///
21291 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21292 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21293 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21294 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21295 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21296 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21297 ///
21298 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_sd&expand=5380)
21299 #[inline]
21300 #[target_feature(enable = "avx512f")]
21301 #[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
21302 #[rustc_args_required_const(2)]
21303 pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
21304 macro_rules! call {
21305 ($imm4:expr) => {
21306 vsqrtsd(
21307 a.as_f64x2(),
21308 b.as_f64x2(),
21309 _mm_setzero_pd().as_f64x2(),
21310 0b1,
21311 $imm4,
21312 )
21313 };
21314 }
21315 transmute(constify_imm4_round!(rounding, call))
21316 }
21317
21318 /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21319 ///
21320 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21321 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21322 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21323 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21324 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21325 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21326 ///
21327 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_sd&expand=5378)
21328 #[inline]
21329 #[target_feature(enable = "avx512f")]
21330 #[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
21331 #[rustc_args_required_const(4)]
21332 pub unsafe fn _mm_mask_sqrt_round_sd(
21333 src: __m128d,
21334 k: __mmask8,
21335 a: __m128d,
21336 b: __m128d,
21337 rounding: i32,
21338 ) -> __m128d {
21339 macro_rules! call {
21340 ($imm4:expr) => {
21341 vsqrtsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
21342 };
21343 }
21344 transmute(constify_imm4_round!(rounding, call))
21345 }
21346
21347 /// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21348 ///
21349 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21350 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21351 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21352 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21353 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21354 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21355 ///
21356 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_sd&expand=5379)
21357 #[inline]
21358 #[target_feature(enable = "avx512f")]
21359 #[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
21360 #[rustc_args_required_const(3)]
21361 pub unsafe fn _mm_maskz_sqrt_round_sd(
21362 k: __mmask8,
21363 a: __m128d,
21364 b: __m128d,
21365 rounding: i32,
21366 ) -> __m128d {
21367 macro_rules! call {
21368 ($imm4:expr) => {
21369 vsqrtsd(
21370 a.as_f64x2(),
21371 b.as_f64x2(),
21372 _mm_setzero_pd().as_f64x2(),
21373 k,
21374 $imm4,
21375 )
21376 };
21377 }
21378 transmute(constify_imm4_round!(rounding, call))
21379 }
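// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. Same zeromask pattern as the other scalar
// operations: a clear bit 0 zeroes the lower lane instead of writing sqrt(b0).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_maskz_sqrt_round_sd() {
//     let a = _mm_set_pd(7.0, 0.0);
//     let b = _mm_set_pd(0.0, 16.0); // lane 0 = 16.0
//     let r = _mm_maskz_sqrt_round_sd(0b1, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//     assert_eq!(_mm_cvtsd_f64(r), 4.0); // sqrt(16.0)
//     let r = _mm_maskz_sqrt_round_sd(0b0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//     assert_eq!(_mm_cvtsd_f64(r), 0.0);
// }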
21380
21381 /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
21382 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21383 ///
21384 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_ss&expand=2856)
21385 #[inline]
21386 #[target_feature(enable = "avx512f")]
21387 #[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
21388 #[rustc_args_required_const(2)]
21389 pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
21390 macro_rules! call {
21391 ($imm4:expr) => {
21392 vgetexpss(
21393 a.as_f32x4(),
21394 b.as_f32x4(),
21395 _mm_setzero_ps().as_f32x4(),
21396 0b1,
21397 $imm4,
21398 )
21399 };
21400 }
21401 let r = constify_imm4_sae!(sae, call);
21402 transmute(r)
21403 }
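// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. getexp returns floor(log2(|b0|)) as a float,
// so 8.0 (= 2^3) yields 3.0 and 0.5 (= 2^-1) would yield -1.0.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_getexp_round_ss() {
//     let a = _mm_set_ps(0.0, 0.0, 0.0, 0.0);
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 8.0); // lane 0 = 8.0
//     let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtss_f32(r), 3.0); // floor(log2(8.0))
// }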
21404
21405 /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
21406 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21407 ///
21408 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_ss&expand=2857)
21409 #[inline]
21410 #[target_feature(enable = "avx512f")]
21411 #[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
21412 #[rustc_args_required_const(4)]
21413 pub unsafe fn _mm_mask_getexp_round_ss(
21414 src: __m128,
21415 k: __mmask8,
21416 a: __m128,
21417 b: __m128,
21418 sae: i32,
21419 ) -> __m128 {
21420 macro_rules! call {
21421 ($imm4:expr) => {
21422 vgetexpss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
21423 };
21424 }
21425 let r = constify_imm4_sae!(sae, call);
21426 transmute(r)
21427 }
21428
21429 /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
21430 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21431 ///
21432 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_ss&expand=2858)
21433 #[inline]
21434 #[target_feature(enable = "avx512f")]
21435 #[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
21436 #[rustc_args_required_const(3)]
21437 pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
21438 macro_rules! call {
21439 ($imm4:expr) => {
21440 vgetexpss(
21441 a.as_f32x4(),
21442 b.as_f32x4(),
21443 _mm_setzero_ps().as_f32x4(),
21444 k,
21445 $imm4,
21446 )
21447 };
21448 }
21449 let r = constify_imm4_sae!(sae, call);
21450 transmute(r)
21451 }
21452
21453 /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
21454 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21455 ///
21456 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_sd&expand=2853)
21457 #[inline]
21458 #[target_feature(enable = "avx512f")]
21459 #[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
21460 #[rustc_args_required_const(2)]
21461 pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
21462 macro_rules! call {
21463 ($imm4:expr) => {
21464 vgetexpsd(
21465 a.as_f64x2(),
21466 b.as_f64x2(),
21467 _mm_setzero_pd().as_f64x2(),
21468 0b1,
21469 $imm4,
21470 )
21471 };
21472 }
21473 let r = constify_imm4_sae!(sae, call);
21474 transmute(r)
21475 }
21476
21477 /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
21478 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21479 ///
21480 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_sd&expand=2854)
21481 #[inline]
21482 #[target_feature(enable = "avx512f")]
21483 #[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
21484 #[rustc_args_required_const(4)]
21485 pub unsafe fn _mm_mask_getexp_round_sd(
21486 src: __m128d,
21487 k: __mmask8,
21488 a: __m128d,
21489 b: __m128d,
21490 sae: i32,
21491 ) -> __m128d {
21492 macro_rules! call {
21493 ($imm4:expr) => {
21494 vgetexpsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
21495 };
21496 }
21497 let r = constify_imm4_sae!(sae, call);
21498 transmute(r)
21499 }
21500
21501 /// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
21502 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21503 ///
21504 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_sd&expand=2855)
21505 #[inline]
21506 #[target_feature(enable = "avx512f")]
21507 #[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
21508 #[rustc_args_required_const(3)]
21509 pub unsafe fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
21510 macro_rules! call {
21511 ($imm4:expr) => {
21512 vgetexpsd(
21513 a.as_f64x2(),
21514 b.as_f64x2(),
21515 _mm_setzero_pd().as_f64x2(),
21516 k,
21517 $imm4,
21518 )
21519 };
21520 }
21521 let r = constify_imm4_sae!(sae, call);
21522 transmute(r)
21523 }
21524
21525 /// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
21526 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
21527 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
21528 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
21529 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
21530 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
21531 /// The sign is determined by sc which can take the following values:\
21532 /// _MM_MANT_SIGN_src // sign = sign(src)\
21533 /// _MM_MANT_SIGN_zero // sign = 0\
21534 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
21535 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21536 ///
21537 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_round_ss&expand=2892)
21538 #[inline]
21539 #[target_feature(enable = "avx512f")]
21540 #[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
21541 #[rustc_args_required_const(2, 3, 4)]
21542 pub unsafe fn _mm_getmant_round_ss(
21543 a: __m128,
21544 b: __m128,
21545 norm: _MM_MANTISSA_NORM_ENUM,
21546 sign: _MM_MANTISSA_SIGN_ENUM,
21547 sae: i32,
21548 ) -> __m128 {
21549 macro_rules! call {
21550 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
21551 vgetmantss(
21552 a.as_f32x4(),
21553 b.as_f32x4(),
21554 $imm2 << 2 | $imm4_1,
21555 _mm_setzero_ps().as_f32x4(),
21556 0b1,
21557 $imm4_2,
21558 )
21559 };
21560 }
21561 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
21562 transmute(r)
21563 }
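// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU and using the `_MM_MANT_*` constants listed in
// the doc comment above. With the [1, 2) interval, 12.0 = 1.5 * 2^3 normalizes
// to 1.5.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_getmant_round_ss() {
//     let a = _mm_set_ps(0.0, 0.0, 0.0, 0.0);
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 12.0); // lane 0 = 12.0
//     let r = _mm_getmant_round_ss(
//         a,
//         b,
//         _MM_MANT_NORM_1_2,
//         _MM_MANT_SIGN_src,
//         _MM_FROUND_CUR_DIRECTION,
//     );
//     assert_eq!(_mm_cvtss_f32(r), 1.5); // mantissa of 12.0 normalized to [1, 2)
// }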
21564
21565 /// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
21566 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
21567 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
21568 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
21569 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
21570 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
21571 /// The sign is determined by sc which can take the following values:\
21572 /// _MM_MANT_SIGN_src // sign = sign(src)\
21573 /// _MM_MANT_SIGN_zero // sign = 0\
21574 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
21575 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21576 ///
21577 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_round_ss&expand=2893)
21578 #[inline]
21579 #[target_feature(enable = "avx512f")]
21580 #[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
21581 #[rustc_args_required_const(4, 5, 6)]
21582 pub unsafe fn _mm_mask_getmant_round_ss(
21583 src: __m128,
21584 k: __mmask8,
21585 a: __m128,
21586 b: __m128,
21587 norm: _MM_MANTISSA_NORM_ENUM,
21588 sign: _MM_MANTISSA_SIGN_ENUM,
21589 sae: i32,
21590 ) -> __m128 {
21591 macro_rules! call {
21592 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
21593 vgetmantss(
21594 a.as_f32x4(),
21595 b.as_f32x4(),
21596 $imm2 << 2 | $imm4_1,
21597 src.as_f32x4(),
21598 k,
21599 $imm4_2,
21600 )
21601 };
21602 }
21603 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
21604 transmute(r)
21605 }
21606
21607 /// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
21608 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
21609 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
21610 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
21611 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
21612 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
21613 /// The sign is determined by sc which can take the following values:\
21614 /// _MM_MANT_SIGN_src // sign = sign(src)\
21615 /// _MM_MANT_SIGN_zero // sign = 0\
21616 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
21617 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21618 ///
21619 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_round_ss&expand=2894)
21620 #[inline]
21621 #[target_feature(enable = "avx512f")]
21622 #[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
21623 #[rustc_args_required_const(3, 4, 5)]
21624 pub unsafe fn _mm_maskz_getmant_round_ss(
21625 k: __mmask8,
21626 a: __m128,
21627 b: __m128,
21628 norm: _MM_MANTISSA_NORM_ENUM,
21629 sign: _MM_MANTISSA_SIGN_ENUM,
21630 sae: i32,
21631 ) -> __m128 {
21632 macro_rules! call {
21633 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
21634 vgetmantss(
21635 a.as_f32x4(),
21636 b.as_f32x4(),
21637 $imm2 << 2 | $imm4_1,
21638 _mm_setzero_ps().as_f32x4(),
21639 k,
21640 $imm4_2,
21641 )
21642 };
21643 }
21644 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
21645 transmute(r)
21646 }
21647
21648 /// Normalize the mantissa of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
21649 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
21650 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
21651 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
21652 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
21653 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
21654 /// The sign is determined by sc which can take the following values:\
21655 /// _MM_MANT_SIGN_src // sign = sign(src)\
21656 /// _MM_MANT_SIGN_zero // sign = 0\
21657 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
21658 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21659 ///
21660 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_round_sd&expand=2889)
21661 #[inline]
21662 #[target_feature(enable = "avx512f")]
21663 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
21664 #[rustc_args_required_const(2, 3, 4)]
21665 pub unsafe fn _mm_getmant_round_sd(
21666 a: __m128d,
21667 b: __m128d,
21668 norm: _MM_MANTISSA_NORM_ENUM,
21669 sign: _MM_MANTISSA_SIGN_ENUM,
21670 sae: i32,
21671 ) -> __m128d {
21672 macro_rules! call {
21673 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
21674 vgetmantsd(
21675 a.as_f64x2(),
21676 b.as_f64x2(),
21677 $imm2 << 2 | $imm4_1,
21678 _mm_setzero_pd().as_f64x2(),
21679 0b1,
21680 $imm4_2,
21681 )
21682 };
21683 }
21684 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
21685 transmute(r)
21686 }
21687
21688 /// Normalize the mantissa of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
21689 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
21690 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
21691 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
21692 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
21693 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
21694 /// The sign is determined by sc which can take the following values:\
21695 /// _MM_MANT_SIGN_src // sign = sign(src)\
21696 /// _MM_MANT_SIGN_zero // sign = 0\
21697 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
21698 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21699 ///
21700 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_round_sd&expand=2890)
21701 #[inline]
21702 #[target_feature(enable = "avx512f")]
21703 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
21704 #[rustc_args_required_const(4, 5, 6)]
21705 pub unsafe fn _mm_mask_getmant_round_sd(
21706 src: __m128d,
21707 k: __mmask8,
21708 a: __m128d,
21709 b: __m128d,
21710 norm: _MM_MANTISSA_NORM_ENUM,
21711 sign: _MM_MANTISSA_SIGN_ENUM,
21712 sae: i32,
21713 ) -> __m128d {
21714 macro_rules! call {
21715 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
21716 vgetmantsd(
21717 a.as_f64x2(),
21718 b.as_f64x2(),
21719 $imm2 << 2 | $imm4_1,
21720 src.as_f64x2(),
21721 k,
21722 $imm4_2,
21723 )
21724 };
21725 }
21726 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
21727 transmute(r)
21728 }
21729
21730 /// Normalize the mantissa of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
21731 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
21732 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
21733 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
21734 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
21735 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
21736 /// The sign is determined by sc which can take the following values:\
21737 /// _MM_MANT_SIGN_src // sign = sign(src)\
21738 /// _MM_MANT_SIGN_zero // sign = 0\
21739 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
21740 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21741 ///
21742 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_round_sd&expand=2891)
21743 #[inline]
21744 #[target_feature(enable = "avx512f")]
21745 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
21746 #[rustc_args_required_const(3, 4, 5)]
21747 pub unsafe fn _mm_maskz_getmant_round_sd(
21748 k: __mmask8,
21749 a: __m128d,
21750 b: __m128d,
21751 norm: _MM_MANTISSA_NORM_ENUM,
21752 sign: _MM_MANTISSA_SIGN_ENUM,
21753 sae: i32,
21754 ) -> __m128d {
21755 macro_rules! call {
21756 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
21757 vgetmantsd(
21758 a.as_f64x2(),
21759 b.as_f64x2(),
21760 $imm2 << 2 | $imm4_1,
21761 _mm_setzero_pd().as_f64x2(),
21762 k,
21763 $imm4_2,
21764 )
21765 };
21766 }
21767 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
21768 transmute(r)
21769 }
21770
21771 /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
21772 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
21773 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
21774 /// _MM_FROUND_TO_NEG_INF // round down\
21775 /// _MM_FROUND_TO_POS_INF // round up\
21776 /// _MM_FROUND_TO_ZERO // truncate\
21777 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
21778 ///
21779 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21780 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_round_ss&expand=4796)
21781 #[inline]
21782 #[target_feature(enable = "avx512f")]
21783 #[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
21784 #[rustc_args_required_const(2, 3)]
21785 pub unsafe fn _mm_roundscale_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> __m128 {
21786 let a = a.as_f32x4();
21787 let b = b.as_f32x4();
21788 let zero = _mm_setzero_ps().as_f32x4();
21789 macro_rules! call {
21790 ($imm8:expr, $imm4:expr) => {
21791 vrndscaless(a, b, zero, 0b11111111, $imm8, $imm4)
21792 };
21793 }
21794 let r = constify_imm8_roundscale!(imm8, sae, call);
21795 transmute(r)
21796 }
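// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. With imm8 = 0 the value is rounded to 0
// fraction bits, i.e. to the nearest integer under the selected rounding
// direction.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_roundscale_round_ss() {
//     let a = _mm_set_ps(0.0, 0.0, 0.0, 0.0);
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 2.25); // lane 0 = 2.25
//     let r = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(_mm_cvtss_f32(r), 2.0); // 2.25 rounded to the nearest integer
// }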
21797
21798 /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21799 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
21800 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
21801 /// _MM_FROUND_TO_NEG_INF // round down\
21802 /// _MM_FROUND_TO_POS_INF // round up\
21803 /// _MM_FROUND_TO_ZERO // truncate\
21804 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
21805 ///
21806 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21807 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_round_ss&expand=4794)
21808 #[inline]
21809 #[target_feature(enable = "avx512f")]
21810 #[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
21811 #[rustc_args_required_const(4, 5)]
21812 pub unsafe fn _mm_mask_roundscale_round_ss(
21813 src: __m128,
21814 k: __mmask8,
21815 a: __m128,
21816 b: __m128,
21817 imm8: i32,
21818 sae: i32,
21819 ) -> __m128 {
21820 let a = a.as_f32x4();
21821 let b = b.as_f32x4();
21822 let src = src.as_f32x4();
21823 macro_rules! call {
21824 ($imm8:expr, $imm4:expr) => {
21825 vrndscaless(a, b, src, k, $imm8, $imm4)
21826 };
21827 }
21828 let r = constify_imm8_roundscale!(imm8, sae, call);
21829 transmute(r)
21830 }
21831
21832 /// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21833 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
21834 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
21835 /// _MM_FROUND_TO_NEG_INF // round down\
21836 /// _MM_FROUND_TO_POS_INF // round up\
21837 /// _MM_FROUND_TO_ZERO // truncate\
21838 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
21839 ///
21840 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21841 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_round_ss&expand=4795)
21842 #[inline]
21843 #[target_feature(enable = "avx512f")]
21844 #[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
21845 #[rustc_args_required_const(3, 4)]
21846 pub unsafe fn _mm_maskz_roundscale_round_ss(
21847 k: __mmask8,
21848 a: __m128,
21849 b: __m128,
21850 imm8: i32,
21851 sae: i32,
21852 ) -> __m128 {
21853 let a = a.as_f32x4();
21854 let b = b.as_f32x4();
21855 let zero = _mm_setzero_ps().as_f32x4();
21856 macro_rules! call {
21857 ($imm8:expr, $imm4:expr) => {
21858 vrndscaless(a, b, zero, k, $imm8, $imm4)
21859 };
21860 }
21861 let r = constify_imm8_roundscale!(imm8, sae, call);
21862 transmute(r)
21863 }
21864
21865 /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
21866 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
21867 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
21868 /// _MM_FROUND_TO_NEG_INF // round down\
21869 /// _MM_FROUND_TO_POS_INF // round up\
21870 /// _MM_FROUND_TO_ZERO // truncate\
21871 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
21872 ///
21873 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21874 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_round_sd&expand=4793)
21875 #[inline]
21876 #[target_feature(enable = "avx512f")]
21877 #[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
21878 #[rustc_args_required_const(2, 3)]
21879 pub unsafe fn _mm_roundscale_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __m128d {
21880 let a = a.as_f64x2();
21881 let b = b.as_f64x2();
21882 let zero = _mm_setzero_pd().as_f64x2();
21883 macro_rules! call {
21884 ($imm8:expr, $imm4:expr) => {
21885 vrndscalesd(a, b, zero, 0b11111111, $imm8, $imm4)
21886 };
21887 }
21888 let r = constify_imm8_roundscale!(imm8, sae, call);
21889 transmute(r)
21890 }
21891
21892 /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21893 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
21894 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
21895 /// _MM_FROUND_TO_NEG_INF // round down\
21896 /// _MM_FROUND_TO_POS_INF // round up\
21897 /// _MM_FROUND_TO_ZERO // truncate\
21898 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
21899 ///
21900 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21901 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_round_sd&expand=4791)
21902 #[inline]
21903 #[target_feature(enable = "avx512f")]
21904 #[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
21905 #[rustc_args_required_const(4, 5)]
21906 pub unsafe fn _mm_mask_roundscale_round_sd(
21907 src: __m128d,
21908 k: __mmask8,
21909 a: __m128d,
21910 b: __m128d,
21911 imm8: i32,
21912 sae: i32,
21913 ) -> __m128d {
21914 let a = a.as_f64x2();
21915 let b = b.as_f64x2();
21916 let src = src.as_f64x2();
21917 macro_rules! call {
21918 ($imm8:expr, $imm4:expr) => {
21919 vrndscalesd(a, b, src, k, $imm8, $imm4)
21920 };
21921 }
21922 let r = constify_imm8_roundscale!(imm8, sae, call);
21923 transmute(r)
21924 }
21925
21926 /// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
21927 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
21928 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
21929 /// _MM_FROUND_TO_NEG_INF // round down\
21930 /// _MM_FROUND_TO_POS_INF // round up\
21931 /// _MM_FROUND_TO_ZERO // truncate\
21932 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
21933 ///
21934 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
21935 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_round_sd&expand=4792)
21936 #[inline]
21937 #[target_feature(enable = "avx512f")]
21938 #[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
21939 #[rustc_args_required_const(3, 4)]
21940 pub unsafe fn _mm_maskz_roundscale_round_sd(
21941 k: __mmask8,
21942 a: __m128d,
21943 b: __m128d,
21944 imm8: i32,
21945 sae: i32,
21946 ) -> __m128d {
21947 let a = a.as_f64x2();
21948 let b = b.as_f64x2();
21949 let zero = _mm_setzero_pd().as_f64x2();
21950 macro_rules! call {
21951 ($imm8:expr, $imm4:expr) => {
21952 vrndscalesd(a, b, zero, k, $imm8, $imm4)
21953 };
21954 }
21955 let r = constify_imm8_roundscale!(imm8, sae, call);
21956 transmute(r)
21957 }
21958
21959 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
21960 ///
21961 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21962 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21963 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21964 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21965 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21966 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21967 ///
21968 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_ss&expand=4895)
21969 #[inline]
21970 #[target_feature(enable = "avx512f")]
21971 #[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
21972 #[rustc_args_required_const(2)]
21973 pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
21974 let a = a.as_f32x4();
21975 let b = b.as_f32x4();
21976 let zero = _mm_setzero_ps().as_f32x4();
21977 macro_rules! call {
21978 ($imm4:expr) => {
21979 vscalefss(a, b, zero, 0b11111111, $imm4)
21980 };
21981 }
21982 let r = constify_imm4_round!(rounding, call);
21983 transmute(r)
21984 }
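// Illustrative usage sketch (editorial addition, not part of the upstream source),
// assuming an AVX512F-capable CPU. scalef computes a0 * 2^floor(b0) in the lower
// lane, so 1.5 scaled by 2^3 gives 12.0.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn demo_scalef_round_ss() {
//     let a = _mm_set_ps(0.0, 0.0, 0.0, 1.5); // lane 0 = 1.5
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 3.0); // lane 0 = 3.0
//     let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//     assert_eq!(_mm_cvtss_f32(r), 12.0); // 1.5 * 2^3
// }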
21985
21986 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
21987 ///
21988 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
21989 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
21990 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
21991 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
21992 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
21993 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
21994 ///
21995 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_ss&expand=4893)
21996 #[inline]
21997 #[target_feature(enable = "avx512f")]
21998 #[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
21999 #[rustc_args_required_const(4)]
22000 pub unsafe fn _mm_mask_scalef_round_ss(
22001 src: __m128,
22002 k: __mmask8,
22003 a: __m128,
22004 b: __m128,
22005 rounding: i32,
22006 ) -> __m128 {
22007 macro_rules! call {
22008 ($imm4:expr) => {
22009 vscalefss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
22010 };
22011 }
22012 let r = constify_imm4_round!(rounding, call);
22013 transmute(r)
22014 }
22015
22016 /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22017 ///
22018 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22019 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22020 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22021 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22022 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22023 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22024 ///
22025 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_ss&expand=4894)
22026 #[inline]
22027 #[target_feature(enable = "avx512f")]
22028 #[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
22029 #[rustc_args_required_const(3)]
22030 pub unsafe fn _mm_maskz_scalef_round_ss(
22031 k: __mmask8,
22032 a: __m128,
22033 b: __m128,
22034 rounding: i32,
22035 ) -> __m128 {
22036 macro_rules! call {
22037 ($imm4:expr) => {
22038 vscalefss(
22039 a.as_f32x4(),
22040 b.as_f32x4(),
22041 _mm_setzero_ps().as_f32x4(),
22042 k,
22043 $imm4,
22044 )
22045 };
22046 }
22047 let r = constify_imm4_round!(rounding, call);
22048 transmute(r)
22049 }
22050
22051 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
22052 ///
22053 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22054 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22055 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22056 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22057 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22058 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22059 ///
22060 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_sd&expand=4892)
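///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `a[0] * 2^floor(b[0])`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_sd(1.5);
///     let b = _mm_set_sd(3.0);
///     // lower lane: 1.5 * 2^3 = 12.0; the upper lane is copied from `a`
///     let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtsd_f64(r), 12.0);
/// }
/// ```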
22061 #[inline]
22062 #[target_feature(enable = "avx512f")]
22063 #[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
22064 #[rustc_args_required_const(2)]
22065 pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
22066 macro_rules! call {
22067 ($imm4:expr) => {
22068 vscalefsd(
22069 a.as_f64x2(),
22070 b.as_f64x2(),
22071 _mm_setzero_pd().as_f64x2(),
22072 0b11111111,
22073 $imm4,
22074 )
22075 };
22076 }
22077 let r = constify_imm4_round!(rounding, call);
22078 transmute(r)
22079 }
22080
22081 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22082 ///
22083 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22084 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22085 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22086 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22087 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22088 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22089 ///
22090 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_sd&expand=4890)
22091 #[inline]
22092 #[target_feature(enable = "avx512f")]
22093 #[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
22094 #[rustc_args_required_const(4)]
22095 pub unsafe fn _mm_mask_scalef_round_sd(
22096 src: __m128d,
22097 k: __mmask8,
22098 a: __m128d,
22099 b: __m128d,
22100 rounding: i32,
22101 ) -> __m128d {
22102 macro_rules! call {
22103 ($imm4:expr) => {
22104 vscalefsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
22105 };
22106 }
22107 let r = constify_imm4_round!(rounding, call);
22108 transmute(r)
22109 }
22110
22111 /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22112 ///
22113 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22114 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22115 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22116 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22117 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22118 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22119 ///
22120 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_sd&expand=4891)
22121 #[inline]
22122 #[target_feature(enable = "avx512f")]
22123 #[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
22124 #[rustc_args_required_const(3)]
22125 pub unsafe fn _mm_maskz_scalef_round_sd(
22126 k: __mmask8,
22127 a: __m128d,
22128 b: __m128d,
22129 rounding: i32,
22130 ) -> __m128d {
22131 macro_rules! call {
22132 ($imm4:expr) => {
22133 vscalefsd(
22134 a.as_f64x2(),
22135 b.as_f64x2(),
22136 _mm_setzero_pd().as_f64x2(),
22137 k,
22138 $imm4,
22139 )
22140 };
22141 }
22142 let r = constify_imm4_round!(rounding, call);
22143 transmute(r)
22144 }
22145
22146 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
22147 ///
22148 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22149 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22150 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22151 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22152 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22153 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22154 ///
22155 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_ss&expand=2573)
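///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `a[0] * b[0] + c[0]`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // lower lane: 2.0 * 3.0 + 1.0 = 7.0; the upper 3 lanes come from `a`
///     let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r), 7.0);
/// }
/// ```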
22156 #[inline]
22157 #[target_feature(enable = "avx512f")]
22158 #[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
22159 #[rustc_args_required_const(3)]
22160 pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
22161 let extracta: f32 = simd_extract(a, 0);
22162 let extractb: f32 = simd_extract(b, 0);
22163 let extractc: f32 = simd_extract(c, 0);
22164 macro_rules! call {
22165 ($imm4:expr) => {
22166 vfmadd132ss(extracta, extractb, extractc, $imm4)
22167 };
22168 }
22169 let fmadd = constify_imm4_round!(rounding, call);
22170 let r = simd_insert(a, 0, fmadd);
22171 transmute(r)
22172 }
22173
22174 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22175 ///
22176 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22177 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22178 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22179 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22180 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22181 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22182 ///
22183 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_ss&expand=2574)
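///
/// A minimal illustrative sketch (not part of Intel's documentation) of the writemask
/// behaviour, with assumed values:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // mask bit 0 clear: the lower lane is copied from `a`
///     let r0 = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r0), 2.0);
///     // mask bit 0 set: 2.0 * 3.0 + 1.0 = 7.0
///     let r1 = _mm_mask_fmadd_round_ss(a, 1, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r1), 7.0);
/// }
/// ```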
22184 #[inline]
22185 #[target_feature(enable = "avx512f")]
22186 #[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
22187 #[rustc_args_required_const(4)]
22188 pub unsafe fn _mm_mask_fmadd_round_ss(
22189 a: __m128,
22190 k: __mmask8,
22191 b: __m128,
22192 c: __m128,
22193 rounding: i32,
22194 ) -> __m128 {
22195 let mut fmadd: f32 = simd_extract(a, 0);
22196 if (k & 0b00000001) != 0 {
22197 let extractb: f32 = simd_extract(b, 0);
22198 let extractc: f32 = simd_extract(c, 0);
22199 macro_rules! call {
22200 ($imm4:expr) => {
22201 vfmadd132ss(fmadd, extractb, extractc, $imm4)
22202 };
22203 }
22204 fmadd = constify_imm4_round!(rounding, call);
22205 }
22206 let r = simd_insert(a, 0, fmadd);
22207 transmute(r)
22208 }
22209
22210 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22211 ///
22212 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22213 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22214 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22215 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22216 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22217 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22218 ///
22219 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_ss&expand=2576)
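///
/// A minimal illustrative sketch (not part of Intel's documentation) of the zeromask
/// behaviour, with assumed values:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // mask bit 0 clear: the lower lane is zeroed
///     let r0 = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r0), 0.0);
///     // mask bit 0 set: 2.0 * 3.0 + 1.0 = 7.0
///     let r1 = _mm_maskz_fmadd_round_ss(1, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r1), 7.0);
/// }
/// ```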
22220 #[inline]
22221 #[target_feature(enable = "avx512f")]
22222 #[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
22223 #[rustc_args_required_const(4)]
22224 pub unsafe fn _mm_maskz_fmadd_round_ss(
22225 k: __mmask8,
22226 a: __m128,
22227 b: __m128,
22228 c: __m128,
22229 rounding: i32,
22230 ) -> __m128 {
22231 let mut fmadd: f32 = 0.;
22232 if (k & 0b00000001) != 0 {
22233 let extracta: f32 = simd_extract(a, 0);
22234 let extractb: f32 = simd_extract(b, 0);
22235 let extractc: f32 = simd_extract(c, 0);
22236 macro_rules! call {
22237 ($imm4:expr) => {
22238 vfmadd132ss(extracta, extractb, extractc, $imm4)
22239 };
22240 }
22241 fmadd = constify_imm4_round!(rounding, call);
22242 }
22243 let r = simd_insert(a, 0, fmadd);
22244 transmute(r)
22245 }
22246
22247 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
22248 ///
22249 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22250 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22251 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22252 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22253 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22254 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22255 ///
22256 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_ss&expand=2575)
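///
/// A minimal illustrative sketch (not part of Intel's documentation), with assumed
/// values; note that the fallback element and the upper lanes come from `c`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // mask bit 0 clear: the lower lane is copied from `c`
///     let r0 = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r0), 1.0);
///     // mask bit 0 set: 2.0 * 3.0 + 1.0 = 7.0
///     let r1 = _mm_mask3_fmadd_round_ss(a, b, c, 1, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r1), 7.0);
/// }
/// ```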
22257 #[inline]
22258 #[target_feature(enable = "avx512f")]
22259 #[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
22260 #[rustc_args_required_const(4)]
22261 pub unsafe fn _mm_mask3_fmadd_round_ss(
22262 a: __m128,
22263 b: __m128,
22264 c: __m128,
22265 k: __mmask8,
22266 rounding: i32,
22267 ) -> __m128 {
22268 let mut fmadd: f32 = simd_extract(c, 0);
22269 if (k & 0b00000001) != 0 {
22270 let extracta: f32 = simd_extract(a, 0);
22271 let extractb: f32 = simd_extract(b, 0);
22272 macro_rules! call {
22273 ($imm4:expr) => {
22274 vfmadd132ss(extracta, extractb, fmadd, $imm4)
22275 };
22276 }
22277 fmadd = constify_imm4_round!(rounding, call);
22278 }
22279 let r = simd_insert(c, 0, fmadd);
22280 transmute(r)
22281 }
22282
22283 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
22284 ///
22285 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22286 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22287 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22288 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22289 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22290 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22291 ///
22292 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_sd&expand=2569)
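///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `a[0] * b[0] + c[0]`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(4.0);
///     let c = _mm_set_sd(1.0);
///     // lower lane: 2.0 * 4.0 + 1.0 = 9.0; the upper lane comes from `a`
///     let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtsd_f64(r), 9.0);
/// }
/// ```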
22293 #[inline]
22294 #[target_feature(enable = "avx512f")]
22295 #[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
22296 #[rustc_args_required_const(3)]
22297 pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
22298 let extracta: f64 = simd_extract(a, 0);
22299 let extractb: f64 = simd_extract(b, 0);
22300 let extractc: f64 = simd_extract(c, 0);
22301 macro_rules! call {
22302 ($imm4:expr) => {
22303 vfmadd132sd(extracta, extractb, extractc, $imm4)
22304 };
22305 }
22306 let fmadd = constify_imm4_round!(rounding, call);
22307 let r = simd_insert(a, 0, fmadd);
22308 transmute(r)
22309 }
22310
22311 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22312 ///
22313 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22314 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22315 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22316 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22317 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22318 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22319 ///
22320 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_sd&expand=2570)
22321 #[inline]
22322 #[target_feature(enable = "avx512f")]
22323 #[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
22324 #[rustc_args_required_const(4)]
22325 pub unsafe fn _mm_mask_fmadd_round_sd(
22326 a: __m128d,
22327 k: __mmask8,
22328 b: __m128d,
22329 c: __m128d,
22330 rounding: i32,
22331 ) -> __m128d {
22332 let mut fmadd: f64 = simd_extract(a, 0);
22333 if (k & 0b00000001) != 0 {
22334 let extractb: f64 = simd_extract(b, 0);
22335 let extractc: f64 = simd_extract(c, 0);
22336 macro_rules! call {
22337 ($imm4:expr) => {
22338 vfmadd132sd(fmadd, extractb, extractc, $imm4)
22339 };
22340 }
22341 fmadd = constify_imm4_round!(rounding, call);
22342 }
22343 let r = simd_insert(a, 0, fmadd);
22344 transmute(r)
22345 }
22346
22347 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22348 ///
22349 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22350 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22351 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22352 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22353 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22354 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22355 ///
22356 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_sd&expand=2572)
22357 #[inline]
22358 #[target_feature(enable = "avx512f")]
22359 #[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
22360 #[rustc_args_required_const(4)]
22361 pub unsafe fn _mm_maskz_fmadd_round_sd(
22362 k: __mmask8,
22363 a: __m128d,
22364 b: __m128d,
22365 c: __m128d,
22366 rounding: i32,
22367 ) -> __m128d {
22368 let mut fmadd: f64 = 0.;
22369 if (k & 0b00000001) != 0 {
22370 let extracta: f64 = simd_extract(a, 0);
22371 let extractb: f64 = simd_extract(b, 0);
22372 let extractc: f64 = simd_extract(c, 0);
22373 macro_rules! call {
22374 ($imm4:expr) => {
22375 vfmadd132sd(extracta, extractb, extractc, $imm4)
22376 };
22377 }
22378 fmadd = constify_imm4_round!(rounding, call);
22379 }
22380 let r = simd_insert(a, 0, fmadd);
22381 transmute(r)
22382 }
22383
22384 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
22385 ///
22386 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22387 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22388 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22389 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22390 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22391 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22392 ///
22393 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_Sd&expand=2571)
22394 #[inline]
22395 #[target_feature(enable = "avx512f")]
22396 #[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
22397 #[rustc_args_required_const(4)]
22398 pub unsafe fn _mm_mask3_fmadd_round_sd(
22399 a: __m128d,
22400 b: __m128d,
22401 c: __m128d,
22402 k: __mmask8,
22403 rounding: i32,
22404 ) -> __m128d {
22405 let mut fmadd: f64 = simd_extract(c, 0);
22406 if (k & 0b00000001) != 0 {
22407 let extracta: f64 = simd_extract(a, 0);
22408 let extractb: f64 = simd_extract(b, 0);
22409 macro_rules! call {
22410 ($imm4:expr) => {
22411 vfmadd132sd(extracta, extractb, fmadd, $imm4)
22412 };
22413 }
22414 fmadd = constify_imm4_round!(rounding, call);
22415 }
22416 let r = simd_insert(c, 0, fmadd);
22417 transmute(r)
22418 }
22419
22420 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
22421 ///
22422 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22423 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22424 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22425 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22426 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22427 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22428 ///
22429 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_ss&expand=2659)
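///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `a[0] * b[0] - c[0]`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // lower lane: 2.0 * 3.0 - 1.0 = 5.0; the upper 3 lanes come from `a`
///     let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r), 5.0);
/// }
/// ```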
22430 #[inline]
22431 #[target_feature(enable = "avx512f")]
22432 #[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
22433 #[rustc_args_required_const(3)]
22434 pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
22435 let extracta: f32 = simd_extract(a, 0);
22436 let extractb: f32 = simd_extract(b, 0);
22437 let extractc: f32 = simd_extract(c, 0);
22438 let extractc = -extractc;
22439 macro_rules! call {
22440 ($imm4:expr) => {
22441 vfmadd132ss(extracta, extractb, extractc, $imm4)
22442 };
22443 }
22444 let fmsub = constify_imm4_round!(rounding, call);
22445 let r = simd_insert(a, 0, fmsub);
22446 transmute(r)
22447 }
22448
22449 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22450 ///
22451 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22452 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22453 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22454 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22455 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22456 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22457 ///
22458 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_ss&expand=2660)
22459 #[inline]
22460 #[target_feature(enable = "avx512f")]
22461 #[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
22462 #[rustc_args_required_const(4)]
22463 pub unsafe fn _mm_mask_fmsub_round_ss(
22464 a: __m128,
22465 k: __mmask8,
22466 b: __m128,
22467 c: __m128,
22468 rounding: i32,
22469 ) -> __m128 {
22470 let mut fmsub: f32 = simd_extract(a, 0);
22471 if (k & 0b00000001) != 0 {
22472 let extractb: f32 = simd_extract(b, 0);
22473 let extractc: f32 = simd_extract(c, 0);
22474 let extractc = -extractc;
22475 macro_rules! call {
22476 ($imm4:expr) => {
22477 vfmadd132ss(fmsub, extractb, extractc, $imm4)
22478 };
22479 }
22480 fmsub = constify_imm4_round!(rounding, call);
22481 }
22482 let r = simd_insert(a, 0, fmsub);
22483 transmute(r)
22484 }
22485
22486 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22487 ///
22488 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22489 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22490 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22491 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22492 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22493 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22494 ///
22495 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_ss&expand=2662)
22496 #[inline]
22497 #[target_feature(enable = "avx512f")]
22498 #[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
22499 #[rustc_args_required_const(4)]
22500 pub unsafe fn _mm_maskz_fmsub_round_ss(
22501 k: __mmask8,
22502 a: __m128,
22503 b: __m128,
22504 c: __m128,
22505 rounding: i32,
22506 ) -> __m128 {
22507 let mut fmsub: f32 = 0.;
22508 if (k & 0b00000001) != 0 {
22509 let extracta: f32 = simd_extract(a, 0);
22510 let extractb: f32 = simd_extract(b, 0);
22511 let extractc: f32 = simd_extract(c, 0);
22512 let extractc = -extractc;
22513 macro_rules! call {
22514 ($imm4:expr) => {
22515 vfmadd132ss(extracta, extractb, extractc, $imm4)
22516 };
22517 }
22518 fmsub = constify_imm4_round!(rounding, call);
22519 }
22520 let r = simd_insert(a, 0, fmsub);
22521 transmute(r)
22522 }
22523
22524 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
22525 ///
22526 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22527 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22528 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22529 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22530 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22531 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22532 ///
22533 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_ss&expand=2661)
22534 #[inline]
22535 #[target_feature(enable = "avx512f")]
22536 #[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
22537 #[rustc_args_required_const(4)]
22538 pub unsafe fn _mm_mask3_fmsub_round_ss(
22539 a: __m128,
22540 b: __m128,
22541 c: __m128,
22542 k: __mmask8,
22543 rounding: i32,
22544 ) -> __m128 {
22545 let mut fmsub: f32 = simd_extract(c, 0);
22546 if (k & 0b00000001) != 0 {
22547 let extracta: f32 = simd_extract(a, 0);
22548 let extractb: f32 = simd_extract(b, 0);
22549 let extractc = -fmsub;
22550 macro_rules! call {
22551 ($imm4:expr) => {
22552 vfmadd132ss(extracta, extractb, extractc, $imm4)
22553 };
22554 }
22555 fmsub = constify_imm4_round!(rounding, call);
22556 }
22557 let r = simd_insert(c, 0, fmsub);
22558 transmute(r)
22559 }
22560
22561 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
22562 ///
22563 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22564 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22565 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22566 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22567 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22568 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22569 ///
22570 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_sd&expand=2655)
22571 #[inline]
22572 #[target_feature(enable = "avx512f")]
22573 #[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
22574 #[rustc_args_required_const(3)]
22575 pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
22576 let extracta: f64 = simd_extract(a, 0);
22577 let extractb: f64 = simd_extract(b, 0);
22578 let extractc: f64 = simd_extract(c, 0);
22579 let extractc = -extractc;
22580 macro_rules! call {
22581 ($imm4:expr) => {
22582 vfmadd132sd(extracta, extractb, extractc, $imm4)
22583 };
22584 }
22585 let fmsub = constify_imm4_round!(rounding, call);
22586 let r = simd_insert(a, 0, fmsub);
22587 transmute(r)
22588 }
22589
22590 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22591 ///
22592 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22593 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22594 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22595 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22596 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22597 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22598 ///
22599 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_sd&expand=2656)
22600 #[inline]
22601 #[target_feature(enable = "avx512f")]
22602 #[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
22603 #[rustc_args_required_const(4)]
22604 pub unsafe fn _mm_mask_fmsub_round_sd(
22605 a: __m128d,
22606 k: __mmask8,
22607 b: __m128d,
22608 c: __m128d,
22609 rounding: i32,
22610 ) -> __m128d {
22611 let mut fmsub: f64 = simd_extract(a, 0);
22612 if (k & 0b00000001) != 0 {
22613 let extractb: f64 = simd_extract(b, 0);
22614 let extractc: f64 = simd_extract(c, 0);
22615 let extractc = -extractc;
22616 macro_rules! call {
22617 ($imm4:expr) => {
22618 vfmadd132sd(fmsub, extractb, extractc, $imm4)
22619 };
22620 }
22621 fmsub = constify_imm4_round!(rounding, call);
22622 }
22623 let r = simd_insert(a, 0, fmsub);
22624 transmute(r)
22625 }
22626
22627 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22628 ///
22629 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22630 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22631 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22632 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22633 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22634 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22635 ///
22636 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_sd&expand=2658)
22637 #[inline]
22638 #[target_feature(enable = "avx512f")]
22639 #[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
22640 #[rustc_args_required_const(4)]
22641 pub unsafe fn _mm_maskz_fmsub_round_sd(
22642 k: __mmask8,
22643 a: __m128d,
22644 b: __m128d,
22645 c: __m128d,
22646 rounding: i32,
22647 ) -> __m128d {
22648 let mut fmsub: f64 = 0.;
22649 if (k & 0b00000001) != 0 {
22650 let extracta: f64 = simd_extract(a, 0);
22651 let extractb: f64 = simd_extract(b, 0);
22652 let extractc: f64 = simd_extract(c, 0);
22653 let extractc = -extractc;
22654 macro_rules! call {
22655 ($imm4:expr) => {
22656 vfmadd132sd(extracta, extractb, extractc, $imm4)
22657 };
22658 }
22659 fmsub = constify_imm4_round!(rounding, call);
22660 }
22661 let r = simd_insert(a, 0, fmsub);
22662 transmute(r)
22663 }
22664
22665 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
22666 ///
22667 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22668 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22669 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22670 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22671 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22672 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22673 ///
22674 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_sd&expand=2657)
22675 #[inline]
22676 #[target_feature(enable = "avx512f")]
22677 #[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
22678 #[rustc_args_required_const(4)]
22679 pub unsafe fn _mm_mask3_fmsub_round_sd(
22680 a: __m128d,
22681 b: __m128d,
22682 c: __m128d,
22683 k: __mmask8,
22684 rounding: i32,
22685 ) -> __m128d {
22686 let mut fmsub: f64 = simd_extract(c, 0);
22687 if (k & 0b00000001) != 0 {
22688 let extracta: f64 = simd_extract(a, 0);
22689 let extractb: f64 = simd_extract(b, 0);
22690 let extractc = -fmsub;
22691 macro_rules! call {
22692 ($imm4:expr) => {
22693 vfmadd132sd(extracta, extractb, extractc, $imm4)
22694 };
22695 }
22696 fmsub = constify_imm4_round!(rounding, call);
22697 }
22698 let r = simd_insert(c, 0, fmsub);
22699 transmute(r)
22700 }
22701
22702 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
22703 ///
22704 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22705 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22706 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22707 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22708 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22709 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22710 ///
22711 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_ss&expand=2739)
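///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `-(a[0] * b[0]) + c[0]`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // lower lane: -(2.0 * 3.0) + 1.0 = -5.0; the upper 3 lanes come from `a`
///     let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r), -5.0);
/// }
/// ```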
22712 #[inline]
22713 #[target_feature(enable = "avx512f")]
22714 #[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
22715 #[rustc_args_required_const(3)]
22716 pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
22717 let extracta: f32 = simd_extract(a, 0);
22718 let extracta = -extracta;
22719 let extractb: f32 = simd_extract(b, 0);
22720 let extractc: f32 = simd_extract(c, 0);
22721 macro_rules! call {
22722 ($imm4:expr) => {
22723 vfmadd132ss(extracta, extractb, extractc, $imm4)
22724 };
22725 }
22726 let fnmadd = constify_imm4_round!(rounding, call);
22727 let r = simd_insert(a, 0, fnmadd);
22728 transmute(r)
22729 }
22730
22731 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22732 ///
22733 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22734 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22735 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22736 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22737 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22738 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22739 ///
22740 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_ss&expand=2740)
22741 #[inline]
22742 #[target_feature(enable = "avx512f")]
22743 #[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
22744 #[rustc_args_required_const(4)]
22745 pub unsafe fn _mm_mask_fnmadd_round_ss(
22746 a: __m128,
22747 k: __mmask8,
22748 b: __m128,
22749 c: __m128,
22750 rounding: i32,
22751 ) -> __m128 {
22752 let mut fnmadd: f32 = simd_extract(a, 0);
22753 if (k & 0b00000001) != 0 {
22754 let extracta = -fnmadd;
22755 let extractb: f32 = simd_extract(b, 0);
22756 let extractc: f32 = simd_extract(c, 0);
22757 macro_rules! call {
22758 ($imm4:expr) => {
22759 vfmadd132ss(extracta, extractb, extractc, $imm4)
22760 };
22761 }
22762 fnmadd = constify_imm4_round!(rounding, call);
22763 }
22764 let r = simd_insert(a, 0, fnmadd);
22765 transmute(r)
22766 }
22767
22768 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
22769 ///
22770 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22771 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22772 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22773 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22774 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22775 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22776 ///
22777 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_ss&expand=2742)
22778 #[inline]
22779 #[target_feature(enable = "avx512f")]
22780 #[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
22781 #[rustc_args_required_const(4)]
22782 pub unsafe fn _mm_maskz_fnmadd_round_ss(
22783 k: __mmask8,
22784 a: __m128,
22785 b: __m128,
22786 c: __m128,
22787 rounding: i32,
22788 ) -> __m128 {
22789 let mut fnmadd: f32 = 0.;
22790 if (k & 0b00000001) != 0 {
22791 let extracta: f32 = simd_extract(a, 0);
22792 let extracta = -extracta;
22793 let extractb: f32 = simd_extract(b, 0);
22794 let extractc: f32 = simd_extract(c, 0);
22795 macro_rules! call {
22796 ($imm4:expr) => {
22797 vfmadd132ss(extracta, extractb, extractc, $imm4)
22798 };
22799 }
22800 fnmadd = constify_imm4_round!(rounding, call);
22801 }
22802 let r = simd_insert(a, 0, fnmadd);
22803 transmute(r)
22804 }
22805
22806 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
22807 ///
22808 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22809 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22810 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22811 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22812 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22813 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22814 ///
22815 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_ss&expand=2741)
22816 #[inline]
22817 #[target_feature(enable = "avx512f")]
22818 #[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
22819 #[rustc_args_required_const(4)]
22820 pub unsafe fn _mm_mask3_fnmadd_round_ss(
22821 a: __m128,
22822 b: __m128,
22823 c: __m128,
22824 k: __mmask8,
22825 rounding: i32,
22826 ) -> __m128 {
22827 let mut fnmadd: f32 = simd_extract(c, 0);
22828 if (k & 0b00000001) != 0 {
22829 let extracta: f32 = simd_extract(a, 0);
22830 let extracta = -extracta;
22831 let extractb: f32 = simd_extract(b, 0);
22832 macro_rules! call {
22833 ($imm4:expr) => {
22834 vfmadd132ss(extracta, extractb, fnmadd, $imm4)
22835 };
22836 }
22837 fnmadd = constify_imm4_round!(rounding, call);
22838 }
22839 let r = simd_insert(c, 0, fnmadd);
22840 transmute(r)
22841 }
22842
22843 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
22844 ///
22845 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22846 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22847 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22848 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22849 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22850 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22851 ///
22852 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_sd&expand=2735)
22853 #[inline]
22854 #[target_feature(enable = "avx512f")]
22855 #[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
22856 #[rustc_args_required_const(3)]
22857 pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
22858 let extracta: f64 = simd_extract(a, 0);
22859 let extracta = -extracta;
22860 let extractb: f64 = simd_extract(b, 0);
22861 let extractc: f64 = simd_extract(c, 0);
22862 macro_rules! call {
22863 ($imm4:expr) => {
22864 vfmadd132sd(extracta, extractb, extractc, $imm4)
22865 };
22866 }
22867 let fnmadd = constify_imm4_round!(rounding, call);
22868 let r = simd_insert(a, 0, fnmadd);
22869 transmute(r)
22870 }
22871
22872 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22873 ///
22874 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22875 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22876 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22877 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22878 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22879 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22880 ///
22881 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_sd&expand=2736)
22882 #[inline]
22883 #[target_feature(enable = "avx512f")]
22884 #[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
22885 #[rustc_args_required_const(4)]
22886 pub unsafe fn _mm_mask_fnmadd_round_sd(
22887 a: __m128d,
22888 k: __mmask8,
22889 b: __m128d,
22890 c: __m128d,
22891 rounding: i32,
22892 ) -> __m128d {
22893 let mut fnmadd: f64 = simd_extract(a, 0);
22894 if (k & 0b00000001) != 0 {
22895 let extracta = -fnmadd;
22896 let extractb: f64 = simd_extract(b, 0);
22897 let extractc: f64 = simd_extract(c, 0);
22898 macro_rules! call {
22899 ($imm4:expr) => {
22900 vfmadd132sd(extracta, extractb, extractc, $imm4)
22901 };
22902 }
22903 fnmadd = constify_imm4_round!(rounding, call);
22904 }
22905 let r = simd_insert(a, 0, fnmadd);
22906 transmute(r)
22907 }
22908
22909 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
22910 ///
22911 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22912 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22913 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22914 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22915 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22916 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22917 ///
22918 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_sd&expand=2738)
22919 #[inline]
22920 #[target_feature(enable = "avx512f")]
22921 #[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
22922 #[rustc_args_required_const(4)]
22923 pub unsafe fn _mm_maskz_fnmadd_round_sd(
22924 k: __mmask8,
22925 a: __m128d,
22926 b: __m128d,
22927 c: __m128d,
22928 rounding: i32,
22929 ) -> __m128d {
22930 let mut fnmadd: f64 = 0.;
22931 if (k & 0b00000001) != 0 {
22932 let extracta: f64 = simd_extract(a, 0);
22933 let extracta = -extracta;
22934 let extractb: f64 = simd_extract(b, 0);
22935 let extractc: f64 = simd_extract(c, 0);
22936 macro_rules! call {
22937 ($imm4:expr) => {
22938 vfmadd132sd(extracta, extractb, extractc, $imm4)
22939 };
22940 }
22941 fnmadd = constify_imm4_round!(rounding, call);
22942 }
22943 let r = simd_insert(a, 0, fnmadd);
22944 transmute(r)
22945 }
22946
22947 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
22948 ///
22949 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22950 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22951 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22952 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22953 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22954 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22955 ///
22956 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_Sd&expand=2737)
22957 #[inline]
22958 #[target_feature(enable = "avx512f")]
22959 #[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
22960 #[rustc_args_required_const(4)]
22961 pub unsafe fn _mm_mask3_fnmadd_round_sd(
22962 a: __m128d,
22963 b: __m128d,
22964 c: __m128d,
22965 k: __mmask8,
22966 rounding: i32,
22967 ) -> __m128d {
22968 let mut fnmadd: f64 = simd_extract(c, 0);
22969 if (k & 0b00000001) != 0 {
22970 let extracta: f64 = simd_extract(a, 0);
22971 let extracta = -extracta;
22972 let extractb: f64 = simd_extract(b, 0);
22973 macro_rules! call {
22974 ($imm4:expr) => {
22975 vfmadd132sd(extracta, extractb, fnmadd, $imm4)
22976 };
22977 }
22978 fnmadd = constify_imm4_round!(rounding, call);
22979 }
22980 let r = simd_insert(c, 0, fnmadd);
22981 transmute(r)
22982 }
22983
22984 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
22985 ///
22986 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
22987 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
22988 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
22989 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
22990 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
22991 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
22992 ///
22993 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_ss&expand=2787)
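///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `-(a[0] * b[0]) - c[0]`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // lower lane: -(2.0 * 3.0) - 1.0 = -7.0; the upper 3 lanes come from `a`
///     let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtss_f32(r), -7.0);
/// }
/// ```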
22994 #[inline]
22995 #[target_feature(enable = "avx512f")]
22996 #[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
22997 #[rustc_args_required_const(3)]
22998 pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
22999 let extracta: f32 = simd_extract(a, 0);
23000 let extracta = -extracta;
23001 let extractb: f32 = simd_extract(b, 0);
23002 let extractc: f32 = simd_extract(c, 0);
23003 let extractc = -extractc;
23004 macro_rules! call {
23005 ($imm4:expr) => {
23006 vfmadd132ss(extracta, extractb, extractc, $imm4)
23007 };
23008 }
23009 let fnmsub = constify_imm4_round!(rounding, call);
23010 let r = simd_insert(a, 0, fnmsub);
23011 transmute(r)
23012 }
23013
23014 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
23015 ///
23016 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23017 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23018 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23019 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23020 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23021 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23022 ///
23023 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_ss&expand=2788)
23024 #[inline]
23025 #[target_feature(enable = "avx512f")]
23026 #[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
23027 #[rustc_args_required_const(4)]
23028 pub unsafe fn _mm_mask_fnmsub_round_ss(
23029 a: __m128,
23030 k: __mmask8,
23031 b: __m128,
23032 c: __m128,
23033 rounding: i32,
23034 ) -> __m128 {
23035 let mut fnmsub: f32 = simd_extract(a, 0);
23036 if (k & 0b00000001) != 0 {
23037 let extracta = -fnmsub;
23038 let extractb: f32 = simd_extract(b, 0);
23039 let extractc: f32 = simd_extract(c, 0);
23040 let extractc = -extractc;
23041 macro_rules! call {
23042 ($imm4:expr) => {
23043 vfmadd132ss(extracta, extractb, extractc, $imm4)
23044 };
23045 }
23046 fnmsub = constify_imm4_round!(rounding, call);
23047 }
23048 let r = simd_insert(a, 0, fnmsub);
23049 transmute(r)
23050 }
23051
23052 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
23053 ///
23054 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23055 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23056 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23057 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23058 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23059 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23060 ///
23061 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_ss&expand=2790)
23062 #[inline]
23063 #[target_feature(enable = "avx512f")]
23064 #[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
23065 #[rustc_args_required_const(4)]
23066 pub unsafe fn _mm_maskz_fnmsub_round_ss(
23067 k: __mmask8,
23068 a: __m128,
23069 b: __m128,
23070 c: __m128,
23071 rounding: i32,
23072 ) -> __m128 {
23073 let mut fnmsub: f32 = 0.;
23074 if (k & 0b00000001) != 0 {
23075 let extracta: f32 = simd_extract(a, 0);
23076 let extracta = -extracta;
23077 let extractb: f32 = simd_extract(b, 0);
23078 let extractc: f32 = simd_extract(c, 0);
23079 let extractc = -extractc;
23080 macro_rules! call {
23081 ($imm4:expr) => {
23082 vfmadd132ss(extracta, extractb, extractc, $imm4)
23083 };
23084 }
23085 fnmsub = constify_imm4_round!(rounding, call);
23086 }
23087 let r = simd_insert(a, 0, fnmsub);
23088 transmute(r)
23089 }
23090
23091 /// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
23092 ///
23093 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23094 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23095 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23096 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23097 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23098 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23099 ///
23100 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_ss&expand=2789)
23101 #[inline]
23102 #[target_feature(enable = "avx512f")]
23103 #[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
23104 #[rustc_args_required_const(4)]
23105 pub unsafe fn _mm_mask3_fnmsub_round_ss(
23106 a: __m128,
23107 b: __m128,
23108 c: __m128,
23109 k: __mmask8,
23110 rounding: i32,
23111 ) -> __m128 {
23112 let mut fnmsub: f32 = simd_extract(c, 0);
23113 if (k & 0b00000001) != 0 {
23114 let extracta: f32 = simd_extract(a, 0);
23115 let extracta = -extracta;
23116 let extractb: f32 = simd_extract(b, 0);
23117 let extractc = -fnmsub;
23118 macro_rules! call {
23119 ($imm4:expr) => {
23120 vfmadd132ss(extracta, extractb, extractc, $imm4)
23121 };
23122 }
23123 fnmsub = constify_imm4_round!(rounding, call);
23124 }
23125 let r = simd_insert(c, 0, fnmsub);
23126 transmute(r)
23127 }
23128
23129 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
23130 ///
23131 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23132 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23133 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23134 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23135 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23136 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23137 ///
23138 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_sd&expand=2783)
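///
/// A minimal illustrative sketch (not part of Intel's documentation), using assumed
/// values; the lower result is `-(a[0] * b[0]) - c[0]`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(4.0);
///     let c = _mm_set_sd(1.0);
///     // lower lane: -(2.0 * 4.0) - 1.0 = -9.0; the upper lane comes from `a`
///     let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///     assert_eq!(_mm_cvtsd_f64(r), -9.0);
/// }
/// ```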
23139 #[inline]
23140 #[target_feature(enable = "avx512f")]
23141 #[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
23142 #[rustc_args_required_const(3)]
23143 pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
23144 let extracta: f64 = simd_extract(a, 0);
23145 let extracta = -extracta;
23146 let extractb: f64 = simd_extract(b, 0);
23147 let extractc: f64 = simd_extract(c, 0);
23148 let extractc = -extractc;
23149 macro_rules! call {
23150 ($imm4:expr) => {
23151 vfmadd132sd(extracta, extractb, extractc, $imm4)
23152 };
23153 }
23154 let fnmsub = constify_imm4_round!(rounding, call);
23155 let r = simd_insert(a, 0, fnmsub);
23156 transmute(r)
23157 }
23158
23159 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
23160 ///
23161 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23162 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23163 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23164 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23165 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23166 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23167 ///
23168 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_sd&expand=2784)
23169 #[inline]
23170 #[target_feature(enable = "avx512f")]
23171 #[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
23172 #[rustc_args_required_const(4)]
23173 pub unsafe fn _mm_mask_fnmsub_round_sd(
23174 a: __m128d,
23175 k: __mmask8,
23176 b: __m128d,
23177 c: __m128d,
23178 rounding: i32,
23179 ) -> __m128d {
23180 let mut fnmsub: f64 = simd_extract(a, 0);
23181 if (k & 0b00000001) != 0 {
23182 let extracta = -fnmsub;
23183 let extractb: f64 = simd_extract(b, 0);
23184 let extractc: f64 = simd_extract(c, 0);
23185 let extractc = -extractc;
23186 macro_rules! call {
23187 ($imm4:expr) => {
23188 vfmadd132sd(extracta, extractb, extractc, $imm4)
23189 };
23190 }
23191 fnmsub = constify_imm4_round!(rounding, call);
23192 }
23193 let r = simd_insert(a, 0, fnmsub);
23194 transmute(r)
23195 }
23196
23197 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
23198 ///
23199 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23200 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23201 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23202 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23203 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23204 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23205 ///
23206 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_sd&expand=2786)
23207 #[inline]
23208 #[target_feature(enable = "avx512f")]
23209 #[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
23210 #[rustc_args_required_const(4)]
23211 pub unsafe fn _mm_maskz_fnmsub_round_sd(
23212 k: __mmask8,
23213 a: __m128d,
23214 b: __m128d,
23215 c: __m128d,
23216 rounding: i32,
23217 ) -> __m128d {
23218 let mut fnmsub: f64 = 0.;
23219 if (k & 0b00000001) != 0 {
23220 let extracta: f64 = simd_extract(a, 0);
23221 let extracta = -extracta;
23222 let extractb: f64 = simd_extract(b, 0);
23223 let extractc: f64 = simd_extract(c, 0);
23224 let extractc = -extractc;
23225 macro_rules! call {
23226 ($imm4:expr) => {
23227 vfmadd132sd(extracta, extractb, extractc, $imm4)
23228 };
23229 }
23230 fnmsub = constify_imm4_round!(rounding, call);
23231 }
23232 let r = simd_insert(a, 0, fnmsub);
23233 transmute(r)
23234 }
23235
23236 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
23237 ///
23238 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23239 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23240 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23241 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23242 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23243 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23244 ///
23245 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_sd&expand=2785)
23246 #[inline]
23247 #[target_feature(enable = "avx512f")]
23248 #[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
23249 #[rustc_args_required_const(4)]
23250 pub unsafe fn _mm_mask3_fnmsub_round_sd(
23251 a: __m128d,
23252 b: __m128d,
23253 c: __m128d,
23254 k: __mmask8,
23255 rounding: i32,
23256 ) -> __m128d {
23257 let mut fnmsub: f64 = simd_extract(c, 0);
23258 if (k & 0b00000001) != 0 {
23259 let extracta: f64 = simd_extract(a, 0);
23260 let extracta = -extracta;
23261 let extractb: f64 = simd_extract(b, 0);
23262 let extractc = -fnmsub;
23263 macro_rules! call {
23264 ($imm4:expr) => {
23265 vfmadd132sd(extracta, extractb, extractc, $imm4)
23266 };
23267 }
23268 fnmsub = constify_imm4_round!(rounding, call);
23269 }
23270 let r = simd_insert(c, 0, fnmsub);
23271 transmute(r)
23272 }
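
// Editorial usage sketch (not part of the upstream source; the function name is
// hypothetical): the low lane of the masked scalar FNMSUB intrinsics above computes
// -(a0 * b0) - c0 when mask bit 0 is set and is passed through otherwise.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_fnmsub_round_sd_sketch() {
    let a = _mm_set_pd(2.0, 1.0); // low lane 1.0, high lane 2.0
    let b = _mm_set_pd(0.0, 3.0); // low lane 3.0
    let c = _mm_set_pd(0.0, 4.0); // low lane 4.0
    // Mask bit 0 set: low lane = -(1.0 * 3.0) - 4.0 = -7.0, high lane copied from `a`.
    let r = _mm_mask_fnmsub_round_sd(a, 0b00000001, b, c, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtsd_f64(r), -7.0);
    // Mask bit 0 clear: the low lane of `a` is kept unchanged.
    let r = _mm_mask_fnmsub_round_sd(a, 0b00000000, b, c, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtsd_f64(r), 1.0);
}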
23273
23274 /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
23275 ///
23276 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_ss&expand=2517)
23277 #[inline]
23278 #[target_feature(enable = "avx512f")]
23279 #[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
23280 #[rustc_args_required_const(3)]
23281 pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __m128 {
23282 let a = a.as_f32x4();
23283 let b = b.as_f32x4();
23284 let c = c.as_i32x4();
23285 macro_rules! call {
23286 ($imm8:expr) => {
23287 vfixupimmss(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
23288 };
23289 }
23290 let fixupimm = constify_imm8_sae!(imm8, call);
23291 let fixupimm: f32 = simd_extract(fixupimm, 0);
23292 let r = simd_insert(a, 0, fixupimm);
23293 transmute(r)
23294 }
23295
23296 /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
23297 ///
23298 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_ss&expand=2518)
23299 #[inline]
23300 #[target_feature(enable = "avx512f")]
23301 #[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
23302 #[rustc_args_required_const(4)]
23303 pub unsafe fn _mm_mask_fixupimm_ss(
23304 a: __m128,
23305 k: __mmask8,
23306 b: __m128,
23307 c: __m128i,
23308 imm8: i32,
23309 ) -> __m128 {
23310 let a = a.as_f32x4();
23311 let b = b.as_f32x4();
23312 let c = c.as_i32x4();
23313 macro_rules! call {
23314 ($imm8:expr) => {
23315 vfixupimmss(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
23316 };
23317 }
23318 let fixupimm = constify_imm8_sae!(imm8, call);
23319 let fixupimm: f32 = simd_extract(fixupimm, 0);
23320 let r = simd_insert(a, 0, fixupimm);
23321 transmute(r)
23322 }
23323
23324 /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
23325 ///
23326 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_ss&expand=2519)
23327 #[inline]
23328 #[target_feature(enable = "avx512f")]
23329 #[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
23330 #[rustc_args_required_const(4)]
23331 pub unsafe fn _mm_maskz_fixupimm_ss(
23332 k: __mmask8,
23333 a: __m128,
23334 b: __m128,
23335 c: __m128i,
23336 imm8: i32,
23337 ) -> __m128 {
23338 let a = a.as_f32x4();
23339 let b = b.as_f32x4();
23340 let c = c.as_i32x4();
23341 macro_rules! call {
23342 ($imm8:expr) => {
23343 vfixupimmssz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
23344 };
23345 }
23346 let fixupimm = constify_imm8_sae!(imm8, call);
23347 let fixupimm: f32 = simd_extract(fixupimm, 0);
23348 let r = simd_insert(a, 0, fixupimm);
23349 transmute(r)
23350 }
23351
23352 /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
23353 ///
23354 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_sd&expand=2514)
23355 #[inline]
23356 #[target_feature(enable = "avx512f")]
23357 #[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
23358 #[rustc_args_required_const(3)]
23359 pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> __m128d {
23360 let a = a.as_f64x2();
23361 let b = b.as_f64x2();
23362 let c = c.as_i64x2();
23363 macro_rules! call {
23364 ($imm8:expr) => {
23365 vfixupimmsd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
23366 };
23367 }
23368 let fixupimm = constify_imm8_sae!(imm8, call);
23369 let fixupimm: f64 = simd_extract(fixupimm, 0);
23370 let r = simd_insert(a, 0, fixupimm);
23371 transmute(r)
23372 }
23373
23374 /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
23375 ///
23376 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_sd&expand=2515)
23377 #[inline]
23378 #[target_feature(enable = "avx512f")]
23379 #[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
23380 #[rustc_args_required_const(4)]
23381 pub unsafe fn _mm_mask_fixupimm_sd(
23382 a: __m128d,
23383 k: __mmask8,
23384 b: __m128d,
23385 c: __m128i,
23386 imm8: i32,
23387 ) -> __m128d {
23388 let a = a.as_f64x2();
23389 let b = b.as_f64x2();
23390 let c = c.as_i64x2();
23391 macro_rules! call {
23392 ($imm8:expr) => {
23393 vfixupimmsd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
23394 };
23395 }
23396 let fixupimm = constify_imm8_sae!(imm8, call);
23397 let fixupimm: f64 = simd_extract(fixupimm, 0);
23398 let r = simd_insert(a, 0, fixupimm);
23399 transmute(r)
23400 }
23401
23402 /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
23403 ///
23404 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_sd&expand=2516)
23405 #[inline]
23406 #[target_feature(enable = "avx512f")]
23407 #[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
23408 #[rustc_args_required_const(4)]
23409 pub unsafe fn _mm_maskz_fixupimm_sd(
23410 k: __mmask8,
23411 a: __m128d,
23412 b: __m128d,
23413 c: __m128i,
23414 imm8: i32,
23415 ) -> __m128d {
23416 let a = a.as_f64x2();
23417 let b = b.as_f64x2();
23418 let c = c.as_i64x2();
23419 macro_rules! call {
23420 ($imm8:expr) => {
23421 vfixupimmsdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
23422 };
23423 }
23424 let fixupimm = constify_imm8_sae!(imm8, call);
23425 let fixupimm: f64 = simd_extract(fixupimm, 0);
23426 let r = simd_insert(a, 0, fixupimm);
23427 transmute(r)
23428 }
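
// Editorial call-shape sketch for the scalar FIXUPIMM intrinsics above (not part of
// the upstream source; the function name is hypothetical). The low lane of `c` is a
// table of 4-bit fix-up tokens indexed by the classification of b's low element;
// per Intel's token table, an all-zero table selects "keep the destination
// element", so the low lane of the result is expected to stay a's low element.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fixupimm_sd_sketch() {
    let a = _mm_set_pd(8.0, 2.5); // low lane 2.5
    let b = _mm_set_pd(0.0, -1.0); // low lane is classified as a negative finite value
    let c = _mm_setzero_si128(); // all-zero token table: every class maps to token 0
    let r = _mm_fixupimm_sd(a, b, c, 0); // imm8 = 0: no extra exception reporting
    assert_eq!(_mm_cvtsd_f64(r), 2.5);
}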
23429
23430 /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
23431 ///
23432 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23433 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_ss&expand=2511)
23434 #[inline]
23435 #[target_feature(enable = "avx512f")]
23436 #[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
23437 #[rustc_args_required_const(3, 4)]
23438 pub unsafe fn _mm_fixupimm_round_ss(
23439 a: __m128,
23440 b: __m128,
23441 c: __m128i,
23442 imm8: i32,
23443 sae: i32,
23444 ) -> __m128 {
23445 let a = a.as_f32x4();
23446 let b = b.as_f32x4();
23447 let c = c.as_i32x4();
23448 macro_rules! call {
23449 ($imm8:expr, $imm4:expr) => {
23450 vfixupimmss(a, b, c, $imm8, 0b11111111, $imm4)
23451 };
23452 }
23453 let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
23454 let fixupimm: f32 = simd_extract(fixupimm, 0);
23455 let r = simd_insert(a, 0, fixupimm);
23456 transmute(r)
23457 }
23458
23459 /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
23460 ///
23461 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23462 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_ss&expand=2512)
23463 #[inline]
23464 #[target_feature(enable = "avx512f")]
23465 #[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
23466 #[rustc_args_required_const(4, 5)]
23467 pub unsafe fn _mm_mask_fixupimm_round_ss(
23468 a: __m128,
23469 k: __mmask8,
23470 b: __m128,
23471 c: __m128i,
23472 imm8: i32,
23473 sae: i32,
23474 ) -> __m128 {
23475 let a = a.as_f32x4();
23476 let b = b.as_f32x4();
23477 let c = c.as_i32x4();
23478 macro_rules! call {
23479 ($imm8:expr, $imm4:expr) => {
23480 vfixupimmss(a, b, c, $imm8, k, $imm4)
23481 };
23482 }
23483 let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
23484 let fixupimm: f32 = simd_extract(fixupimm, 0);
23485 let r = simd_insert(a, 0, fixupimm);
23486 transmute(r)
23487 }
23488
23489 /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
23490 ///
23491 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23492 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_ss&expand=2513)
23493 #[inline]
23494 #[target_feature(enable = "avx512f")]
23495 #[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
23496 #[rustc_args_required_const(4, 5)]
23497 pub unsafe fn _mm_maskz_fixupimm_round_ss(
23498 k: __mmask8,
23499 a: __m128,
23500 b: __m128,
23501 c: __m128i,
23502 imm8: i32,
23503 sae: i32,
23504 ) -> __m128 {
23505 let a = a.as_f32x4();
23506 let b = b.as_f32x4();
23507 let c = c.as_i32x4();
23508 macro_rules! call {
23509 ($imm8:expr, $imm4:expr) => {
23510 vfixupimmssz(a, b, c, $imm8, k, $imm4)
23511 };
23512 }
23513 let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
23514 let fixupimm: f32 = simd_extract(fixupimm, 0);
23515 let r = simd_insert(a, 0, fixupimm);
23516 transmute(r)
23517 }
23518
23519 /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
23520 ///
23521 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23522 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_sd&expand=2508)
23523 #[inline]
23524 #[target_feature(enable = "avx512f")]
23525 #[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
23526 #[rustc_args_required_const(3, 4)]
23527 pub unsafe fn _mm_fixupimm_round_sd(
23528 a: __m128d,
23529 b: __m128d,
23530 c: __m128i,
23531 imm8: i32,
23532 sae: i32,
23533 ) -> __m128d {
23534 let a = a.as_f64x2();
23535 let b = b.as_f64x2();
23536 let c = c.as_i64x2();
23537 macro_rules! call {
23538 ($imm8:expr, $imm4:expr) => {
23539 vfixupimmsd(a, b, c, $imm8, 0b11111111, $imm4)
23540 };
23541 }
23542 let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
23543 let fixupimm: f64 = simd_extract(fixupimm, 0);
23544 let r = simd_insert(a, 0, fixupimm);
23545 transmute(r)
23546 }
23547
23548 /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
23549 ///
23550 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23551 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_sd&expand=2509)
23552 #[inline]
23553 #[target_feature(enable = "avx512f")]
23554 #[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
23555 #[rustc_args_required_const(4, 5)]
23556 pub unsafe fn _mm_mask_fixupimm_round_sd(
23557 a: __m128d,
23558 k: __mmask8,
23559 b: __m128d,
23560 c: __m128i,
23561 imm8: i32,
23562 sae: i32,
23563 ) -> __m128d {
23564 let a = a.as_f64x2();
23565 let b = b.as_f64x2();
23566 let c = c.as_i64x2();
23567 macro_rules! call {
23568 ($imm8:expr, $imm4:expr) => {
23569 vfixupimmsd(a, b, c, $imm8, k, $imm4)
23570 };
23571 }
23572 let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
23573 let fixupimm: f64 = simd_extract(fixupimm, 0);
23574 let r = simd_insert(a, 0, fixupimm);
23575 transmute(r)
23576 }
23577
23578 /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
23579 ///
23580 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23581 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_sd&expand=2510)
23582 #[inline]
23583 #[target_feature(enable = "avx512f")]
23584 #[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
23585 #[rustc_args_required_const(4, 5)]
23586 pub unsafe fn _mm_maskz_fixupimm_round_sd(
23587 k: __mmask8,
23588 a: __m128d,
23589 b: __m128d,
23590 c: __m128i,
23591 imm8: i32,
23592 sae: i32,
23593 ) -> __m128d {
23594 let a = a.as_f64x2();
23595 let b = b.as_f64x2();
23596 let c = c.as_i64x2();
23597 macro_rules! call {
23598 ($imm8:expr, $imm4:expr) => {
23599 vfixupimmsdz(a, b, c, $imm8, k, $imm4)
23600 };
23601 }
23602 let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
23603 let fixupimm: f64 = simd_extract(fixupimm, 0);
23604 let r = simd_insert(a, 0, fixupimm);
23605 transmute(r)
23606 }
23607
23608 /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
23609 ///
23610 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvtss_sd&expand=1896)
23611 #[inline]
23612 #[target_feature(enable = "avx512f")]
23613 #[cfg_attr(test, assert_instr(vcvtss2sd))]
23614 pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
23615 transmute(vcvtss2sd(
23616 a.as_f64x2(),
23617 b.as_f32x4(),
23618 src.as_f64x2(),
23619 k,
23620 _MM_FROUND_CUR_DIRECTION,
23621 ))
23622 }
23623
23624 /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
23625 ///
23626 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvtss_sd&expand=1897)
23627 #[inline]
23628 #[target_feature(enable = "avx512f")]
23629 #[cfg_attr(test, assert_instr(vcvtss2sd))]
23630 pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
23631 transmute(vcvtss2sd(
23632 a.as_f64x2(),
23633 b.as_f32x4(),
23634 _mm_setzero_pd().as_f64x2(),
23635 k,
23636 _MM_FROUND_CUR_DIRECTION,
23637 ))
23638 }
23639
23640 /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
23641 ///
23642 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvtsd_ss&expand=1797)
23643 #[inline]
23644 #[target_feature(enable = "avx512f")]
23645 #[cfg_attr(test, assert_instr(vcvtsd2ss))]
23646 pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
23647 transmute(vcvtsd2ss(
23648 a.as_f32x4(),
23649 b.as_f64x2(),
23650 src.as_f32x4(),
23651 k,
23652 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
23653 ))
23654 }
23655
23656 /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
23657 ///
23658 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvtsd_ss&expand=1798)
23659 #[inline]
23660 #[target_feature(enable = "avx512f")]
23661 #[cfg_attr(test, assert_instr(vcvtsd2ss))]
23662 pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
23663 transmute(vcvtsd2ss(
23664 a.as_f32x4(),
23665 b.as_f64x2(),
23666 _mm_setzero_ps().as_f32x4(),
23667 k,
23668 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
23669 ))
23670 }
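
// Editorial usage sketch for the masked scalar conversions above (not part of the
// upstream source; the function name is hypothetical): with mask bit 0 set the low
// lane holds the converted value, with it clear the writemask form keeps src's low
// lane and the zeromask form zeroes it; the upper lanes always come from `a`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_scalar_convert_sketch() {
    let src = _mm_set_pd(0.0, -9.0); // low lane -9.0
    let a = _mm_set_pd(7.0, 7.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 1.5); // low lane 1.5f32
    // Mask bit 0 set: the low lane becomes 1.5f64.
    let r = _mm_mask_cvtss_sd(src, 0b00000001, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 1.5);
    // Mask bit 0 clear: the low lane is taken from `src`.
    let r = _mm_mask_cvtss_sd(src, 0b00000000, a, b);
    assert_eq!(_mm_cvtsd_f64(r), -9.0);
    // Zero-masked double-to-single conversion: the low lane is zeroed instead.
    let r = _mm_maskz_cvtsd_ss(0b00000000, _mm_set1_ps(3.0), _mm_set1_pd(2.0));
    assert_eq!(_mm_cvtss_f32(r), 0.0);
}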
23671
23672 /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
23673 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23674 ///
23675 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_sd&expand=1371)
23676 #[inline]
23677 #[target_feature(enable = "avx512f")]
23678 #[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
23679 #[rustc_args_required_const(2)]
23680 pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d {
23681 macro_rules! call {
23682 ($imm4:expr) => {
23683 vcvtss2sd(
23684 a.as_f64x2(),
23685 b.as_f32x4(),
23686 _mm_setzero_pd().as_f64x2(),
23687 0b11111111,
23688 $imm4,
23689 )
23690 };
23691 }
23692 let r = constify_imm4_sae!(sae, call);
23693 transmute(r)
23694 }
23695
23696 /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
23697 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23698 ///
23699 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundss_sd&expand=1372)
23700 #[inline]
23701 #[target_feature(enable = "avx512f")]
23702 #[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
23703 #[rustc_args_required_const(4)]
23704 pub unsafe fn _mm_mask_cvt_roundss_sd(
23705 src: __m128d,
23706 k: __mmask8,
23707 a: __m128d,
23708 b: __m128,
23709 sae: i32,
23710 ) -> __m128d {
23711 macro_rules! call {
23712 ($imm4:expr) => {
23713 vcvtss2sd(a.as_f64x2(), b.as_f32x4(), src.as_f64x2(), k, $imm4)
23714 };
23715 }
23716 let r = constify_imm4_sae!(sae, call);
23717 transmute(r)
23718 }
23719
23720 /// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
23721 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
23722 ///
23723 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundss_sd&expand=1373)
23724 #[inline]
23725 #[target_feature(enable = "avx512f")]
23726 #[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
23727 #[rustc_args_required_const(3)]
23728 pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: i32) -> __m128d {
23729 macro_rules! call {
23730 ($imm4:expr) => {
23731 vcvtss2sd(
23732 a.as_f64x2(),
23733 b.as_f32x4(),
23734 _mm_setzero_pd().as_f64x2(),
23735 k,
23736 $imm4,
23737 )
23738 };
23739 }
23740 let r = constify_imm4_sae!(sae, call);
23741 transmute(r)
23742 }
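
// Editorial sketch for the SAE-taking conversion above (not part of the upstream
// source; the function name is hypothetical). Widening f32 to f64 is exact, so the
// `sae` argument only controls exception reporting; `_MM_FROUND_NO_EXC` would
// suppress exceptions entirely.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundss_sd_sketch() {
    let a = _mm_set_pd(6.0, 0.0); // high lane 6.0 is copied to the result
    let b = _mm_set_ps(0.0, 0.0, 0.0, 1.5);
    let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtsd_f64(r), 1.5);
}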
23743
23744 /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
23745 ///
23746 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23747 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23748 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23749 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23750 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23751 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23752 ///
23753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_ss&expand=1361)
23754 #[inline]
23755 #[target_feature(enable = "avx512f")]
23756 #[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
23757 #[rustc_args_required_const(2)]
23758 pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 {
23759 macro_rules! call {
23760 ($imm4:expr) => {
23761 vcvtsd2ss(
23762 a.as_f32x4(),
23763 b.as_f64x2(),
23764 _mm_setzero_ps().as_f32x4(),
23765 0b11111111,
23766 $imm4,
23767 )
23768 };
23769 }
23770 let r = constify_imm4_round!(rounding, call);
23771 transmute(r)
23772 }
23773
23774 /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
23775 ///
23776 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23777 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23778 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23779 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23780 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23781 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23782 ///
23783 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundsd_ss&expand=1362)
23784 #[inline]
23785 #[target_feature(enable = "avx512f")]
23786 #[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
23787 #[rustc_args_required_const(4)]
23788 pub unsafe fn _mm_mask_cvt_roundsd_ss(
23789 src: __m128,
23790 k: __mmask8,
23791 a: __m128,
23792 b: __m128d,
23793 rounding: i32,
23794 ) -> __m128 {
23795 macro_rules! call {
23796 ($imm4:expr) => {
23797 vcvtsd2ss(a.as_f32x4(), b.as_f64x2(), src.as_f32x4(), k, $imm4)
23798 };
23799 }
23800 let r = constify_imm4_round!(rounding, call);
23801 transmute(r)
23802 }
23803
23804 /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
23805 ///
23806 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23807 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23808 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23809 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23810 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23811 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23812 ///
23813 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundsd_ss&expand=1363)
23814 #[inline]
23815 #[target_feature(enable = "avx512f")]
23816 #[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
23817 #[rustc_args_required_const(3)]
23818 pub unsafe fn _mm_maskz_cvt_roundsd_ss(
23819 k: __mmask8,
23820 a: __m128,
23821 b: __m128d,
23822 rounding: i32,
23823 ) -> __m128 {
23824 macro_rules! call {
23825 ($imm4:expr) => {
23826 vcvtsd2ss(
23827 a.as_f32x4(),
23828 b.as_f64x2(),
23829 _mm_setzero_ps().as_f32x4(),
23830 k,
23831 $imm4,
23832 )
23833 };
23834 }
23835 let r = constify_imm4_round!(rounding, call);
23836 transmute(r)
23837 }
23838
23839 /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
23840 ///
23841 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23842 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23843 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23844 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23845 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23846 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23847 ///
23848 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_si32&expand=1374)
23849 #[inline]
23850 #[target_feature(enable = "avx512f")]
23851 #[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
23852 #[rustc_args_required_const(1)]
23853 pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 {
23854 macro_rules! call {
23855 ($imm4:expr) => {
23856 vcvtss2si(a.as_f32x4(), $imm4)
23857 };
23858 }
23859 let r = constify_imm4_round!(rounding, call);
23860 transmute(r)
23861 }
23862
23863 /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
23864 ///
23865 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23866 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23867 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23868 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23869 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23870 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23871 ///
23872 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_i32&expand=1369)
23873 #[inline]
23874 #[target_feature(enable = "avx512f")]
23875 #[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
23876 #[rustc_args_required_const(1)]
23877 pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 {
23878 macro_rules! call {
23879 ($imm4:expr) => {
23880 vcvtss2si(a.as_f32x4(), $imm4)
23881 };
23882 }
23883 let r = constify_imm4_round!(rounding, call);
23884 transmute(r)
23885 }
23886
23887 /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
23888 ///
23889 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23890 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23891 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23892 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23893 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23894 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23895 ///
23896 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_u32&expand=1376)
23897 #[inline]
23898 #[target_feature(enable = "avx512f")]
23899 #[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))]
23900 #[rustc_args_required_const(1)]
23901 pub unsafe fn _mm_cvt_roundss_u32(a: __m128, rounding: i32) -> u32 {
23902 macro_rules! call {
23903 ($imm4:expr) => {
23904 vcvtss2usi(a.as_f32x4(), $imm4)
23905 };
23906 }
23907 let r = constify_imm4_round!(rounding, call);
23908 transmute(r)
23909 }
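
// Editorial sketch showing how the explicit rounding mode changes the scalar
// float-to-integer conversions above (not part of the upstream source; the function
// name is hypothetical): 2.5 rounds to 2 under round-to-nearest-even and to 3 when
// rounding toward +infinity.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundss_si32_sketch() {
    let a = _mm_set_ps(0.0, 0.0, 0.0, 2.5); // low lane 2.5
    let nearest = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let up = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    assert_eq!(nearest, 2); // ties round to even
    assert_eq!(up, 3);
    // The unsigned variant follows the same rounding rules.
    let u = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    assert_eq!(u, 2);
}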
23910
23911 /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
23912 ///
23913 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtss_i32&expand=1893)
23914 #[inline]
23915 #[target_feature(enable = "avx512f")]
23916 #[cfg_attr(test, assert_instr(vcvtss2si))]
23917 pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 {
23918 transmute(vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
23919 }
23920
23921 /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
23922 ///
23923 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtss_u32&expand=1901)
23924 #[inline]
23925 #[target_feature(enable = "avx512f")]
23926 #[cfg_attr(test, assert_instr(vcvtss2usi))]
23927 pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
23928 transmute(vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
23929 }
23930
23931 /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
23932 ///
23933 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23934 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23935 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23936 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23937 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23938 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23939 ///
23940 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_si32&expand=1359)
23941 #[inline]
23942 #[target_feature(enable = "avx512f")]
23943 #[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
23944 #[rustc_args_required_const(1)]
23945 pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 {
23946 macro_rules! call {
23947 ($imm4:expr) => {
23948 vcvtsd2si(a.as_f64x2(), $imm4)
23949 };
23950 }
23951 let r = constify_imm4_round!(rounding, call);
23952 transmute(r)
23953 }
23954
23955 /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
23956 ///
23957 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23958 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23959 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23960 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23961 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23962 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23963 ///
23964 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_i32&expand=1357)
23965 #[inline]
23966 #[target_feature(enable = "avx512f")]
23967 #[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
23968 #[rustc_args_required_const(1)]
23969 pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 {
23970 macro_rules! call {
23971 ($imm4:expr) => {
23972 vcvtsd2si(a.as_f64x2(), $imm4)
23973 };
23974 }
23975 let r = constify_imm4_round!(rounding, call);
23976 transmute(r)
23977 }
23978
23979 /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
23980 ///
23981 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23982 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23983 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23984 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23985 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23986 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23987 ///
23988 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundsd_u32&expand=1364)
23989 #[inline]
23990 #[target_feature(enable = "avx512f")]
23991 #[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))]
23992 #[rustc_args_required_const(1)]
23993 pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d, rounding: i32) -> u32 {
23994 macro_rules! call {
23995 ($imm4:expr) => {
23996 vcvtsd2usi(a.as_f64x2(), $imm4)
23997 };
23998 }
23999 let r = constify_imm4_round!(rounding, call);
24000 transmute(r)
24001 }
24002
24003 /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
24004 ///
24005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtsd_i32&expand=1791)
24006 #[inline]
24007 #[target_feature(enable = "avx512f")]
24008 #[cfg_attr(test, assert_instr(vcvtsd2si))]
24009 pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 {
24010 transmute(vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
24011 }
24012
24013 /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
24014 ///
24015 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtsd_u32&expand=1799)
24016 #[inline]
24017 #[target_feature(enable = "avx512f")]
24018 #[cfg_attr(test, assert_instr(vcvtsd2usi))]
24019 pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 {
24020 transmute(vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
24021 }
24022
24023 /// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
24024 ///
24025 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
24026 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
24027 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
24028 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
24029 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
24030 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
24031 ///
24032 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundi32_ss&expand=1312)
24033 #[inline]
24034 #[target_feature(enable = "avx512f")]
24035 #[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
24036 #[rustc_args_required_const(2)]
24037 pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
24038 macro_rules! call {
24039 ($imm4:expr) => {
24040 vcvtsi2ss(a.as_f32x4(), b, $imm4)
24041 };
24042 }
24043 let r = constify_imm4_round!(rounding, call);
24044 transmute(r)
24045 }
24046
24047 /// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
24048 ///
24049 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
24050 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
24051 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
24052 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
24053 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
24054 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
24055 ///
24056 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsi32_ss&expand=1366)
24057 #[inline]
24058 #[target_feature(enable = "avx512f")]
24059 #[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
24060 #[rustc_args_required_const(2)]
24061 pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
24062 macro_rules! call {
24063 ($imm4:expr) => {
24064 vcvtsi2ss(a.as_f32x4(), b, $imm4)
24065 };
24066 }
24067 let r = constify_imm4_round!(rounding, call);
24068 transmute(r)
24069 }
24070
24071 /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
24072 ///
24073 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
24074 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
24075 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
24076 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
24077 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
24078 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
24079 ///
24080 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundu32_ss&expand=1378)
24081 #[inline]
24082 #[target_feature(enable = "avx512f")]
24083 #[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))]
24084 #[rustc_args_required_const(2)]
24085 pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32, rounding: i32) -> __m128 {
24086 macro_rules! call {
24087 ($imm4:expr) => {
24088 vcvtusi2ss(a.as_f32x4(), b, $imm4)
24089 };
24090 }
24091 let r = constify_imm4_round!(rounding, call);
24092 transmute(r)
24093 }
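
// Editorial sketch of why the rounding mode matters for the integer-to-float
// conversions above (not part of the upstream source; the function name is
// hypothetical): 16_777_217 = 2^24 + 1 is not representable in f32, so the embedded
// rounding mode decides which neighbouring value is produced.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundi32_ss_sketch() {
    let a = _mm_set1_ps(0.0);
    let down = _mm_cvt_roundi32_ss(a, 16_777_217, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    let up = _mm_cvt_roundi32_ss(a, 16_777_217, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtss_f32(down), 16_777_216.0);
    assert_eq!(_mm_cvtss_f32(up), 16_777_218.0);
}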
24094
24095 /// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
24096 ///
24097 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_ss&expand=1643)
24098 #[inline]
24099 #[target_feature(enable = "avx512f")]
24100 #[cfg_attr(test, assert_instr(vcvtsi2ss))]
24101 pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
24102 let b = b as f32;
24103 let r = simd_insert(a, 0, b);
24104 transmute(r)
24105 }
24106
24107 /// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
24108 ///
24109 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_sd&expand=1642)
24110 #[inline]
24111 #[target_feature(enable = "avx512f")]
24112 #[cfg_attr(test, assert_instr(vcvtsi2sd))]
24113 pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
24114 let b = b as f64;
24115 let r = simd_insert(a, 0, b);
24116 transmute(r)
24117 }
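
// Editorial sketch for the signed-integer insertions above (not part of the
// upstream source; the function name is hypothetical): the integer is converted and
// placed in the low lane, the remaining lanes are copied from `a`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvti32_sketch() {
    let r = _mm_cvti32_ss(_mm_set_ps(4.0, 3.0, 2.0, 1.0), -3);
    assert_eq!(_mm_cvtss_f32(r), -3.0);
    let d = _mm_cvti32_sd(_mm_set_pd(9.0, 1.0), 7);
    assert_eq!(_mm_cvtsd_f64(d), 7.0);
}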
24118
24119 /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
24120 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24121 ///
24122 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_Si32&expand=1936)
24123 #[inline]
24124 #[target_feature(enable = "avx512f")]
24125 #[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))]
24126 #[rustc_args_required_const(1)]
24127 pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 {
24128 macro_rules! call {
24129 ($imm4:expr) => {
24130 vcvtss2si(a.as_f32x4(), $imm4)
24131 };
24132 }
24133 let r = constify_imm4_sae!(sae, call);
24134 transmute(r)
24135 }
24136
24137 /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
24138 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24139 ///
24140 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_i32&expand=1934)
24141 #[inline]
24142 #[target_feature(enable = "avx512f")]
24143 #[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))]
24144 #[rustc_args_required_const(1)]
24145 pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 {
24146 macro_rules! call {
24147 ($imm4:expr) => {
24148 vcvtss2si(a.as_f32x4(), $imm4)
24149 };
24150 }
24151 let r = constify_imm4_sae!(sae, call);
24152 transmute(r)
24153 }
24154
24155 /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
24156 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24157 ///
24158 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_u32&expand=1938)
24159 #[inline]
24160 #[target_feature(enable = "avx512f")]
24161 #[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))]
24162 #[rustc_args_required_const(1)]
24163 pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 {
24164 macro_rules! call {
24165 ($imm4:expr) => {
24166 vcvtss2usi(a.as_f32x4(), $imm4)
24167 };
24168 }
24169 let r = constify_imm4_sae!(sae, call);
24170 transmute(r)
24171 }
24172
24173 /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
24174 ///
24175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_i32&expand=2022)
24176 #[inline]
24177 #[target_feature(enable = "avx512f")]
24178 #[cfg_attr(test, assert_instr(vcvtss2si))]
24179 pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
24180 transmute(vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
24181 }
24182
24183 /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
24184 ///
24185 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_u32&expand=2026)
24186 #[inline]
24187 #[target_feature(enable = "avx512f")]
24188 #[cfg_attr(test, assert_instr(vcvtss2usi))]
24189 pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
24190 transmute(vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
24191 }
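
// Editorial sketch for the truncating scalar conversions above (not part of the
// upstream source; the function name is hypothetical): the fractional part is
// discarded, so 3.25 becomes 3 in both the signed and unsigned forms.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttss_sketch() {
    let a = _mm_set_ps(0.0, 0.0, 0.0, 3.25);
    assert_eq!(_mm_cvttss_i32(a), 3);
    assert_eq!(_mm_cvttss_u32(a), 3);
}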
24192
24193 /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
24194 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24195 ///
24196 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_si32&expand=1930)
24197 #[inline]
24198 #[target_feature(enable = "avx512f")]
24199 #[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
24200 #[rustc_args_required_const(1)]
24201 pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 {
24202 macro_rules! call {
24203 ($imm4:expr) => {
24204 vcvtsd2si(a.as_f64x2(), $imm4)
24205 };
24206 }
24207 let r = constify_imm4_sae!(sae, call);
24208 transmute(r)
24209 }
24210
24211 /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
24212 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24213 ///
24214 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_i32&expand=1928)
24215 #[inline]
24216 #[target_feature(enable = "avx512f")]
24217 #[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
24218 #[rustc_args_required_const(1)]
24219 pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 {
24220 macro_rules! call {
24221 ($imm4:expr) => {
24222 vcvtsd2si(a.as_f64x2(), $imm4)
24223 };
24224 }
24225 let r = constify_imm4_sae!(sae, call);
24226 transmute(r)
24227 }
24228
24229 /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
24230 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24231 ///
24232 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_u32&expand=1932)
24233 #[inline]
24234 #[target_feature(enable = "avx512f")]
24235 #[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))]
24236 #[rustc_args_required_const(1)]
24237 pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 {
24238 macro_rules! call {
24239 ($imm4:expr) => {
24240 vcvtsd2usi(a.as_f64x2(), $imm4)
24241 };
24242 }
24243 let r = constify_imm4_sae!(sae, call);
24244 transmute(r)
24245 }
24246
24247 /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
24248 ///
24249 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_i32&expand=2015)
24250 #[inline]
24251 #[target_feature(enable = "avx512f")]
24252 #[cfg_attr(test, assert_instr(vcvtsd2si))]
24253 pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
24254 transmute(vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
24255 }
24256
24257 /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
24258 ///
24259 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_u32&expand=2020)
24260 #[inline]
24261 #[target_feature(enable = "avx512f")]
24262 #[cfg_attr(test, assert_instr(vcvtsd2usi))]
24263 pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
24264 transmute(vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
24265 }
24266
24267 /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
24268 ///
24269 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_ss&expand=2032)
24270 #[inline]
24271 #[target_feature(enable = "avx512f")]
24272 #[cfg_attr(test, assert_instr(vcvtusi2ss))]
24273 pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
24274 let b = b as f32;
24275 let r = simd_insert(a, 0, b);
24276 transmute(r)
24277 }
24278
24279 /// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
24280 ///
24281 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_sd&expand=2031)
24282 #[inline]
24283 #[target_feature(enable = "avx512f")]
24284 #[cfg_attr(test, assert_instr(vcvtusi2sd))]
24285 pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
24286 let b = b as f64;
24287 let r = simd_insert(a, 0, b);
24288 transmute(r)
24289 }
24290
24291 /// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
24292 ///
24293 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_ss&expand=2035)
24294 #[inline]
24295 #[target_feature(enable = "avx512f")]
24296 #[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss
24297 pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
24298 let b = b as f32;
24299 let r = simd_insert(a, 0, b);
24300 transmute(r)
24301 }
24302
24303 /// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
24304 ///
24305 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_sd&expand=2034)
24306 #[inline]
24307 #[target_feature(enable = "avx512f")]
24308 #[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd
24309 pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
24310 let b = b as f64;
24311 let r = simd_insert(a, 0, b);
24312 transmute(r)
24313 }
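
// Editorial sketch for the unsigned-integer insertions above (not part of the
// upstream source; the function name is hypothetical): exactly representable values
// convert losslessly into the low lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtu_sketch() {
    let r = _mm_cvtu32_ss(_mm_set1_ps(0.0), 9);
    assert_eq!(_mm_cvtss_f32(r), 9.0);
    let d = _mm_cvtu64_sd(_mm_set1_pd(0.0), 1 << 40);
    assert_eq!(_mm_cvtsd_f64(d), (1u64 << 40) as f64);
}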
24314
24315 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
24316 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24317 ///
24318 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_ss&expand=1175)
24319 #[inline]
24320 #[target_feature(enable = "avx512f")]
24321 #[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] // should be vcomiss
24322 #[rustc_args_required_const(2, 3)]
24323 pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> i32 {
24324 macro_rules! call {
24325 ($imm8:expr, $imm4:expr) => {
24326 vcomiss(a.as_f32x4(), b.as_f32x4(), $imm8, $imm4)
24327 };
24328 }
24329 let r = constify_imm5_sae!(imm8, sae, call);
24330 transmute(r)
24331 }
24332
24333 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
24334 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
24335 ///
24336 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_sd&expand=1174)
24337 #[inline]
24338 #[target_feature(enable = "avx512f")]
24339 #[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] // should be vcomisd
24340 #[rustc_args_required_const(2, 3)]
24341 pub unsafe fn _mm_comi_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> i32 {
24342 macro_rules! call {
24343 ($imm8:expr, $imm4:expr) => {
24344 vcomisd(a.as_f64x2(), b.as_f64x2(), $imm8, $imm4)
24345 };
24346 }
24347 let r = constify_imm5_sae!(imm8, sae, call);
24348 transmute(r)
24349 }
24350
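// Illustrative usage sketch: both immediates must be compile-time constants,
// so the predicate is written as a literal here (0 = equal, ordered,
// non-signaling; 1 = less-than, ordered, signaling). The helper name is
// hypothetical.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _comi_round_sketch() {
    let a = _mm_set1_ps(2.5);
    let b = _mm_set1_ps(2.5);
    // Equal lower lanes compare as equal, so the result is 1.
    assert_eq!(_mm_comi_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION), 1);

    let a = _mm_set1_pd(1.0);
    let b = _mm_set1_pd(2.0);
    // 1.0 < 2.0, so the less-than predicate also returns 1.
    assert_eq!(_mm_comi_round_sd(a, b, 1, _MM_FROUND_CUR_DIRECTION), 1);
}
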
24351 /// Equal
24352 pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
24353 /// Less-than
24354 pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
24355 /// Less-than-or-equal
24356 pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
24357 /// False
24358 pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
24359 /// Not-equal
24360 pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
24361 /// Not less-than
24362 pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
24363 /// Not less-than-or-equal
24364 pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
24365 /// True
24366 pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
24367
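// The constants above are the three-bit predicates passed as `imm8` to the
// integer mask comparisons backed by `vpcmpd`/`vpcmpud`/`vpcmpq`/`vpcmpuq`
// declared further down. A quick sanity check of the encoding (hypothetical
// helper, kept as plain integer arithmetic):
#[allow(dead_code)]
fn _mm_cmpint_encoding_sketch() {
    // NE/NLT/NLE are the negations of EQ/LT/LE, i.e. the predicate with
    // bit 2 set.
    assert_eq!(_MM_CMPINT_NE, _MM_CMPINT_EQ | 0x04);
    assert_eq!(_MM_CMPINT_NLT, _MM_CMPINT_LT | 0x04);
    assert_eq!(_MM_CMPINT_NLE, _MM_CMPINT_LE | 0x04);
}
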
24368 /// interval [1, 2)
24369 pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
24370 /// interval [0.5, 2)
24371 pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
24372 /// interval [0.5, 1)
24373 pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
24374 /// interval [0.75, 1.5)
24375 pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
24376
24377 /// sign = sign(SRC)
24378 pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
24379 /// sign = 0
24380 pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
24381 /// DEST = NaN if sign(SRC) = 1
24382 pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
24383
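// The constants above select how the getmant family (backed by `vgetmantps`
// and `vgetmantpd` below) normalizes the mantissa and treats the sign. A
// worked example of the [1, 2) normalization, kept as plain arithmetic in a
// hypothetical helper:
#[allow(dead_code)]
fn _mm_mant_norm_sketch() {
    // 10.0 = 1.25 * 2^3, so with _MM_MANT_NORM_1_2 and _MM_MANT_SIGN_SRC a
    // getmant-style call is expected to yield 1.25 for an input of 10.0;
    // _MM_MANT_SIGN_ZERO would instead drop the sign of a negative input,
    // and _MM_MANT_SIGN_NAN would map a negative input to NaN.
    assert_eq!(10.0_f32, 1.25 * (1 << 3) as f32);
}
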
24384 pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
24385 pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
24386 pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
24387 pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
24388 pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
24389 pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
24390 pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
24391 pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
24392 pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
24393 pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
24394 pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
24395 pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
24396 pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
24397 pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
24398 pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
24399 pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
24400 pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
24401 pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
24402 pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
24403 pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
24404 pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
24405 pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
24406 pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
24407 pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
24408 pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
24409 pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
24410 pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
24411 pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
24412 pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
24413 pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
24414 pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
24415 pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
24416 pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
24417 pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
24418 pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
24419 pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
24420 pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
24421 pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
24422 pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
24423 pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
24424 pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
24425 pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
24426 pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
24427 pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
24428 pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
24429 pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
24430 pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
24431 pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
24432 pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
24433 pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
24434 pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
24435 pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
24436 pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
24437 pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
24438 pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
24439 pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
24440 pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
24441 pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
24442 pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
24443 pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
24444 pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
24445 pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
24446 pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
24447 pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
24448 pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
24449 pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
24450 pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
24451 pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
24452 pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
24453 pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
24454 pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
24455 pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
24456 pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
24457 pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
24458 pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
24459 pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
24460 pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
24461 pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
24462 pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
24463 pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
24464 pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
24465 pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
24466 pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
24467 pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
24468 pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
24469 pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
24470 pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
24471 pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
24472 pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
24473 pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
24474 pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
24475 pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
24476 pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
24477 pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
24478 pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
24479 pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
24480 pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
24481 pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
24482 pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
24483 pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
24484 pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
24485 pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
24486 pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
24487 pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
24488 pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
24489 pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
24490 pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
24491 pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
24492 pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
24493 pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
24494 pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
24495 pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
24496 pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
24497 pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
24498 pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
24499 pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
24500 pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
24501 pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
24502 pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
24503 pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
24504 pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
24505 pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
24506 pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
24507 pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
24508 pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
24509 pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
24510 pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
24511 pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
24512 pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
24513 pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
24514 pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
24515 pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
24516 pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
24517 pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
24518 pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
24519 pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
24520 pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
24521 pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
24522 pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
24523 pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
24524 pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
24525 pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
24526 pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
24527 pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
24528 pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
24529 pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
24530 pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
24531 pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
24532 pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
24533 pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
24534 pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
24535 pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
24536 pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
24537 pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
24538 pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
24539 pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
24540 pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
24541 pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
24542 pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
24543 pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
24544 pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
24545 pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
24546 pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
24547 pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
24548 pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
24549 pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
24550 pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
24551 pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
24552 pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
24553 pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
24554 pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
24555 pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
24556 pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
24557 pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
24558 pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
24559 pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
24560 pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
24561 pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
24562 pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
24563 pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
24564 pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
24565 pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
24566 pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
24567 pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
24568 pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
24569 pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
24570 pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
24571 pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
24572 pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
24573 pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
24574 pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
24575 pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
24576 pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
24577 pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
24578 pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
24579 pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
24580 pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
24581 pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
24582 pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
24583 pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
24584 pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
24585 pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
24586 pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
24587 pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
24588 pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
24589 pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
24590 pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
24591 pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
24592 pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
24593 pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
24594 pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
24595 pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
24596 pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
24597 pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
24598 pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
24599 pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
24600 pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
24601 pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
24602 pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
24603 pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
24604 pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
24605 pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
24606 pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
24607 pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
24608 pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
24609 pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
24610 pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
24611 pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
24612 pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
24613 pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
24614 pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
24615 pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
24616 pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
24617 pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
24618 pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
24619 pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
24620 pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
24621 pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
24622 pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
24623 pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
24624 pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
24625 pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
24626 pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
24627 pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
24628 pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
24629 pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
24630 pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
24631 pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
24632 pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
24633 pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
24634 pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
24635 pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
24636 pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
24637 pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
24638 pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
24639 pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
24640
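// Each letter in the constants above is a two-bit element selector (A = 0b00,
// B = 0b01, C = 0b10, D = 0b11) written from the most significant field down,
// so _MM_PERM_DCBA (0xE4) is the identity control for per-lane shuffles such
// as `_mm512_shuffle_epi32`. A quick check of the encoding in a hypothetical
// helper:
#[allow(dead_code)]
fn _mm_perm_encoding_sketch() {
    assert_eq!(_MM_PERM_ABCD, 0b00_01_10_11);
    assert_eq!(_MM_PERM_DCBA, 0b11_10_01_00);
    assert_eq!(_MM_PERM_DDDD, 0xFF);
}
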
24641 #[allow(improper_ctypes)]
24642 extern "C" {
24643 #[link_name = "llvm.x86.avx512.pmul.dq.512"]
24644 fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
24645 #[link_name = "llvm.x86.avx512.pmulu.dq.512"]
24646 fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
24647
24648 #[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
24649 fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
24650 #[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
24651 fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
24652 #[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
24653 fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
24654 #[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
24655 fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
24656
24657 #[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
24658 fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
24659 #[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
24660 fn vpmaxuq(a: u64x8, b: u64x8) -> i64x8;
24661 #[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
24662 fn vpminud(a: u32x16, b: u32x16) -> u32x16;
24663 #[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
24664 fn vpminuq(a: u64x8, b: u64x8) -> i64x8;
24665
24666 #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
24667 fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
24668 #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
24669 fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
24670
24671 #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
24672 fn vfmadd132ps(a: f32x16, b: f32x16, c: f32x16, rounding: i32) -> f32x16;
24673 #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
24674 fn vfmadd132pd(a: f64x8, b: f64x8, c: f64x8, rounding: i32) -> f64x8;
24675
24676 #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
24677 fn vfmaddsub213ps(a: f32x16, b: f32x16, c: f32x16, d: i32) -> f32x16; // from clang
24678 #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
24679 fn vfmaddsub213pd(a: f64x8, b: f64x8, c: f64x8, d: i32) -> f64x8; // from clang
24680
24681 #[link_name = "llvm.x86.avx512.add.ps.512"]
24682 fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
24683 #[link_name = "llvm.x86.avx512.add.pd.512"]
24684 fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
24685 #[link_name = "llvm.x86.avx512.sub.ps.512"]
24686 fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
24687 #[link_name = "llvm.x86.avx512.sub.pd.512"]
24688 fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
24689 #[link_name = "llvm.x86.avx512.mul.ps.512"]
24690 fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
24691 #[link_name = "llvm.x86.avx512.mul.pd.512"]
24692 fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
24693 #[link_name = "llvm.x86.avx512.div.ps.512"]
24694 fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
24695 #[link_name = "llvm.x86.avx512.div.pd.512"]
24696 fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
24697
24698 #[link_name = "llvm.x86.avx512.max.ps.512"]
24699 fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
24700 #[link_name = "llvm.x86.avx512.max.pd.512"]
24701 fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
24702 #[link_name = "llvm.x86.avx512.min.ps.512"]
24703 fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
24704 #[link_name = "llvm.x86.avx512.min.pd.512"]
24705 fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
24706
24707 #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
24708 fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
24709 #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
24710 fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
24711
24712 #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
24713 fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
24714 #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
24715 fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
24716 #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
24717 fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
24718 #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
24719 fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
24720
24721 #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
24722 fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
24723 #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
24724 fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
24725 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
24726 fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
24727 #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
24728 fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
24729
24730 #[link_name = "llvm.x86.avx512.pternlog.d.512"]
24731 fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, sae: i32) -> i32x16;
24732 #[link_name = "llvm.x86.avx512.pternlog.q.512"]
24733 fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, sae: i32) -> i64x8;
24734
24735 #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
24736 fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
24737 #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
24738 fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
24739
24740 #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
24741 fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
24742 #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
24743 fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
24744 #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
24745 fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
24746 #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
24747 fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
24748
24749 #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
24750 fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
24751 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
24752 fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
24753 #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
24754 fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
24755 #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
24756 fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
24757 #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
24758 fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
24759 #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
24760 fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
24761 #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
24762 fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
24763 #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
24764 fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
24765
24766 #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
24767 fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
24768 #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
24769 fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
24770
24771 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
24772 fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
24773 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
24774 fn vcvttps2udq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> u32x16;
24775 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
24776 fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
24777 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
24778 fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
24779
24780 #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
24781 fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
24782 #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
24783 fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
24784 #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
24785 fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
24786 #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
24787 fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
24788 #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
24789 fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
24790 #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
24791 fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
24792 #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
24793 fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
24794 #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
24795 fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
24796 #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
24797 fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
24798 #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
24799 fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
24800 #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
24801 fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
24802
24803 #[link_name = "llvm.x86.avx512.gather.dpd.512"]
24804 fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
24805 #[link_name = "llvm.x86.avx512.gather.dps.512"]
24806 fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
24807 #[link_name = "llvm.x86.avx512.gather.qpd.512"]
24808 fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
24809 #[link_name = "llvm.x86.avx512.gather.qps.512"]
24810 fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
24811 #[link_name = "llvm.x86.avx512.gather.dpq.512"]
24812 fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
24813 #[link_name = "llvm.x86.avx512.gather.dpi.512"]
24814 fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
24815 #[link_name = "llvm.x86.avx512.gather.qpq.512"]
24816 fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
24817 #[link_name = "llvm.x86.avx512.gather.qpi.512"]
24818 fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
24819
24820 #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
24821 fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
24822 #[link_name = "llvm.x86.avx512.scatter.dps.512"]
24823 fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
24824 #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
24825 fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
24826 #[link_name = "llvm.x86.avx512.scatter.qps.512"]
24827 fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
24828 #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
24829 fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
24830 #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
24831 fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
24832 #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
24833 fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
24834 #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
24835 fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
24836
24837 #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
24838 fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
24839 #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
24840 fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
24841 #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
24842 fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
24843 #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
24844 fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
24845 #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
24846 fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
24847 #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
24848 fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
24849 #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
24850 fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
24851 #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
24852 fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
24853
24854 #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
24855 fn vprold(a: i32x16, imm8: i32) -> i32x16;
24856 #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
24857 fn vprord(a: i32x16, imm8: i32) -> i32x16;
24858 #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
24859 fn vprolq(a: i64x8, imm8: i32) -> i64x8;
24860 #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
24861 fn vprorq(a: i64x8, imm8: i32) -> i64x8;
24862
24863 #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
24864 fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
24865 #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
24866 fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
24867 #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
24868 fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
24869 #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
24870 fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
24871
24872 #[link_name = "llvm.x86.avx512.psllv.d.512"]
24873 fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
24874 #[link_name = "llvm.x86.avx512.psrlv.d.512"]
24875 fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
24876 #[link_name = "llvm.x86.avx512.psllv.q.512"]
24877 fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
24878 #[link_name = "llvm.x86.avx512.psrlv.q.512"]
24879 fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
24880
24881 #[link_name = "llvm.x86.avx512.pslli.d.512"]
24882 fn vpsllid(a: i32x16, imm8: u32) -> i32x16;
24883 #[link_name = "llvm.x86.avx512.psrli.d.512"]
24884 fn vpsrlid(a: i32x16, imm8: u32) -> i32x16;
24885 #[link_name = "llvm.x86.avx512.pslli.q.512"]
24886 fn vpslliq(a: i64x8, imm8: u32) -> i64x8;
24887 #[link_name = "llvm.x86.avx512.psrli.q.512"]
24888 fn vpsrliq(a: i64x8, imm8: u32) -> i64x8;
24889
24890 #[link_name = "llvm.x86.avx512.psll.d.512"]
24891 fn vpslld(a: i32x16, count: i32x4) -> i32x16;
24892 #[link_name = "llvm.x86.avx512.psrl.d.512"]
24893 fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
24894 #[link_name = "llvm.x86.avx512.psll.q.512"]
24895 fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
24896 #[link_name = "llvm.x86.avx512.psrl.q.512"]
24897 fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
24898
24899 #[link_name = "llvm.x86.avx512.psra.d.512"]
24900 fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
24901 #[link_name = "llvm.x86.avx512.psra.q.512"]
24902 fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
24903
24904 #[link_name = "llvm.x86.avx512.psrai.d.512"]
24905 fn vpsraid(a: i32x16, imm8: u32) -> i32x16;
24906 #[link_name = "llvm.x86.avx512.psrai.q.512"]
24907 fn vpsraiq(a: i64x8, imm8: u32) -> i64x8;
24908
24909 #[link_name = "llvm.x86.avx512.psrav.d.512"]
24910 fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
24911 #[link_name = "llvm.x86.avx512.psrav.q.512"]
24912 fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
24913
24914 #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
24915 fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
24916 #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
24917 fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
24918
24919 #[link_name = "llvm.x86.avx512.permvar.si.512"]
24920 fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
24921 #[link_name = "llvm.x86.avx512.permvar.di.512"]
24922 fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
24923 #[link_name = "llvm.x86.avx512.permvar.sf.512"]
24924 fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
24925 #[link_name = "llvm.x86.avx512.permvar.df.512"]
24926 fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
24927
24928 #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
24929 fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
24930 #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
24931 fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
24932 #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
24933 fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
24934 #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
24935 fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
24936
24937 #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
24938 fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
24939 #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
24940 fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
24941 #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
24942 fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
24943 #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
24944 fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
24945 #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
24946 fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
24947 #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
24948 fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
24949 #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
24950 fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
24951 #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
24952 fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
24953
24954 #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
24955 fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
24956 #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
24957 fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
24958 #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
24959 fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
24960 #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
24961 fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
24962 #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
24963 fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
24964 #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
24965 fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
24966 #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
24967 fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
24968 #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
24969 fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
24970 #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
24971 fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
24972 #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
24973 fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
24974 #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
24975 fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
24976 #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
24977 fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
24978 #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
24979 fn vsqrtss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
24980 #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
24981 fn vsqrtsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
24982 #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
24983 fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
24984 #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
24985 fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
24986 #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
24987 fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
24988 #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
24989 fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
24990
24991 #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
24992 fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
24993 #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
24994 fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
24995 #[link_name = "llvm.x86.avx512.rcp14.ss"]
24996 fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
24997 #[link_name = "llvm.x86.avx512.rcp14.sd"]
24998 fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
24999
25000 #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
25001 fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
25002 #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
25003 fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
25004 #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
25005 fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
25006 #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
25007 fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
25008
25009 #[link_name = "llvm.x86.avx512.vfmadd.f32"]
25010 fn vfmadd132ss(a: f32, b: f32, c: f32, rounding: i32) -> f32;
25011 #[link_name = "llvm.x86.avx512.vfmadd.f64"]
25012 fn vfmadd132sd(a: f64, b: f64, c: f64, rounding: i32) -> f64;
25013
25014 #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
25015 fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
25016 #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
25017 fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
25018 #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
25019 fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
25020 #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
25021 fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
25022
25023 #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
25024 fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
25025 #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
25026 fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
25027
25028 #[link_name = "llvm.x86.avx512.vcvtss2si32"]
25029 fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
25030 #[link_name = "llvm.x86.avx512.vcvtss2si64"]
25031 fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
25032 #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
25033 fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
25034 #[link_name = "llvm.x86.avx512.vcvtss2usi64"]
25035 fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
25036 #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
25037 fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
25038 #[link_name = "llvm.x86.avx512.vcvtsd2si64"]
25039 fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
25040 #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
25041 fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
25042 #[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
25043 fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
25044
25045 #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
25046 fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
25047 #[link_name = "llvm.x86.avx512.cvtsi2ss64"]
25048 fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
25049 #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
25050 fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
25051 #[link_name = "llvm.x86.avx512.cvtusi2ss"]
25052 fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
25053 #[link_name = "llvm.x86.avx512.cvtusi642ss"]
25054 fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
25055 #[link_name = "llvm.x86.avx512.cvtusi642sd"]
25056 fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
25057
25058 #[link_name = "llvm.x86.avx512.vcomi.ss"]
25059 fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
25060 #[link_name = "llvm.x86.avx512.vcomi.sd"]
25061 fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
25062 }
25063
25064 #[cfg(test)]
25065 mod tests {
25066
25067 use stdarch_test::simd_test;
25068
25069 use crate::core_arch::x86::*;
25070 use crate::hint::black_box;
25071 use crate::mem;
25072
25073 #[simd_test(enable = "avx512f")]
25074 unsafe fn test_mm512_abs_epi32() {
25075 #[rustfmt::skip]
25076 let a = _mm512_setr_epi32(
25077 0, 1, -1, i32::MAX,
25078 i32::MIN, 100, -100, -32,
25079 0, 1, -1, i32::MAX,
25080 i32::MIN, 100, -100, -32,
25081 );
25082 let r = _mm512_abs_epi32(a);
25083 let e = _mm512_setr_epi32(
25084 0,
25085 1,
25086 1,
25087 i32::MAX,
25088 i32::MAX.wrapping_add(1),
25089 100,
25090 100,
25091 32,
25092 0,
25093 1,
25094 1,
25095 i32::MAX,
25096 i32::MAX.wrapping_add(1),
25097 100,
25098 100,
25099 32,
25100 );
25101 assert_eq_m512i(r, e);
25102 }
25103
25104 #[simd_test(enable = "avx512f")]
25105 unsafe fn test_mm512_mask_abs_epi32() {
25106 #[rustfmt::skip]
25107 let a = _mm512_setr_epi32(
25108 0, 1, -1, i32::MAX,
25109 i32::MIN, 100, -100, -32,
25110 0, 1, -1, i32::MAX,
25111 i32::MIN, 100, -100, -32,
25112 );
25113 let r = _mm512_mask_abs_epi32(a, 0, a);
25114 assert_eq_m512i(r, a);
25115 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
25116 let e = _mm512_setr_epi32(
25117 0,
25118 1,
25119 1,
25120 i32::MAX,
25121 i32::MAX.wrapping_add(1),
25122 100,
25123 100,
25124 32,
25125 0,
25126 1,
25127 -1,
25128 i32::MAX,
25129 i32::MIN,
25130 100,
25131 -100,
25132 -32,
25133 );
25134 assert_eq_m512i(r, e);
25135 }
25136
25137 #[simd_test(enable = "avx512f")]
25138 unsafe fn test_mm512_maskz_abs_epi32() {
25139 #[rustfmt::skip]
25140 let a = _mm512_setr_epi32(
25141 0, 1, -1, i32::MAX,
25142 i32::MIN, 100, -100, -32,
25143 0, 1, -1, i32::MAX,
25144 i32::MIN, 100, -100, -32,
25145 );
25146 let r = _mm512_maskz_abs_epi32(0, a);
25147 assert_eq_m512i(r, _mm512_setzero_si512());
25148 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
25149 let e = _mm512_setr_epi32(
25150 0,
25151 1,
25152 1,
25153 i32::MAX,
25154 i32::MAX.wrapping_add(1),
25155 100,
25156 100,
25157 32,
25158 0,
25159 0,
25160 0,
25161 0,
25162 0,
25163 0,
25164 0,
25165 0,
25166 );
25167 assert_eq_m512i(r, e);
25168 }
25169
25170 #[simd_test(enable = "avx512f")]
25171 unsafe fn test_mm512_abs_ps() {
25172 #[rustfmt::skip]
25173 let a = _mm512_setr_ps(
25174 0., 1., -1., f32::MAX,
25175 f32::MIN, 100., -100., -32.,
25176 0., 1., -1., f32::MAX,
25177 f32::MIN, 100., -100., -32.,
25178 );
25179 let r = _mm512_abs_ps(a);
25180 let e = _mm512_setr_ps(
25181 0.,
25182 1.,
25183 1.,
25184 f32::MAX,
25185 f32::MAX,
25186 100.,
25187 100.,
25188 32.,
25189 0.,
25190 1.,
25191 1.,
25192 f32::MAX,
25193 f32::MAX,
25194 100.,
25195 100.,
25196 32.,
25197 );
25198 assert_eq_m512(r, e);
25199 }
25200
25201 #[simd_test(enable = "avx512f")]
25202 unsafe fn test_mm512_mask_abs_ps() {
25203 let a = _mm512_setr_ps(
25204 0.,
25205 1.,
25206 -1.,
25207 f32::MAX,
25208 f32::MIN,
25209 100.,
25210 -100.,
25211 -32.,
25212 0.,
25213 1.,
25214 -1.,
25215 f32::MAX,
25216 f32::MIN,
25217 100.,
25218 -100.,
25219 -32.,
25220 );
25221 let r = _mm512_mask_abs_ps(a, 0, a);
25222 assert_eq_m512(r, a);
25223 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
25224 let e = _mm512_setr_ps(
25225 0.,
25226 1.,
25227 1.,
25228 f32::MAX,
25229 f32::MAX,
25230 100.,
25231 100.,
25232 32.,
25233 0.,
25234 1.,
25235 -1.,
25236 f32::MAX,
25237 f32::MIN,
25238 100.,
25239 -100.,
25240 -32.,
25241 );
25242 assert_eq_m512(r, e);
25243 }
25244
25245 #[simd_test(enable = "avx512f")]
25246 unsafe fn test_mm512_mask_mov_epi32() {
25247 let src = _mm512_set1_epi32(1);
25248 let a = _mm512_set1_epi32(2);
25249 let r = _mm512_mask_mov_epi32(src, 0, a);
25250 assert_eq_m512i(r, src);
25251 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
25252 assert_eq_m512i(r, a);
25253 }
25254
25255 #[simd_test(enable = "avx512f")]
25256 unsafe fn test_mm512_maskz_mov_epi32() {
25257 let a = _mm512_set1_epi32(2);
25258 let r = _mm512_maskz_mov_epi32(0, a);
25259 assert_eq_m512i(r, _mm512_setzero_si512());
25260 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
25261 assert_eq_m512i(r, a);
25262 }
25263
25264 #[simd_test(enable = "avx512f")]
25265 unsafe fn test_mm512_mask_mov_ps() {
25266 let src = _mm512_set1_ps(1.);
25267 let a = _mm512_set1_ps(2.);
25268 let r = _mm512_mask_mov_ps(src, 0, a);
25269 assert_eq_m512(r, src);
25270 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
25271 assert_eq_m512(r, a);
25272 }
25273
25274 #[simd_test(enable = "avx512f")]
25275 unsafe fn test_mm512_maskz_mov_ps() {
25276 let a = _mm512_set1_ps(2.);
25277 let r = _mm512_maskz_mov_ps(0, a);
25278 assert_eq_m512(r, _mm512_setzero_ps());
25279 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
25280 assert_eq_m512(r, a);
25281 }
25282
25283 #[simd_test(enable = "avx512f")]
25284 unsafe fn test_mm512_add_epi32() {
25285 let a = _mm512_setr_epi32(
25286 0,
25287 1,
25288 -1,
25289 i32::MAX,
25290 i32::MIN,
25291 100,
25292 -100,
25293 -32,
25294 0,
25295 1,
25296 -1,
25297 i32::MAX,
25298 i32::MIN,
25299 100,
25300 -100,
25301 -32,
25302 );
25303 let b = _mm512_set1_epi32(1);
25304 let r = _mm512_add_epi32(a, b);
25305 let e = _mm512_setr_epi32(
25306 1,
25307 2,
25308 0,
25309 i32::MIN,
25310 i32::MIN + 1,
25311 101,
25312 -99,
25313 -31,
25314 1,
25315 2,
25316 0,
25317 i32::MIN,
25318 i32::MIN + 1,
25319 101,
25320 -99,
25321 -31,
25322 );
25323 assert_eq_m512i(r, e);
25324 }
25325
25326 #[simd_test(enable = "avx512f")]
25327 unsafe fn test_mm512_mask_add_epi32() {
25328 #[rustfmt::skip]
25329 let a = _mm512_setr_epi32(
25330 0, 1, -1, i32::MAX,
25331 i32::MIN, 100, -100, -32,
25332 0, 1, -1, i32::MAX,
25333 i32::MIN, 100, -100, -32,
25334 );
25335 let b = _mm512_set1_epi32(1);
25336 let r = _mm512_mask_add_epi32(a, 0, a, b);
25337 assert_eq_m512i(r, a);
25338 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
25339 let e = _mm512_setr_epi32(
25340 1,
25341 2,
25342 0,
25343 i32::MIN,
25344 i32::MIN + 1,
25345 101,
25346 -99,
25347 -31,
25348 0,
25349 1,
25350 -1,
25351 i32::MAX,
25352 i32::MIN,
25353 100,
25354 -100,
25355 -32,
25356 );
25357 assert_eq_m512i(r, e);
25358 }
25359
25360 #[simd_test(enable = "avx512f")]
25361 unsafe fn test_mm512_maskz_add_epi32() {
25362 #[rustfmt::skip]
25363 let a = _mm512_setr_epi32(
25364 0, 1, -1, i32::MAX,
25365 i32::MIN, 100, -100, -32,
25366 0, 1, -1, i32::MAX,
25367 i32::MIN, 100, -100, -32,
25368 );
25369 let b = _mm512_set1_epi32(1);
25370 let r = _mm512_maskz_add_epi32(0, a, b);
25371 assert_eq_m512i(r, _mm512_setzero_si512());
25372 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
25373 let e = _mm512_setr_epi32(
25374 1,
25375 2,
25376 0,
25377 i32::MIN,
25378 i32::MIN + 1,
25379 101,
25380 -99,
25381 -31,
25382 0,
25383 0,
25384 0,
25385 0,
25386 0,
25387 0,
25388 0,
25389 0,
25390 );
25391 assert_eq_m512i(r, e);
25392 }
25393
25394 #[simd_test(enable = "avx512f")]
25395 unsafe fn test_mm512_add_ps() {
25396 let a = _mm512_setr_ps(
25397 0.,
25398 1.,
25399 -1.,
25400 f32::MAX,
25401 f32::MIN,
25402 100.,
25403 -100.,
25404 -32.,
25405 0.,
25406 1.,
25407 -1.,
25408 f32::MAX,
25409 f32::MIN,
25410 100.,
25411 -100.,
25412 -32.,
25413 );
25414 let b = _mm512_set1_ps(1.);
25415 let r = _mm512_add_ps(a, b);
25416 let e = _mm512_setr_ps(
25417 1.,
25418 2.,
25419 0.,
25420 f32::MAX,
25421 f32::MIN + 1.,
25422 101.,
25423 -99.,
25424 -31.,
25425 1.,
25426 2.,
25427 0.,
25428 f32::MAX,
25429 f32::MIN + 1.,
25430 101.,
25431 -99.,
25432 -31.,
25433 );
25434 assert_eq_m512(r, e);
25435 }
25436
25437 #[simd_test(enable = "avx512f")]
25438 unsafe fn test_mm512_mask_add_ps() {
25439 let a = _mm512_setr_ps(
25440 0.,
25441 1.,
25442 -1.,
25443 f32::MAX,
25444 f32::MIN,
25445 100.,
25446 -100.,
25447 -32.,
25448 0.,
25449 1.,
25450 -1.,
25451 f32::MAX,
25452 f32::MIN,
25453 100.,
25454 -100.,
25455 -32.,
25456 );
25457 let b = _mm512_set1_ps(1.);
25458 let r = _mm512_mask_add_ps(a, 0, a, b);
25459 assert_eq_m512(r, a);
25460 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
25461 let e = _mm512_setr_ps(
25462 1.,
25463 2.,
25464 0.,
25465 f32::MAX,
25466 f32::MIN + 1.,
25467 101.,
25468 -99.,
25469 -31.,
25470 0.,
25471 1.,
25472 -1.,
25473 f32::MAX,
25474 f32::MIN,
25475 100.,
25476 -100.,
25477 -32.,
25478 );
25479 assert_eq_m512(r, e);
25480 }
25481
25482 #[simd_test(enable = "avx512f")]
25483 unsafe fn test_mm512_maskz_add_ps() {
25484 let a = _mm512_setr_ps(
25485 0.,
25486 1.,
25487 -1.,
25488 f32::MAX,
25489 f32::MIN,
25490 100.,
25491 -100.,
25492 -32.,
25493 0.,
25494 1.,
25495 -1.,
25496 f32::MAX,
25497 f32::MIN,
25498 100.,
25499 -100.,
25500 -32.,
25501 );
25502 let b = _mm512_set1_ps(1.);
25503 let r = _mm512_maskz_add_ps(0, a, b);
25504 assert_eq_m512(r, _mm512_setzero_ps());
25505 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
25506 let e = _mm512_setr_ps(
25507 1.,
25508 2.,
25509 0.,
25510 f32::MAX,
25511 f32::MIN + 1.,
25512 101.,
25513 -99.,
25514 -31.,
25515 0.,
25516 0.,
25517 0.,
25518 0.,
25519 0.,
25520 0.,
25521 0.,
25522 0.,
25523 );
25524 assert_eq_m512(r, e);
25525 }
25526
25527 #[simd_test(enable = "avx512f")]
25528 unsafe fn test_mm512_sub_epi32() {
25529 let a = _mm512_setr_epi32(
25530 0,
25531 1,
25532 -1,
25533 i32::MAX,
25534 i32::MIN,
25535 100,
25536 -100,
25537 -32,
25538 0,
25539 1,
25540 -1,
25541 i32::MAX,
25542 i32::MIN,
25543 100,
25544 -100,
25545 -32,
25546 );
25547 let b = _mm512_set1_epi32(1);
25548 let r = _mm512_sub_epi32(a, b);
25549 let e = _mm512_setr_epi32(
25550 -1,
25551 0,
25552 -2,
25553 i32::MAX - 1,
25554 i32::MAX,
25555 99,
25556 -101,
25557 -33,
25558 -1,
25559 0,
25560 -2,
25561 i32::MAX - 1,
25562 i32::MAX,
25563 99,
25564 -101,
25565 -33,
25566 );
25567 assert_eq_m512i(r, e);
25568 }
25569
25570 #[simd_test(enable = "avx512f")]
25571 unsafe fn test_mm512_mask_sub_epi32() {
25572 let a = _mm512_setr_epi32(
25573 0,
25574 1,
25575 -1,
25576 i32::MAX,
25577 i32::MIN,
25578 100,
25579 -100,
25580 -32,
25581 0,
25582 1,
25583 -1,
25584 i32::MAX,
25585 i32::MIN,
25586 100,
25587 -100,
25588 -32,
25589 );
25590 let b = _mm512_set1_epi32(1);
25591 let r = _mm512_mask_sub_epi32(a, 0, a, b);
25592 assert_eq_m512i(r, a);
25593 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
25594 let e = _mm512_setr_epi32(
25595 -1,
25596 0,
25597 -2,
25598 i32::MAX - 1,
25599 i32::MAX,
25600 99,
25601 -101,
25602 -33,
25603 0,
25604 1,
25605 -1,
25606 i32::MAX,
25607 i32::MIN,
25608 100,
25609 -100,
25610 -32,
25611 );
25612 assert_eq_m512i(r, e);
25613 }
25614
25615 #[simd_test(enable = "avx512f")]
25616 unsafe fn test_mm512_maskz_sub_epi32() {
25617 let a = _mm512_setr_epi32(
25618 0,
25619 1,
25620 -1,
25621 i32::MAX,
25622 i32::MIN,
25623 100,
25624 -100,
25625 -32,
25626 0,
25627 1,
25628 -1,
25629 i32::MAX,
25630 i32::MIN,
25631 100,
25632 -100,
25633 -32,
25634 );
25635 let b = _mm512_set1_epi32(1);
25636 let r = _mm512_maskz_sub_epi32(0, a, b);
25637 assert_eq_m512i(r, _mm512_setzero_si512());
25638 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
25639 let e = _mm512_setr_epi32(
25640 -1,
25641 0,
25642 -2,
25643 i32::MAX - 1,
25644 i32::MAX,
25645 99,
25646 -101,
25647 -33,
25648 0,
25649 0,
25650 0,
25651 0,
25652 0,
25653 0,
25654 0,
25655 0,
25656 );
25657 assert_eq_m512i(r, e);
25658 }
25659
25660 #[simd_test(enable = "avx512f")]
25661 unsafe fn test_mm512_sub_ps() {
25662 let a = _mm512_setr_ps(
25663 0.,
25664 1.,
25665 -1.,
25666 f32::MAX,
25667 f32::MIN,
25668 100.,
25669 -100.,
25670 -32.,
25671 0.,
25672 1.,
25673 -1.,
25674 f32::MAX,
25675 f32::MIN,
25676 100.,
25677 -100.,
25678 -32.,
25679 );
25680 let b = _mm512_set1_ps(1.);
25681 let r = _mm512_sub_ps(a, b);
25682 let e = _mm512_setr_ps(
25683 -1.,
25684 0.,
25685 -2.,
25686 f32::MAX - 1.,
25687 f32::MIN,
25688 99.,
25689 -101.,
25690 -33.,
25691 -1.,
25692 0.,
25693 -2.,
25694 f32::MAX - 1.,
25695 f32::MIN,
25696 99.,
25697 -101.,
25698 -33.,
25699 );
25700 assert_eq_m512(r, e);
25701 }
25702
25703 #[simd_test(enable = "avx512f")]
25704 unsafe fn test_mm512_mask_sub_ps() {
25705 let a = _mm512_setr_ps(
25706 0.,
25707 1.,
25708 -1.,
25709 f32::MAX,
25710 f32::MIN,
25711 100.,
25712 -100.,
25713 -32.,
25714 0.,
25715 1.,
25716 -1.,
25717 f32::MAX,
25718 f32::MIN,
25719 100.,
25720 -100.,
25721 -32.,
25722 );
25723 let b = _mm512_set1_ps(1.);
25724 let r = _mm512_mask_sub_ps(a, 0, a, b);
25725 assert_eq_m512(r, a);
25726 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
25727 let e = _mm512_setr_ps(
25728 -1.,
25729 0.,
25730 -2.,
25731 f32::MAX - 1.,
25732 f32::MIN,
25733 99.,
25734 -101.,
25735 -33.,
25736 0.,
25737 1.,
25738 -1.,
25739 f32::MAX,
25740 f32::MIN,
25741 100.,
25742 -100.,
25743 -32.,
25744 );
25745 assert_eq_m512(r, e);
25746 }
25747
25748 #[simd_test(enable = "avx512f")]
25749 unsafe fn test_mm512_maskz_sub_ps() {
25750 let a = _mm512_setr_ps(
25751 0.,
25752 1.,
25753 -1.,
25754 f32::MAX,
25755 f32::MIN,
25756 100.,
25757 -100.,
25758 -32.,
25759 0.,
25760 1.,
25761 -1.,
25762 f32::MAX,
25763 f32::MIN,
25764 100.,
25765 -100.,
25766 -32.,
25767 );
25768 let b = _mm512_set1_ps(1.);
25769 let r = _mm512_maskz_sub_ps(0, a, b);
25770 assert_eq_m512(r, _mm512_setzero_ps());
25771 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
25772 let e = _mm512_setr_ps(
25773 -1.,
25774 0.,
25775 -2.,
25776 f32::MAX - 1.,
25777 f32::MIN,
25778 99.,
25779 -101.,
25780 -33.,
25781 0.,
25782 0.,
25783 0.,
25784 0.,
25785 0.,
25786 0.,
25787 0.,
25788 0.,
25789 );
25790 assert_eq_m512(r, e);
25791 }
25792
25793 #[simd_test(enable = "avx512f")]
25794 unsafe fn test_mm512_mullo_epi32() {
25795 let a = _mm512_setr_epi32(
25796 0,
25797 1,
25798 -1,
25799 i32::MAX,
25800 i32::MIN,
25801 100,
25802 -100,
25803 -32,
25804 0,
25805 1,
25806 -1,
25807 i32::MAX,
25808 i32::MIN,
25809 100,
25810 -100,
25811 -32,
25812 );
25813 let b = _mm512_set1_epi32(2);
25814 let r = _mm512_mullo_epi32(a, b);
25815 let e = _mm512_setr_epi32(
25816 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
25817 );
25818 assert_eq_m512i(r, e);
25819 }
25820
25821 #[simd_test(enable = "avx512f")]
25822 unsafe fn test_mm512_mask_mullo_epi32() {
25823 let a = _mm512_setr_epi32(
25824 0,
25825 1,
25826 -1,
25827 i32::MAX,
25828 i32::MIN,
25829 100,
25830 -100,
25831 -32,
25832 0,
25833 1,
25834 -1,
25835 i32::MAX,
25836 i32::MIN,
25837 100,
25838 -100,
25839 -32,
25840 );
25841 let b = _mm512_set1_epi32(2);
25842 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
25843 assert_eq_m512i(r, a);
25844 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
25845 let e = _mm512_setr_epi32(
25846 0,
25847 2,
25848 -2,
25849 -2,
25850 0,
25851 200,
25852 -200,
25853 -64,
25854 0,
25855 1,
25856 -1,
25857 i32::MAX,
25858 i32::MIN,
25859 100,
25860 -100,
25861 -32,
25862 );
25863 assert_eq_m512i(r, e);
25864 }
25865
25866 #[simd_test(enable = "avx512f")]
25867 unsafe fn test_mm512_maskz_mullo_epi32() {
25868 let a = _mm512_setr_epi32(
25869 0,
25870 1,
25871 -1,
25872 i32::MAX,
25873 i32::MIN,
25874 100,
25875 -100,
25876 -32,
25877 0,
25878 1,
25879 -1,
25880 i32::MAX,
25881 i32::MIN,
25882 100,
25883 -100,
25884 -32,
25885 );
25886 let b = _mm512_set1_epi32(2);
25887 let r = _mm512_maskz_mullo_epi32(0, a, b);
25888 assert_eq_m512i(r, _mm512_setzero_si512());
25889 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
25890 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
25891 assert_eq_m512i(r, e);
25892 }
25893
25894 #[simd_test(enable = "avx512f")]
25895 unsafe fn test_mm512_mul_ps() {
25896 let a = _mm512_setr_ps(
25897 0.,
25898 1.,
25899 -1.,
25900 f32::MAX,
25901 f32::MIN,
25902 100.,
25903 -100.,
25904 -32.,
25905 0.,
25906 1.,
25907 -1.,
25908 f32::MAX,
25909 f32::MIN,
25910 100.,
25911 -100.,
25912 -32.,
25913 );
25914 let b = _mm512_set1_ps(2.);
25915 let r = _mm512_mul_ps(a, b);
25916 let e = _mm512_setr_ps(
25917 0.,
25918 2.,
25919 -2.,
25920 f32::INFINITY,
25921 f32::NEG_INFINITY,
25922 200.,
25923 -200.,
25924 -64.,
25925 0.,
25926 2.,
25927 -2.,
25928 f32::INFINITY,
25929 f32::NEG_INFINITY,
25930 200.,
25931 -200.,
25932 -64.,
25933 );
25934 assert_eq_m512(r, e);
25935 }
25936
25937 #[simd_test(enable = "avx512f")]
25938 unsafe fn test_mm512_mask_mul_ps() {
25939 let a = _mm512_setr_ps(
25940 0.,
25941 1.,
25942 -1.,
25943 f32::MAX,
25944 f32::MIN,
25945 100.,
25946 -100.,
25947 -32.,
25948 0.,
25949 1.,
25950 -1.,
25951 f32::MAX,
25952 f32::MIN,
25953 100.,
25954 -100.,
25955 -32.,
25956 );
25957 let b = _mm512_set1_ps(2.);
25958 let r = _mm512_mask_mul_ps(a, 0, a, b);
25959 assert_eq_m512(r, a);
25960 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
25961 let e = _mm512_setr_ps(
25962 0.,
25963 2.,
25964 -2.,
25965 f32::INFINITY,
25966 f32::NEG_INFINITY,
25967 200.,
25968 -200.,
25969 -64.,
25970 0.,
25971 1.,
25972 -1.,
25973 f32::MAX,
25974 f32::MIN,
25975 100.,
25976 -100.,
25977 -32.,
25978 );
25979 assert_eq_m512(r, e);
25980 }
25981
25982 #[simd_test(enable = "avx512f")]
25983 unsafe fn test_mm512_maskz_mul_ps() {
25984 let a = _mm512_setr_ps(
25985 0.,
25986 1.,
25987 -1.,
25988 f32::MAX,
25989 f32::MIN,
25990 100.,
25991 -100.,
25992 -32.,
25993 0.,
25994 1.,
25995 -1.,
25996 f32::MAX,
25997 f32::MIN,
25998 100.,
25999 -100.,
26000 -32.,
26001 );
26002 let b = _mm512_set1_ps(2.);
26003 let r = _mm512_maskz_mul_ps(0, a, b);
26004 assert_eq_m512(r, _mm512_setzero_ps());
26005 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
26006 let e = _mm512_setr_ps(
26007 0.,
26008 2.,
26009 -2.,
26010 f32::INFINITY,
26011 f32::NEG_INFINITY,
26012 200.,
26013 -200.,
26014 -64.,
26015 0.,
26016 0.,
26017 0.,
26018 0.,
26019 0.,
26020 0.,
26021 0.,
26022 0.,
26023 );
26024 assert_eq_m512(r, e);
26025 }
26026
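// Division by zero with a nonzero numerator produces +/-INFINITY (100. / 0. and -131. / 0. below).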
26027 #[simd_test(enable = "avx512f")]
26028 unsafe fn test_mm512_div_ps() {
26029 let a = _mm512_setr_ps(
26030 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
26031 );
26032 let b = _mm512_setr_ps(
26033 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
26034 );
26035 let r = _mm512_div_ps(a, b);
26036 let e = _mm512_setr_ps(
26037 0.,
26038 0.5,
26039 -0.5,
26040 -1.,
26041 50.,
26042 f32::INFINITY,
26043 -50.,
26044 -16.,
26045 0.,
26046 0.5,
26047 -0.5,
26048 500.,
26049 f32::NEG_INFINITY,
26050 50.,
26051 -50.,
26052 -16.,
26053 );
26054 assert_eq_m512(r, e); // 100. / 0. = INFINITY and -131. / 0. = NEG_INFINITY; no 0/0 (NaN) lane is exercised here
26055 }
26056
26057 #[simd_test(enable = "avx512f")]
26058 unsafe fn test_mm512_mask_div_ps() {
26059 let a = _mm512_setr_ps(
26060 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
26061 );
26062 let b = _mm512_setr_ps(
26063 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
26064 );
26065 let r = _mm512_mask_div_ps(a, 0, a, b);
26066 assert_eq_m512(r, a);
26067 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
26068 let e = _mm512_setr_ps(
26069 0.,
26070 0.5,
26071 -0.5,
26072 -1.,
26073 50.,
26074 f32::INFINITY,
26075 -50.,
26076 -16.,
26077 0.,
26078 1.,
26079 -1.,
26080 1000.,
26081 -131.,
26082 100.,
26083 -100.,
26084 -32.,
26085 );
26086 assert_eq_m512(r, e);
26087 }
26088
26089 #[simd_test(enable = "avx512f")]
26090 unsafe fn test_mm512_maskz_div_ps() {
26091 let a = _mm512_setr_ps(
26092 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
26093 );
26094 let b = _mm512_setr_ps(
26095 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
26096 );
26097 let r = _mm512_maskz_div_ps(0, a, b);
26098 assert_eq_m512(r, _mm512_setzero_ps());
26099 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
26100 let e = _mm512_setr_ps(
26101 0.,
26102 0.5,
26103 -0.5,
26104 -1.,
26105 50.,
26106 f32::INFINITY,
26107 -50.,
26108 -16.,
26109 0.,
26110 0.,
26111 0.,
26112 0.,
26113 0.,
26114 0.,
26115 0.,
26116 0.,
26117 );
26118 assert_eq_m512(r, e);
26119 }
26120
26121 #[simd_test(enable = "avx512f")]
26122 unsafe fn test_mm512_max_epi32() {
26123 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26124 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26125 let r = _mm512_max_epi32(a, b);
26126 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
26127 assert_eq_m512i(r, e);
26128 }
26129
26130 #[simd_test(enable = "avx512f")]
26131 unsafe fn test_mm512_mask_max_epi32() {
26132 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26133 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26134 let r = _mm512_mask_max_epi32(a, 0, a, b);
26135 assert_eq_m512i(r, a);
26136 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
26137 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
26138 assert_eq_m512i(r, e);
26139 }
26140
26141 #[simd_test(enable = "avx512f")]
26142 unsafe fn test_mm512_maskz_max_epi32() {
26143 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26144 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26145 let r = _mm512_maskz_max_epi32(0, a, b);
26146 assert_eq_m512i(r, _mm512_setzero_si512());
26147 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
26148 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
26149 assert_eq_m512i(r, e);
26150 }
26151
26152 #[simd_test(enable = "avx512f")]
26153 unsafe fn test_mm512_max_ps() {
26154 let a = _mm512_setr_ps(
26155 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26156 );
26157 let b = _mm512_setr_ps(
26158 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
26159 );
26160 let r = _mm512_max_ps(a, b);
26161 let e = _mm512_setr_ps(
26162 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
26163 );
26164 assert_eq_m512(r, e);
26165 }
26166
26167 #[simd_test(enable = "avx512f")]
26168 unsafe fn test_mm512_mask_max_ps() {
26169 let a = _mm512_setr_ps(
26170 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26171 );
26172 let b = _mm512_setr_ps(
26173 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
26174 );
26175 let r = _mm512_mask_max_ps(a, 0, a, b);
26176 assert_eq_m512(r, a);
26177 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
26178 let e = _mm512_setr_ps(
26179 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
26180 );
26181 assert_eq_m512(r, e);
26182 }
26183
26184 #[simd_test(enable = "avx512f")]
26185 unsafe fn test_mm512_maskz_max_ps() {
26186 let a = _mm512_setr_ps(
26187 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26188 );
26189 let b = _mm512_setr_ps(
26190 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
26191 );
26192 let r = _mm512_maskz_max_ps(0, a, b);
26193 assert_eq_m512(r, _mm512_setzero_ps());
26194 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
26195 let e = _mm512_setr_ps(
26196 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
26197 );
26198 assert_eq_m512(r, e);
26199 }
26200
26201 #[simd_test(enable = "avx512f")]
26202 unsafe fn test_mm512_max_epu32() {
26203 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26204 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26205 let r = _mm512_max_epu32(a, b);
26206 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
26207 assert_eq_m512i(r, e);
26208 }
26209
26210 #[simd_test(enable = "avx512f")]
26211 unsafe fn test_mm512_mask_max_epu32() {
26212 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26213 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26214 let r = _mm512_mask_max_epu32(a, 0, a, b);
26215 assert_eq_m512i(r, a);
26216 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
26217 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
26218 assert_eq_m512i(r, e);
26219 }
26220
26221 #[simd_test(enable = "avx512f")]
26222 unsafe fn test_mm512_maskz_max_epu32() {
26223 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26224 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26225 let r = _mm512_maskz_max_epu32(0, a, b);
26226 assert_eq_m512i(r, _mm512_setzero_si512());
26227 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
26228 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
26229 assert_eq_m512i(r, e);
26230 }
26231
26232 #[simd_test(enable = "avx512f")]
26233 unsafe fn test_mm512_min_epi32() {
26234 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26235 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26236 let r = _mm512_min_epi32(a, b);
26237 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
26238 assert_eq_m512i(r, e);
26239 }
26240
26241 #[simd_test(enable = "avx512f")]
26242 unsafe fn test_mm512_mask_min_epi32() {
26243 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26244 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26245 let r = _mm512_mask_min_epi32(a, 0, a, b);
26246 assert_eq_m512i(r, a);
26247 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
26248 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26249 assert_eq_m512i(r, e);
26250 }
26251
26252 #[simd_test(enable = "avx512f")]
26253 unsafe fn test_mm512_maskz_min_epi32() {
26254 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26255 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26256 let r = _mm512_maskz_min_epi32(0, a, b);
26257 assert_eq_m512i(r, _mm512_setzero_si512());
26258 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
26259 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
26260 assert_eq_m512i(r, e);
26261 }
26262
26263 #[simd_test(enable = "avx512f")]
26264 unsafe fn test_mm512_min_ps() {
26265 let a = _mm512_setr_ps(
26266 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26267 );
26268 let b = _mm512_setr_ps(
26269 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
26270 );
26271 let r = _mm512_min_ps(a, b);
26272 let e = _mm512_setr_ps(
26273 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
26274 );
26275 assert_eq_m512(r, e);
26276 }
26277
26278 #[simd_test(enable = "avx512f")]
26279 unsafe fn test_mm512_mask_min_ps() {
26280 let a = _mm512_setr_ps(
26281 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26282 );
26283 let b = _mm512_setr_ps(
26284 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
26285 );
26286 let r = _mm512_mask_min_ps(a, 0, a, b);
26287 assert_eq_m512(r, a);
26288 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
26289 let e = _mm512_setr_ps(
26290 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26291 );
26292 assert_eq_m512(r, e);
26293 }
26294
26295 #[simd_test(enable = "avx512f")]
26296 unsafe fn test_mm512_maskz_min_ps() {
26297 let a = _mm512_setr_ps(
26298 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26299 );
26300 let b = _mm512_setr_ps(
26301 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
26302 );
26303 let r = _mm512_maskz_min_ps(0, a, b);
26304 assert_eq_m512(r, _mm512_setzero_ps());
26305 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
26306 let e = _mm512_setr_ps(
26307 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
26308 );
26309 assert_eq_m512(r, e);
26310 }
26311
26312 #[simd_test(enable = "avx512f")]
26313 unsafe fn test_mm512_min_epu32() {
26314 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26315 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26316 let r = _mm512_min_epu32(a, b);
26317 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
26318 assert_eq_m512i(r, e);
26319 }
26320
26321 #[simd_test(enable = "avx512f")]
26322 unsafe fn test_mm512_mask_min_epu32() {
26323 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26324 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26325 let r = _mm512_mask_min_epu32(a, 0, a, b);
26326 assert_eq_m512i(r, a);
26327 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
26328 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26329 assert_eq_m512i(r, e);
26330 }
26331
26332 #[simd_test(enable = "avx512f")]
26333 unsafe fn test_mm512_maskz_min_epu32() {
26334 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
26335 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
26336 let r = _mm512_maskz_min_epu32(0, a, b);
26337 assert_eq_m512i(r, _mm512_setzero_si512());
26338 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
26339 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
26340 assert_eq_m512i(r, e);
26341 }
26342
26343 #[simd_test(enable = "avx512f")]
26344 unsafe fn test_mm512_sqrt_ps() {
26345 let a = _mm512_setr_ps(
26346 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
26347 );
26348 let r = _mm512_sqrt_ps(a);
26349 let e = _mm512_setr_ps(
26350 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26351 );
26352 assert_eq_m512(r, e);
26353 }
26354
26355 #[simd_test(enable = "avx512f")]
26356 unsafe fn test_mm512_mask_sqrt_ps() {
26357 let a = _mm512_setr_ps(
26358 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
26359 );
26360 let r = _mm512_mask_sqrt_ps(a, 0, a);
26361 assert_eq_m512(r, a);
26362 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
26363 let e = _mm512_setr_ps(
26364 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
26365 );
26366 assert_eq_m512(r, e);
26367 }
26368
26369 #[simd_test(enable = "avx512f")]
26370 unsafe fn test_mm512_maskz_sqrt_ps() {
26371 let a = _mm512_setr_ps(
26372 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
26373 );
26374 let r = _mm512_maskz_sqrt_ps(0, a);
26375 assert_eq_m512(r, _mm512_setzero_ps());
26376 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
26377 let e = _mm512_setr_ps(
26378 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
26379 );
26380 assert_eq_m512(r, e);
26381 }
26382
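// Fused multiply-add: each lane computes a * b + c with a single rounding step.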
26383 #[simd_test(enable = "avx512f")]
26384 unsafe fn test_mm512_fmadd_ps() {
26385 let a = _mm512_setr_ps(
26386 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26387 );
26388 let b = _mm512_setr_ps(
26389 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26390 );
26391 let c = _mm512_setr_ps(
26392 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26393 );
26394 let r = _mm512_fmadd_ps(a, b, c);
26395 let e = _mm512_setr_ps(
26396 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
26397 );
26398 assert_eq_m512(r, e);
26399 }
26400
26401 #[simd_test(enable = "avx512f")]
26402 unsafe fn test_mm512_mask_fmadd_ps() {
26403 let a = _mm512_setr_ps(
26404 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26405 );
26406 let b = _mm512_setr_ps(
26407 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26408 );
26409 let c = _mm512_setr_ps(
26410 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26411 );
26412 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
26413 assert_eq_m512(r, a);
26414 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
26415 let e = _mm512_setr_ps(
26416 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
26417 );
26418 assert_eq_m512(r, e);
26419 }
26420
26421 #[simd_test(enable = "avx512f")]
26422 unsafe fn test_mm512_maskz_fmadd_ps() {
26423 let a = _mm512_setr_ps(
26424 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26425 );
26426 let b = _mm512_setr_ps(
26427 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26428 );
26429 let c = _mm512_setr_ps(
26430 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26431 );
26432 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
26433 assert_eq_m512(r, _mm512_setzero_ps());
26434 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
26435 let e = _mm512_setr_ps(
26436 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
26437 );
26438 assert_eq_m512(r, e);
26439 }
26440
26441 #[simd_test(enable = "avx512f")]
26442 unsafe fn test_mm512_mask3_fmadd_ps() {
26443 let a = _mm512_setr_ps(
26444 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26445 );
26446 let b = _mm512_setr_ps(
26447 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26448 );
26449 let c = _mm512_setr_ps(
26450 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26451 );
26452 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
26453 assert_eq_m512(r, c);
26454 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
26455 let e = _mm512_setr_ps(
26456 1., 2., 3., 4., 5., 6., 7., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
26457 );
26458 assert_eq_m512(r, e);
26459 }
26460
26461 #[simd_test(enable = "avx512f")]
26462 unsafe fn test_mm512_fmsub_ps() {
26463 let a = _mm512_setr_ps(
26464 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26465 );
26466 let b = _mm512_setr_ps(
26467 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26468 );
26469 let c = _mm512_setr_ps(
26470 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26471 );
26472 let r = _mm512_fmsub_ps(a, b, c);
26473 let e = _mm512_setr_ps(
26474 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
26475 );
26476 assert_eq_m512(r, e);
26477 }
26478
26479 #[simd_test(enable = "avx512f")]
26480 unsafe fn test_mm512_mask_fmsub_ps() {
26481 let a = _mm512_setr_ps(
26482 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26483 );
26484 let b = _mm512_setr_ps(
26485 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26486 );
26487 let c = _mm512_setr_ps(
26488 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26489 );
26490 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
26491 assert_eq_m512(r, a);
26492 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
26493 let e = _mm512_setr_ps(
26494 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
26495 );
26496 assert_eq_m512(r, e);
26497 }
26498
26499 #[simd_test(enable = "avx512f")]
26500 unsafe fn test_mm512_maskz_fmsub_ps() {
26501 let a = _mm512_setr_ps(
26502 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26503 );
26504 let b = _mm512_setr_ps(
26505 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26506 );
26507 let c = _mm512_setr_ps(
26508 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26509 );
26510 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
26511 assert_eq_m512(r, _mm512_setzero_ps());
26512 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
26513 let e = _mm512_setr_ps(
26514 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
26515 );
26516 assert_eq_m512(r, e);
26517 }
26518
26519 #[simd_test(enable = "avx512f")]
26520 unsafe fn test_mm512_mask3_fmsub_ps() {
26521 let a = _mm512_setr_ps(
26522 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26523 );
26524 let b = _mm512_setr_ps(
26525 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26526 );
26527 let c = _mm512_setr_ps(
26528 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26529 );
26530 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
26531 assert_eq_m512(r, c);
26532 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
26533 let e = _mm512_setr_ps(
26534 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
26535 );
26536 assert_eq_m512(r, e);
26537 }
26538
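// fmaddsub alternates per lane: even lanes compute a * b - c, odd lanes a * b + c, as the expected vector shows.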
26539 #[simd_test(enable = "avx512f")]
26540 unsafe fn test_mm512_fmaddsub_ps() {
26541 let a = _mm512_setr_ps(
26542 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26543 );
26544 let b = _mm512_setr_ps(
26545 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26546 );
26547 let c = _mm512_setr_ps(
26548 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26549 );
26550 let r = _mm512_fmaddsub_ps(a, b, c);
26551 let e = _mm512_setr_ps(
26552 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
26553 );
26554 assert_eq_m512(r, e);
26555 }
26556
26557 #[simd_test(enable = "avx512f")]
26558 unsafe fn test_mm512_mask_fmaddsub_ps() {
26559 let a = _mm512_setr_ps(
26560 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26561 );
26562 let b = _mm512_setr_ps(
26563 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26564 );
26565 let c = _mm512_setr_ps(
26566 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26567 );
26568 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
26569 assert_eq_m512(r, a);
26570 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
26571 let e = _mm512_setr_ps(
26572 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
26573 );
26574 assert_eq_m512(r, e);
26575 }
26576
26577 #[simd_test(enable = "avx512f")]
26578 unsafe fn test_mm512_maskz_fmaddsub_ps() {
26579 let a = _mm512_setr_ps(
26580 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26581 );
26582 let b = _mm512_setr_ps(
26583 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26584 );
26585 let c = _mm512_setr_ps(
26586 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26587 );
26588 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
26589 assert_eq_m512(r, _mm512_setzero_ps());
26590 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
26591 let e = _mm512_setr_ps(
26592 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
26593 );
26594 assert_eq_m512(r, e);
26595 }
26596
26597 #[simd_test(enable = "avx512f")]
26598 unsafe fn test_mm512_mask3_fmaddsub_ps() {
26599 let a = _mm512_setr_ps(
26600 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26601 );
26602 let b = _mm512_setr_ps(
26603 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26604 );
26605 let c = _mm512_setr_ps(
26606 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26607 );
26608 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
26609 assert_eq_m512(r, c);
26610 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
26611 let e = _mm512_setr_ps(
26612 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
26613 );
26614 assert_eq_m512(r, e);
26615 }
26616
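// fmsubadd is the opposite interleave: even lanes compute a * b + c, odd lanes a * b - c.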
26617 #[simd_test(enable = "avx512f")]
26618 unsafe fn test_mm512_fmsubadd_ps() {
26619 let a = _mm512_setr_ps(
26620 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26621 );
26622 let b = _mm512_setr_ps(
26623 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26624 );
26625 let c = _mm512_setr_ps(
26626 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26627 );
26628 let r = _mm512_fmsubadd_ps(a, b, c);
26629 let e = _mm512_setr_ps(
26630 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
26631 );
26632 assert_eq_m512(r, e);
26633 }
26634
26635 #[simd_test(enable = "avx512f")]
26636 unsafe fn test_mm512_mask_fmsubadd_ps() {
26637 let a = _mm512_setr_ps(
26638 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26639 );
26640 let b = _mm512_setr_ps(
26641 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26642 );
26643 let c = _mm512_setr_ps(
26644 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26645 );
26646 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
26647 assert_eq_m512(r, a);
26648 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
26649 let e = _mm512_setr_ps(
26650 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
26651 );
26652 assert_eq_m512(r, e);
26653 }
26654
26655 #[simd_test(enable = "avx512f")]
26656 unsafe fn test_mm512_maskz_fmsubadd_ps() {
26657 let a = _mm512_setr_ps(
26658 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26659 );
26660 let b = _mm512_setr_ps(
26661 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26662 );
26663 let c = _mm512_setr_ps(
26664 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26665 );
26666 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
26667 assert_eq_m512(r, _mm512_setzero_ps());
26668 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
26669 let e = _mm512_setr_ps(
26670 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
26671 );
26672 assert_eq_m512(r, e);
26673 }
26674
26675 #[simd_test(enable = "avx512f")]
26676 unsafe fn test_mm512_mask3_fmsubadd_ps() {
26677 let a = _mm512_setr_ps(
26678 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26679 );
26680 let b = _mm512_setr_ps(
26681 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26682 );
26683 let c = _mm512_setr_ps(
26684 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26685 );
26686 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
26687 assert_eq_m512(r, c);
26688 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
26689 let e = _mm512_setr_ps(
26690 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
26691 );
26692 assert_eq_m512(r, e);
26693 }
26694
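// fnmadd negates the product: each lane computes -(a * b) + c.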
26695 #[simd_test(enable = "avx512f")]
26696 unsafe fn test_mm512_fnmadd_ps() {
26697 let a = _mm512_setr_ps(
26698 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26699 );
26700 let b = _mm512_setr_ps(
26701 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26702 );
26703 let c = _mm512_setr_ps(
26704 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26705 );
26706 let r = _mm512_fnmadd_ps(a, b, c);
26707 let e = _mm512_setr_ps(
26708 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
26709 );
26710 assert_eq_m512(r, e);
26711 }
26712
26713 #[simd_test(enable = "avx512f")]
26714 unsafe fn test_mm512_mask_fnmadd_ps() {
26715 let a = _mm512_setr_ps(
26716 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26717 );
26718 let b = _mm512_setr_ps(
26719 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26720 );
26721 let c = _mm512_setr_ps(
26722 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26723 );
26724 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
26725 assert_eq_m512(r, a);
26726 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
26727 let e = _mm512_setr_ps(
26728 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
26729 );
26730 assert_eq_m512(r, e);
26731 }
26732
26733 #[simd_test(enable = "avx512f")]
26734 unsafe fn test_mm512_maskz_fnmadd_ps() {
26735 let a = _mm512_setr_ps(
26736 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26737 );
26738 let b = _mm512_setr_ps(
26739 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26740 );
26741 let c = _mm512_setr_ps(
26742 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26743 );
26744 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
26745 assert_eq_m512(r, _mm512_setzero_ps());
26746 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
26747 let e = _mm512_setr_ps(
26748 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
26749 );
26750 assert_eq_m512(r, e);
26751 }
26752
26753 #[simd_test(enable = "avx512f")]
26754 unsafe fn test_mm512_mask3_fnmadd_ps() {
26755 let a = _mm512_setr_ps(
26756 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26757 );
26758 let b = _mm512_setr_ps(
26759 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26760 );
26761 let c = _mm512_setr_ps(
26762 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26763 );
26764 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
26765 assert_eq_m512(r, c);
26766 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
26767 let e = _mm512_setr_ps(
26768 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
26769 );
26770 assert_eq_m512(r, e);
26771 }
26772
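// fnmsub negates both terms: each lane computes -(a * b) - c.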
26773 #[simd_test(enable = "avx512f")]
26774 unsafe fn test_mm512_fnmsub_ps() {
26775 let a = _mm512_setr_ps(
26776 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26777 );
26778 let b = _mm512_setr_ps(
26779 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26780 );
26781 let c = _mm512_setr_ps(
26782 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26783 );
26784 let r = _mm512_fnmsub_ps(a, b, c);
26785 let e = _mm512_setr_ps(
26786 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
26787 );
26788 assert_eq_m512(r, e);
26789 }
26790
26791 #[simd_test(enable = "avx512f")]
26792 unsafe fn test_mm512_mask_fnmsub_ps() {
26793 let a = _mm512_setr_ps(
26794 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26795 );
26796 let b = _mm512_setr_ps(
26797 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26798 );
26799 let c = _mm512_setr_ps(
26800 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26801 );
26802 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
26803 assert_eq_m512(r, a);
26804 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
26805 let e = _mm512_setr_ps(
26806 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
26807 );
26808 assert_eq_m512(r, e);
26809 }
26810
26811 #[simd_test(enable = "avx512f")]
26812 unsafe fn test_mm512_maskz_fnmsub_ps() {
26813 let a = _mm512_setr_ps(
26814 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26815 );
26816 let b = _mm512_setr_ps(
26817 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26818 );
26819 let c = _mm512_setr_ps(
26820 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26821 );
26822 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
26823 assert_eq_m512(r, _mm512_setzero_ps());
26824 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
26825 let e = _mm512_setr_ps(
26826 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
26827 );
26828 assert_eq_m512(r, e);
26829 }
26830
26831 #[simd_test(enable = "avx512f")]
26832 unsafe fn test_mm512_mask3_fnmsub_ps() {
26833 let a = _mm512_setr_ps(
26834 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26835 );
26836 let b = _mm512_setr_ps(
26837 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26838 );
26839 let c = _mm512_setr_ps(
26840 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26841 );
26842 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
26843 assert_eq_m512(r, c);
26844 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
26845 let e = _mm512_setr_ps(
26846 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
26847 );
26848 assert_eq_m512(r, e);
26849 }
26850
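// rcp14 is an approximate reciprocal (about 14 bits of precision), hence 0.33333206 rather than the correctly rounded 0.33333334.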
26851 #[simd_test(enable = "avx512f")]
26852 unsafe fn test_mm512_rcp14_ps() {
26853 let a = _mm512_set1_ps(3.);
26854 let r = _mm512_rcp14_ps(a);
26855 let e = _mm512_set1_ps(0.33333206);
26856 assert_eq_m512(r, e);
26857 }
26858
26859 #[simd_test(enable = "avx512f")]
26860 unsafe fn test_mm512_mask_rcp14_ps() {
26861 let a = _mm512_set1_ps(3.);
26862 let r = _mm512_mask_rcp14_ps(a, 0, a);
26863 assert_eq_m512(r, a);
26864 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
26865 let e = _mm512_setr_ps(
26866 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
26867 0.33333206, 0.33333206, 0.33333206, 0.33333206,
26868 );
26869 assert_eq_m512(r, e);
26870 }
26871
26872 #[simd_test(enable = "avx512f")]
26873 unsafe fn test_mm512_maskz_rcp14_ps() {
26874 let a = _mm512_set1_ps(3.);
26875 let r = _mm512_maskz_rcp14_ps(0, a);
26876 assert_eq_m512(r, _mm512_setzero_ps());
26877 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
26878 let e = _mm512_setr_ps(
26879 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
26880 0.33333206, 0.33333206, 0.33333206, 0.33333206,
26881 );
26882 assert_eq_m512(r, e);
26883 }
26884
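// rsqrt14 is the matching approximate reciprocal square root: 1/sqrt(3) is roughly 0.57735, returned here as 0.5773392.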
26885 #[simd_test(enable = "avx512f")]
26886 unsafe fn test_mm512_rsqrt14_ps() {
26887 let a = _mm512_set1_ps(3.);
26888 let r = _mm512_rsqrt14_ps(a);
26889 let e = _mm512_set1_ps(0.5773392);
26890 assert_eq_m512(r, e);
26891 }
26892
26893 #[simd_test(enable = "avx512f")]
26894 unsafe fn test_mm512_mask_rsqrt14_ps() {
26895 let a = _mm512_set1_ps(3.);
26896 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
26897 assert_eq_m512(r, a);
26898 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
26899 let e = _mm512_setr_ps(
26900 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
26901 0.5773392, 0.5773392, 0.5773392,
26902 );
26903 assert_eq_m512(r, e);
26904 }
26905
26906 #[simd_test(enable = "avx512f")]
26907 unsafe fn test_mm512_maskz_rsqrt14_ps() {
26908 let a = _mm512_set1_ps(3.);
26909 let r = _mm512_maskz_rsqrt14_ps(0, a);
26910 assert_eq_m512(r, _mm512_setzero_ps());
26911 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
26912 let e = _mm512_setr_ps(
26913 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
26914 0.5773392, 0.5773392, 0.5773392,
26915 );
26916 assert_eq_m512(r, e);
26917 }
26918
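// getexp extracts floor(log2(|a|)) as a float: getexp(3.0) = 1.0.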
26919 #[simd_test(enable = "avx512f")]
26920 unsafe fn test_mm512_getexp_ps() {
26921 let a = _mm512_set1_ps(3.);
26922 let r = _mm512_getexp_ps(a);
26923 let e = _mm512_set1_ps(1.);
26924 assert_eq_m512(r, e);
26925 }
26926
26927 #[simd_test(enable = "avx512f")]
26928 unsafe fn test_mm512_mask_getexp_ps() {
26929 let a = _mm512_set1_ps(3.);
26930 let r = _mm512_mask_getexp_ps(a, 0, a);
26931 assert_eq_m512(r, a);
26932 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
26933 let e = _mm512_setr_ps(
26934 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
26935 );
26936 assert_eq_m512(r, e);
26937 }
26938
26939 #[simd_test(enable = "avx512f")]
26940 unsafe fn test_mm512_maskz_getexp_ps() {
26941 let a = _mm512_set1_ps(3.);
26942 let r = _mm512_maskz_getexp_ps(0, a);
26943 assert_eq_m512(r, _mm512_setzero_ps());
26944 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
26945 let e = _mm512_setr_ps(
26946 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
26947 );
26948 assert_eq_m512(r, e);
26949 }
26950
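// roundscale with imm8 = 0 rounds to the nearest integer with no scaling, so 1.1 becomes 1.0.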
26951 #[simd_test(enable = "avx512f")]
26952 unsafe fn test_mm512_roundscale_ps() {
26953 let a = _mm512_set1_ps(1.1);
26954 let r = _mm512_roundscale_ps(a, 0);
26955 let e = _mm512_set1_ps(1.0);
26956 assert_eq_m512(r, e);
26957 }
26958
26959 #[simd_test(enable = "avx512f")]
26960 unsafe fn test_mm512_mask_roundscale_ps() {
26961 let a = _mm512_set1_ps(1.1);
26962 let r = _mm512_mask_roundscale_ps(a, 0, a, 0);
26963 let e = _mm512_set1_ps(1.1);
26964 assert_eq_m512(r, e);
26965 let r = _mm512_mask_roundscale_ps(a, 0b11111111_11111111, a, 0);
26966 let e = _mm512_set1_ps(1.0);
26967 assert_eq_m512(r, e);
26968 }
26969
26970 #[simd_test(enable = "avx512f")]
26971 unsafe fn test_mm512_maskz_roundscale_ps() {
26972 let a = _mm512_set1_ps(1.1);
26973 let r = _mm512_maskz_roundscale_ps(0, a, 0);
26974 assert_eq_m512(r, _mm512_setzero_ps());
26975 let r = _mm512_maskz_roundscale_ps(0b11111111_11111111, a, 0);
26976 let e = _mm512_set1_ps(1.0);
26977 assert_eq_m512(r, e);
26978 }
26979
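// scalef scales a by two raised to the power of b: 1.0 * 2^3.0 = 8.0.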
26980 #[simd_test(enable = "avx512f")]
26981 unsafe fn test_mm512_scalef_ps() {
26982 let a = _mm512_set1_ps(1.);
26983 let b = _mm512_set1_ps(3.);
26984 let r = _mm512_scalef_ps(a, b);
26985 let e = _mm512_set1_ps(8.);
26986 assert_eq_m512(r, e);
26987 }
26988
26989 #[simd_test(enable = "avx512f")]
26990 unsafe fn test_mm512_mask_scalef_ps() {
26991 let a = _mm512_set1_ps(1.);
26992 let b = _mm512_set1_ps(3.);
26993 let r = _mm512_mask_scalef_ps(a, 0, a, b);
26994 assert_eq_m512(r, a);
26995 let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
26996 let e = _mm512_set_ps(
26997 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
26998 );
26999 assert_eq_m512(r, e);
27000 }
27001
27002 #[simd_test(enable = "avx512f")]
27003 unsafe fn test_mm512_maskz_scalef_ps() {
27004 let a = _mm512_set1_ps(1.);
27005 let b = _mm512_set1_ps(3.);
27006 let r = _mm512_maskz_scalef_ps(0, a, b);
27007 assert_eq_m512(r, _mm512_setzero_ps());
27008 let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
27009 let e = _mm512_set_ps(
27010 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
27011 );
27012 assert_eq_m512(r, e);
27013 }
27014
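// fixupimm classifies each input and replaces it according to the token table passed in c;
// with the table used here the NaN inputs are fixed up to 0.0, as the assertions show.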
27015 #[simd_test(enable = "avx512f")]
27016 unsafe fn test_mm512_fixupimm_ps() {
27017 let a = _mm512_set1_ps(f32::NAN);
27018 let b = _mm512_set1_ps(f32::MAX);
27019 let c = _mm512_set1_epi32(i32::MAX);
27020 let r = _mm512_fixupimm_ps(a, b, c, 5);
27021 let e = _mm512_set1_ps(0.0);
27022 assert_eq_m512(r, e);
27023 }
27024
27025 #[simd_test(enable = "avx512f")]
27026 unsafe fn test_mm512_mask_fixupimm_ps() {
27027 let a = _mm512_set_ps(
27028 f32::NAN,
27029 f32::NAN,
27030 f32::NAN,
27031 f32::NAN,
27032 f32::NAN,
27033 f32::NAN,
27034 f32::NAN,
27035 f32::NAN,
27036 1.,
27037 1.,
27038 1.,
27039 1.,
27040 1.,
27041 1.,
27042 1.,
27043 1.,
27044 );
27045 let b = _mm512_set1_ps(f32::MAX);
27046 let c = _mm512_set1_epi32(i32::MAX);
27047 let r = _mm512_mask_fixupimm_ps(a, 0b11111111_00000000, b, c, 5);
27048 let e = _mm512_set_ps(
27049 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
27050 );
27051 assert_eq_m512(r, e);
27052 }
27053
27054 #[simd_test(enable = "avx512f")]
27055 unsafe fn test_mm512_maskz_fixupimm_ps() {
27056 let a = _mm512_set_ps(
27057 f32::NAN,
27058 f32::NAN,
27059 f32::NAN,
27060 f32::NAN,
27061 f32::NAN,
27062 f32::NAN,
27063 f32::NAN,
27064 f32::NAN,
27065 1.,
27066 1.,
27067 1.,
27068 1.,
27069 1.,
27070 1.,
27071 1.,
27072 1.,
27073 );
27074 let b = _mm512_set1_ps(f32::MAX);
27075 let c = _mm512_set1_epi32(i32::MAX);
27076 let r = _mm512_maskz_fixupimm_ps(0b11111111_00000000, a, b, c, 5);
27077 let e = _mm512_set_ps(
27078 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
27079 );
27080 assert_eq_m512(r, e);
27081 }
27082
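// ternarylogic evaluates an arbitrary three-input boolean function bitwise: imm8 is its 8-entry
// truth table. The table used here (8) evaluates to 0 for these operands.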
27083 #[simd_test(enable = "avx512f")]
27084 unsafe fn test_mm512_ternarylogic_epi32() {
27085 let a = _mm512_set1_epi32(1 << 2);
27086 let b = _mm512_set1_epi32(1 << 1);
27087 let c = _mm512_set1_epi32(1 << 0);
27088 let r = _mm512_ternarylogic_epi32(a, b, c, 8);
27089 let e = _mm512_set1_epi32(0);
27090 assert_eq_m512i(r, e);
27091 }
27092
27093 #[simd_test(enable = "avx512f")]
27094 unsafe fn test_mm512_mask_ternarylogic_epi32() {
27095 let src = _mm512_set1_epi32(1 << 2);
27096 let a = _mm512_set1_epi32(1 << 1);
27097 let b = _mm512_set1_epi32(1 << 0);
27098 let r = _mm512_mask_ternarylogic_epi32(src, 0, a, b, 8);
27099 assert_eq_m512i(r, src);
27100 let r = _mm512_mask_ternarylogic_epi32(src, 0b11111111_11111111, a, b, 8);
27101 let e = _mm512_set1_epi32(0);
27102 assert_eq_m512i(r, e);
27103 }
27104
27105 #[simd_test(enable = "avx512f")]
27106 unsafe fn test_mm512_maskz_ternarylogic_epi32() {
27107 let a = _mm512_set1_epi32(1 << 2);
27108 let b = _mm512_set1_epi32(1 << 1);
27109 let c = _mm512_set1_epi32(1 << 0);
27110 let r = _mm512_maskz_ternarylogic_epi32(0, a, b, c, 9);
27111 assert_eq_m512i(r, _mm512_setzero_si512());
27112 let r = _mm512_maskz_ternarylogic_epi32(0b11111111_11111111, a, b, c, 8);
27113 let e = _mm512_set1_epi32(0);
27114 assert_eq_m512i(r, e);
27115 }
27116
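// getmant normalizes the mantissa into the requested interval: 10.0 = 1.25 * 2^3,
// and 1.25 lies in both [0.75, 1.5) (_MM_MANT_NORM_P75_1P5) and [1.0, 2.0) (_MM_MANT_NORM_1_2).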
27117 #[simd_test(enable = "avx512f")]
27118 unsafe fn test_mm512_getmant_ps() {
27119 let a = _mm512_set1_ps(10.);
27120 let r = _mm512_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
27121 let e = _mm512_set1_ps(1.25);
27122 assert_eq_m512(r, e);
27123 }
27124
27125 #[simd_test(enable = "avx512f")]
27126 unsafe fn test_mm512_mask_getmant_ps() {
27127 let a = _mm512_set1_ps(10.);
27128 let r = _mm512_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
27129 assert_eq_m512(r, a);
27130 let r = _mm512_mask_getmant_ps(
27131 a,
27132 0b11111111_00000000,
27133 a,
27134 _MM_MANT_NORM_1_2,
27135 _MM_MANT_SIGN_SRC,
27136 );
27137 let e = _mm512_setr_ps(
27138 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
27139 );
27140 assert_eq_m512(r, e);
27141 }
27142
27143 #[simd_test(enable = "avx512f")]
27144 unsafe fn test_mm512_maskz_getmant_ps() {
27145 let a = _mm512_set1_ps(10.);
27146 let r = _mm512_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
27147 assert_eq_m512(r, _mm512_setzero_ps());
27148 let r =
27149 _mm512_maskz_getmant_ps(0b11111111_00000000, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
27150 let e = _mm512_setr_ps(
27151 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
27152 );
27153 assert_eq_m512(r, e);
27154 }
27155
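// The *_round_* variants take an explicit rounding mode, and _MM_FROUND_NO_EXC suppresses
// floating-point exceptions. Rounding to nearest versus toward zero differs only in the
// inexact 0.00000007 + (-1.0) lane: -0.99999994 versus -0.9999999.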
27156 #[simd_test(enable = "avx512f")]
27157 unsafe fn test_mm512_add_round_ps() {
27158 let a = _mm512_setr_ps(
27159 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
27160 );
27161 let b = _mm512_set1_ps(-1.);
27162 let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27163 let e = _mm512_setr_ps(
27164 -1.,
27165 0.5,
27166 1.,
27167 2.5,
27168 3.,
27169 4.5,
27170 5.,
27171 6.5,
27172 7.,
27173 8.5,
27174 9.,
27175 10.5,
27176 11.,
27177 12.5,
27178 13.,
27179 -0.99999994,
27180 );
27181 assert_eq_m512(r, e);
27182 let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27183 let e = _mm512_setr_ps(
27184 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
27185 );
27186 assert_eq_m512(r, e);
27187 }
27188
27189 #[simd_test(enable = "avx512f")]
27190 unsafe fn test_mm512_mask_add_round_ps() {
27191 let a = _mm512_setr_ps(
27192 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
27193 );
27194 let b = _mm512_set1_ps(-1.);
27195 let r = _mm512_mask_add_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27196 assert_eq_m512(r, a);
27197 let r = _mm512_mask_add_round_ps(
27198 a,
27199 0b11111111_00000000,
27200 a,
27201 b,
27202 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27203 );
27204 let e = _mm512_setr_ps(
27205 0.,
27206 1.5,
27207 2.,
27208 3.5,
27209 4.,
27210 5.5,
27211 6.,
27212 7.5,
27213 7.,
27214 8.5,
27215 9.,
27216 10.5,
27217 11.,
27218 12.5,
27219 13.,
27220 -0.99999994,
27221 );
27222 assert_eq_m512(r, e);
27223 }
27224
27225 #[simd_test(enable = "avx512f")]
27226 unsafe fn test_mm512_maskz_add_round_ps() {
27227 let a = _mm512_setr_ps(
27228 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
27229 );
27230 let b = _mm512_set1_ps(-1.);
27231 let r = _mm512_maskz_add_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27232 assert_eq_m512(r, _mm512_setzero_ps());
27233 let r = _mm512_maskz_add_round_ps(
27234 0b11111111_00000000,
27235 a,
27236 b,
27237 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27238 );
27239 let e = _mm512_setr_ps(
27240 0.,
27241 0.,
27242 0.,
27243 0.,
27244 0.,
27245 0.,
27246 0.,
27247 0.,
27248 7.,
27249 8.5,
27250 9.,
27251 10.5,
27252 11.,
27253 12.5,
27254 13.,
27255 -0.99999994,
27256 );
27257 assert_eq_m512(r, e);
27258 }
27259
27260 #[simd_test(enable = "avx512f")]
27261 unsafe fn test_mm512_sub_round_ps() {
27262 let a = _mm512_setr_ps(
27263 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
27264 );
27265 let b = _mm512_set1_ps(1.);
27266 let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27267 let e = _mm512_setr_ps(
27268 -1.,
27269 0.5,
27270 1.,
27271 2.5,
27272 3.,
27273 4.5,
27274 5.,
27275 6.5,
27276 7.,
27277 8.5,
27278 9.,
27279 10.5,
27280 11.,
27281 12.5,
27282 13.,
27283 -0.99999994,
27284 );
27285 assert_eq_m512(r, e);
27286 let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27287 let e = _mm512_setr_ps(
27288 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
27289 );
27290 assert_eq_m512(r, e);
27291 }
27292
27293 #[simd_test(enable = "avx512f")]
27294 unsafe fn test_mm512_mask_sub_round_ps() {
27295 let a = _mm512_setr_ps(
27296 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
27297 );
27298 let b = _mm512_set1_ps(1.);
27299 let r = _mm512_mask_sub_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27300 assert_eq_m512(r, a);
27301 let r = _mm512_mask_sub_round_ps(
27302 a,
27303 0b11111111_00000000,
27304 a,
27305 b,
27306 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27307 );
27308 let e = _mm512_setr_ps(
27309 0.,
27310 1.5,
27311 2.,
27312 3.5,
27313 4.,
27314 5.5,
27315 6.,
27316 7.5,
27317 7.,
27318 8.5,
27319 9.,
27320 10.5,
27321 11.,
27322 12.5,
27323 13.,
27324 -0.99999994,
27325 );
27326 assert_eq_m512(r, e);
27327 }
27328
27329 #[simd_test(enable = "avx512f")]
27330 unsafe fn test_mm512_maskz_sub_round_ps() {
27331 let a = _mm512_setr_ps(
27332 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
27333 );
27334 let b = _mm512_set1_ps(1.);
27335 let r = _mm512_maskz_sub_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27336 assert_eq_m512(r, _mm512_setzero_ps());
27337 let r = _mm512_maskz_sub_round_ps(
27338 0b11111111_00000000,
27339 a,
27340 b,
27341 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27342 );
27343 let e = _mm512_setr_ps(
27344 0.,
27345 0.,
27346 0.,
27347 0.,
27348 0.,
27349 0.,
27350 0.,
27351 0.,
27352 7.,
27353 8.5,
27354 9.,
27355 10.5,
27356 11.,
27357 12.5,
27358 13.,
27359 -0.99999994,
27360 );
27361 assert_eq_m512(r, e);
27362 }
27363
27364 #[simd_test(enable = "avx512f")]
27365 unsafe fn test_mm512_mul_round_ps() {
27366 let a = _mm512_setr_ps(
27367 0.,
27368 1.5,
27369 2.,
27370 3.5,
27371 4.,
27372 5.5,
27373 6.,
27374 7.5,
27375 8.,
27376 9.5,
27377 10.,
27378 11.5,
27379 12.,
27380 13.5,
27381 14.,
27382 0.00000000000000000000007,
27383 );
27384 let b = _mm512_set1_ps(0.1);
27385 let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27386 let e = _mm512_setr_ps(
27387 0.,
27388 0.15,
27389 0.2,
27390 0.35,
27391 0.4,
27392 0.55,
27393 0.6,
27394 0.75,
27395 0.8,
27396 0.95,
27397 1.0,
27398 1.15,
27399 1.2,
27400 1.35,
27401 1.4,
27402 0.000000000000000000000007000001,
27403 );
27404 assert_eq_m512(r, e);
27405 let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27406 let e = _mm512_setr_ps(
27407 0.,
27408 0.14999999,
27409 0.2,
27410 0.35,
27411 0.4,
27412 0.54999995,
27413 0.59999996,
27414 0.75,
27415 0.8,
27416 0.95,
27417 1.0,
27418 1.15,
27419 1.1999999,
27420 1.3499999,
27421 1.4,
27422 0.000000000000000000000007,
27423 );
27424 assert_eq_m512(r, e);
27425 }
27426
27427 #[simd_test(enable = "avx512f")]
27428 unsafe fn test_mm512_mask_mul_round_ps() {
27429 let a = _mm512_setr_ps(
27430 0.,
27431 1.5,
27432 2.,
27433 3.5,
27434 4.,
27435 5.5,
27436 6.,
27437 7.5,
27438 8.,
27439 9.5,
27440 10.,
27441 11.5,
27442 12.,
27443 13.5,
27444 14.,
27445 0.00000000000000000000007,
27446 );
27447 let b = _mm512_set1_ps(0.1);
27448 let r = _mm512_mask_mul_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27449 assert_eq_m512(r, a);
27450 let r = _mm512_mask_mul_round_ps(
27451 a,
27452 0b11111111_00000000,
27453 a,
27454 b,
27455 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27456 );
27457 let e = _mm512_setr_ps(
27458 0.,
27459 1.5,
27460 2.,
27461 3.5,
27462 4.,
27463 5.5,
27464 6.,
27465 7.5,
27466 0.8,
27467 0.95,
27468 1.0,
27469 1.15,
27470 1.2,
27471 1.35,
27472 1.4,
27473 0.000000000000000000000007000001,
27474 );
27475 assert_eq_m512(r, e);
27476 }
27477
27478 #[simd_test(enable = "avx512f")]
27479 unsafe fn test_mm512_maskz_mul_round_ps() {
27480 let a = _mm512_setr_ps(
27481 0.,
27482 1.5,
27483 2.,
27484 3.5,
27485 4.,
27486 5.5,
27487 6.,
27488 7.5,
27489 8.,
27490 9.5,
27491 10.,
27492 11.5,
27493 12.,
27494 13.5,
27495 14.,
27496 0.00000000000000000000007,
27497 );
27498 let b = _mm512_set1_ps(0.1);
27499 let r = _mm512_maskz_mul_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27500 assert_eq_m512(r, _mm512_setzero_ps());
27501 let r = _mm512_maskz_mul_round_ps(
27502 0b11111111_00000000,
27503 a,
27504 b,
27505 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27506 );
27507 let e = _mm512_setr_ps(
27508 0.,
27509 0.,
27510 0.,
27511 0.,
27512 0.,
27513 0.,
27514 0.,
27515 0.,
27516 0.8,
27517 0.95,
27518 1.0,
27519 1.15,
27520 1.2,
27521 1.35,
27522 1.4,
27523 0.000000000000000000000007000001,
27524 );
27525 assert_eq_m512(r, e);
27526 }
27527
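// 1.0 / 3.0 rounds up to 0.33333334 under round-to-nearest and truncates to 0.3333333 under round-toward-zero.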
27528 #[simd_test(enable = "avx512f")]
27529 unsafe fn test_mm512_div_round_ps() {
27530 let a = _mm512_set1_ps(1.);
27531 let b = _mm512_set1_ps(3.);
27532 let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27533 let e = _mm512_set1_ps(0.33333334);
27534 assert_eq_m512(r, e);
27535 let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27536 let e = _mm512_set1_ps(0.3333333);
27537 assert_eq_m512(r, e);
27538 }
27539
27540 #[simd_test(enable = "avx512f")]
27541 unsafe fn test_mm512_mask_div_round_ps() {
27542 let a = _mm512_set1_ps(1.);
27543 let b = _mm512_set1_ps(3.);
27544 let r = _mm512_mask_div_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27545 assert_eq_m512(r, a);
27546 let r = _mm512_mask_div_round_ps(
27547 a,
27548 0b11111111_00000000,
27549 a,
27550 b,
27551 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27552 );
27553 let e = _mm512_setr_ps(
27554 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
27555 0.33333334, 0.33333334, 0.33333334, 0.33333334,
27556 );
27557 assert_eq_m512(r, e);
27558 }
27559
27560 #[simd_test(enable = "avx512f")]
27561 unsafe fn test_mm512_maskz_div_round_ps() {
27562 let a = _mm512_set1_ps(1.);
27563 let b = _mm512_set1_ps(3.);
27564 let r = _mm512_maskz_div_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27565 assert_eq_m512(r, _mm512_setzero_ps());
27566 let r = _mm512_maskz_div_round_ps(
27567 0b11111111_00000000,
27568 a,
27569 b,
27570 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27571 );
27572 let e = _mm512_setr_ps(
27573 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
27574 0.33333334, 0.33333334, 0.33333334, 0.33333334,
27575 );
27576 assert_eq_m512(r, e);
27577 }
27578
27579 #[simd_test(enable = "avx512f")]
27580 unsafe fn test_mm512_sqrt_round_ps() {
27581 let a = _mm512_set1_ps(3.);
27582 let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27583 let e = _mm512_set1_ps(1.7320508);
27584 assert_eq_m512(r, e);
27585 let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
27586 let e = _mm512_set1_ps(1.7320509);
27587 assert_eq_m512(r, e);
27588 }
27589
27590 #[simd_test(enable = "avx512f")]
27591 unsafe fn test_mm512_mask_sqrt_round_ps() {
27592 let a = _mm512_set1_ps(3.);
27593 let r = _mm512_mask_sqrt_round_ps(a, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27594 assert_eq_m512(r, a);
27595 let r = _mm512_mask_sqrt_round_ps(
27596 a,
27597 0b11111111_00000000,
27598 a,
27599 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27600 );
27601 let e = _mm512_setr_ps(
27602 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
27603 1.7320508, 1.7320508, 1.7320508,
27604 );
27605 assert_eq_m512(r, e);
27606 }
27607
27608 #[simd_test(enable = "avx512f")]
27609 unsafe fn test_mm512_maskz_sqrt_round_ps() {
27610 let a = _mm512_set1_ps(3.);
27611 let r = _mm512_maskz_sqrt_round_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27612 assert_eq_m512(r, _mm512_setzero_ps());
27613 let r = _mm512_maskz_sqrt_round_ps(
27614 0b11111111_00000000,
27615 a,
27616 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27617 );
27618 let e = _mm512_setr_ps(
27619 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
27620 1.7320508, 1.7320508, 1.7320508,
27621 );
27622 assert_eq_m512(r, e);
27623 }
27624
27625 #[simd_test(enable = "avx512f")]
27626 unsafe fn test_mm512_fmadd_round_ps() {
27627 let a = _mm512_set1_ps(0.00000007);
27628 let b = _mm512_set1_ps(1.);
27629 let c = _mm512_set1_ps(-1.);
27630 let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27631 let e = _mm512_set1_ps(-0.99999994);
27632 assert_eq_m512(r, e);
27633 let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27634 let e = _mm512_set1_ps(-0.9999999);
27635 assert_eq_m512(r, e);
27636 }
27637
27638 #[simd_test(enable = "avx512f")]
27639 unsafe fn test_mm512_mask_fmadd_round_ps() {
27640 let a = _mm512_set1_ps(0.00000007);
27641 let b = _mm512_set1_ps(1.);
27642 let c = _mm512_set1_ps(-1.);
27643 let r =
27644 _mm512_mask_fmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27645 assert_eq_m512(r, a);
27646 let r = _mm512_mask_fmadd_round_ps(
27647 a,
27648 0b00000000_11111111,
27649 b,
27650 c,
27651 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27652 );
27653 let e = _mm512_setr_ps(
27654 -0.99999994,
27655 -0.99999994,
27656 -0.99999994,
27657 -0.99999994,
27658 -0.99999994,
27659 -0.99999994,
27660 -0.99999994,
27661 -0.99999994,
27662 0.00000007,
27663 0.00000007,
27664 0.00000007,
27665 0.00000007,
27666 0.00000007,
27667 0.00000007,
27668 0.00000007,
27669 0.00000007,
27670 );
27671 assert_eq_m512(r, e);
27672 }
27673
27674 #[simd_test(enable = "avx512f")]
27675 unsafe fn test_mm512_maskz_fmadd_round_ps() {
27676 let a = _mm512_set1_ps(0.00000007);
27677 let b = _mm512_set1_ps(1.);
27678 let c = _mm512_set1_ps(-1.);
27679 let r =
27680 _mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27681 assert_eq_m512(r, _mm512_setzero_ps());
27682 let r = _mm512_maskz_fmadd_round_ps(
27683 0b00000000_11111111,
27684 a,
27685 b,
27686 c,
27687 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27688 );
27689 let e = _mm512_setr_ps(
27690 -0.99999994,
27691 -0.99999994,
27692 -0.99999994,
27693 -0.99999994,
27694 -0.99999994,
27695 -0.99999994,
27696 -0.99999994,
27697 -0.99999994,
27698 0.,
27699 0.,
27700 0.,
27701 0.,
27702 0.,
27703 0.,
27704 0.,
27705 0.,
27706 );
27707 assert_eq_m512(r, e);
27708 }
27709
27710 #[simd_test(enable = "avx512f")]
27711 unsafe fn test_mm512_mask3_fmadd_round_ps() {
27712 let a = _mm512_set1_ps(0.00000007);
27713 let b = _mm512_set1_ps(1.);
27714 let c = _mm512_set1_ps(-1.);
27715 let r =
27716 _mm512_mask3_fmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27717 assert_eq_m512(r, c);
27718 let r = _mm512_mask3_fmadd_round_ps(
27719 a,
27720 b,
27721 c,
27722 0b00000000_11111111,
27723 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27724 );
27725 let e = _mm512_setr_ps(
27726 -0.99999994,
27727 -0.99999994,
27728 -0.99999994,
27729 -0.99999994,
27730 -0.99999994,
27731 -0.99999994,
27732 -0.99999994,
27733 -0.99999994,
27734 -1.,
27735 -1.,
27736 -1.,
27737 -1.,
27738 -1.,
27739 -1.,
27740 -1.,
27741 -1.,
27742 );
27743 assert_eq_m512(r, e);
27744 }
27745
27746 #[simd_test(enable = "avx512f")]
27747 unsafe fn test_mm512_fmsub_round_ps() {
27748 let a = _mm512_set1_ps(0.00000007);
27749 let b = _mm512_set1_ps(1.);
27750 let c = _mm512_set1_ps(1.);
27751 let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27752 let e = _mm512_set1_ps(-0.99999994);
27753 assert_eq_m512(r, e);
27754 let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27755 let e = _mm512_set1_ps(-0.9999999);
27756 assert_eq_m512(r, e);
27757 }
27758
27759 #[simd_test(enable = "avx512f")]
27760 unsafe fn test_mm512_mask_fmsub_round_ps() {
27761 let a = _mm512_set1_ps(0.00000007);
27762 let b = _mm512_set1_ps(1.);
27763 let c = _mm512_set1_ps(1.);
27764 let r =
27765 _mm512_mask_fmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27766 assert_eq_m512(r, a);
27767 let r = _mm512_mask_fmsub_round_ps(
27768 a,
27769 0b00000000_11111111,
27770 b,
27771 c,
27772 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27773 );
27774 let e = _mm512_setr_ps(
27775 -0.99999994,
27776 -0.99999994,
27777 -0.99999994,
27778 -0.99999994,
27779 -0.99999994,
27780 -0.99999994,
27781 -0.99999994,
27782 -0.99999994,
27783 0.00000007,
27784 0.00000007,
27785 0.00000007,
27786 0.00000007,
27787 0.00000007,
27788 0.00000007,
27789 0.00000007,
27790 0.00000007,
27791 );
27792 assert_eq_m512(r, e);
27793 }
27794
27795 #[simd_test(enable = "avx512f")]
27796 unsafe fn test_mm512_maskz_fmsub_round_ps() {
27797 let a = _mm512_set1_ps(0.00000007);
27798 let b = _mm512_set1_ps(1.);
27799 let c = _mm512_set1_ps(1.);
27800 let r =
27801 _mm512_maskz_fmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27802 assert_eq_m512(r, _mm512_setzero_ps());
27803 let r = _mm512_maskz_fmsub_round_ps(
27804 0b00000000_11111111,
27805 a,
27806 b,
27807 c,
27808 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27809 );
27810 let e = _mm512_setr_ps(
27811 -0.99999994,
27812 -0.99999994,
27813 -0.99999994,
27814 -0.99999994,
27815 -0.99999994,
27816 -0.99999994,
27817 -0.99999994,
27818 -0.99999994,
27819 0.,
27820 0.,
27821 0.,
27822 0.,
27823 0.,
27824 0.,
27825 0.,
27826 0.,
27827 );
27828 assert_eq_m512(r, e);
27829 }
27830
27831 #[simd_test(enable = "avx512f")]
27832 unsafe fn test_mm512_mask3_fmsub_round_ps() {
27833 let a = _mm512_set1_ps(0.00000007);
27834 let b = _mm512_set1_ps(1.);
27835 let c = _mm512_set1_ps(1.);
27836 let r =
27837 _mm512_mask3_fmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27838 assert_eq_m512(r, c);
27839 let r = _mm512_mask3_fmsub_round_ps(
27840 a,
27841 b,
27842 c,
27843 0b00000000_11111111,
27844 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27845 );
27846 let e = _mm512_setr_ps(
27847 -0.99999994,
27848 -0.99999994,
27849 -0.99999994,
27850 -0.99999994,
27851 -0.99999994,
27852 -0.99999994,
27853 -0.99999994,
27854 -0.99999994,
27855 1.,
27856 1.,
27857 1.,
27858 1.,
27859 1.,
27860 1.,
27861 1.,
27862 1.,
27863 );
27864 assert_eq_m512(r, e);
27865 }
27866
27867 #[simd_test(enable = "avx512f")]
27868 unsafe fn test_mm512_fmaddsub_round_ps() {
27869 let a = _mm512_set1_ps(0.00000007);
27870 let b = _mm512_set1_ps(1.);
27871 let c = _mm512_set1_ps(-1.);
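// fmaddsub subtracts `c` at even lane indices and adds it at odd ones, which is why the
// expected vector alternates 0.00000007 - (-1.) (rounded to 1.0000001) and
// 0.00000007 + (-1.) (rounded to -0.99999994).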
27872 let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
27873 let e = _mm512_setr_ps(
27874 1.0000001,
27875 -0.99999994,
27876 1.0000001,
27877 -0.99999994,
27878 1.0000001,
27879 -0.99999994,
27880 1.0000001,
27881 -0.99999994,
27882 1.0000001,
27883 -0.99999994,
27884 1.0000001,
27885 -0.99999994,
27886 1.0000001,
27887 -0.99999994,
27888 1.0000001,
27889 -0.99999994,
27890 );
27891 assert_eq_m512(r, e);
27892 let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
27893 let e = _mm512_setr_ps(
27894 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
27895 -0.9999999, 1., -0.9999999, 1., -0.9999999,
27896 );
27897 assert_eq_m512(r, e);
27898 }
27899
27900 #[simd_test(enable = "avx512f")]
27901 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
27902 let a = _mm512_set1_ps(0.00000007);
27903 let b = _mm512_set1_ps(1.);
27904 let c = _mm512_set1_ps(-1.);
27905 let r = _mm512_mask_fmaddsub_round_ps(
27906 a,
27907 0,
27908 b,
27909 c,
27910 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27911 );
27912 assert_eq_m512(r, a);
27913 let r = _mm512_mask_fmaddsub_round_ps(
27914 a,
27915 0b00000000_11111111,
27916 b,
27917 c,
27918 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27919 );
27920 let e = _mm512_setr_ps(
27921 1.0000001,
27922 -0.99999994,
27923 1.0000001,
27924 -0.99999994,
27925 1.0000001,
27926 -0.99999994,
27927 1.0000001,
27928 -0.99999994,
27929 0.00000007,
27930 0.00000007,
27931 0.00000007,
27932 0.00000007,
27933 0.00000007,
27934 0.00000007,
27935 0.00000007,
27936 0.00000007,
27937 );
27938 assert_eq_m512(r, e);
27939 }
27940
27941 #[simd_test(enable = "avx512f")]
27942 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
27943 let a = _mm512_set1_ps(0.00000007);
27944 let b = _mm512_set1_ps(1.);
27945 let c = _mm512_set1_ps(-1.);
27946 let r = _mm512_maskz_fmaddsub_round_ps(
27947 0,
27948 a,
27949 b,
27950 c,
27951 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27952 );
27953 assert_eq_m512(r, _mm512_setzero_ps());
27954 let r = _mm512_maskz_fmaddsub_round_ps(
27955 0b00000000_11111111,
27956 a,
27957 b,
27958 c,
27959 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27960 );
27961 let e = _mm512_setr_ps(
27962 1.0000001,
27963 -0.99999994,
27964 1.0000001,
27965 -0.99999994,
27966 1.0000001,
27967 -0.99999994,
27968 1.0000001,
27969 -0.99999994,
27970 0.,
27971 0.,
27972 0.,
27973 0.,
27974 0.,
27975 0.,
27976 0.,
27977 0.,
27978 );
27979 assert_eq_m512(r, e);
27980 }
27981
27982 #[simd_test(enable = "avx512f")]
27983 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
27984 let a = _mm512_set1_ps(0.00000007);
27985 let b = _mm512_set1_ps(1.);
27986 let c = _mm512_set1_ps(-1.);
27987 let r = _mm512_mask3_fmaddsub_round_ps(
27988 a,
27989 b,
27990 c,
27991 0,
27992 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
27993 );
27994 assert_eq_m512(r, c);
27995 let r = _mm512_mask3_fmaddsub_round_ps(
27996 a,
27997 b,
27998 c,
27999 0b00000000_11111111,
28000 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28001 );
28002 let e = _mm512_setr_ps(
28003 1.0000001,
28004 -0.99999994,
28005 1.0000001,
28006 -0.99999994,
28007 1.0000001,
28008 -0.99999994,
28009 1.0000001,
28010 -0.99999994,
28011 -1.,
28012 -1.,
28013 -1.,
28014 -1.,
28015 -1.,
28016 -1.,
28017 -1.,
28018 -1.,
28019 );
28020 assert_eq_m512(r, e);
28021 }
28022
28023 #[simd_test(enable = "avx512f")]
28024 unsafe fn test_mm512_fmsubadd_round_ps() {
28025 let a = _mm512_set1_ps(0.00000007);
28026 let b = _mm512_set1_ps(1.);
28027 let c = _mm512_set1_ps(-1.);
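// fmsubadd mirrors fmaddsub: `c` is added at even lane indices and subtracted at odd ones,
// so the alternating pattern starts with -0.99999994 instead of 1.0000001.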
28028 let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28029 let e = _mm512_setr_ps(
28030 -0.99999994,
28031 1.0000001,
28032 -0.99999994,
28033 1.0000001,
28034 -0.99999994,
28035 1.0000001,
28036 -0.99999994,
28037 1.0000001,
28038 -0.99999994,
28039 1.0000001,
28040 -0.99999994,
28041 1.0000001,
28042 -0.99999994,
28043 1.0000001,
28044 -0.99999994,
28045 1.0000001,
28046 );
28047 assert_eq_m512(r, e);
28048 let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
28049 let e = _mm512_setr_ps(
28050 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
28051 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
28052 );
28053 assert_eq_m512(r, e);
28054 }
28055
28056 #[simd_test(enable = "avx512f")]
28057 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
28058 let a = _mm512_set1_ps(0.00000007);
28059 let b = _mm512_set1_ps(1.);
28060 let c = _mm512_set1_ps(-1.);
28061 let r = _mm512_mask_fmsubadd_round_ps(
28062 a,
28063 0,
28064 b,
28065 c,
28066 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28067 );
28068 assert_eq_m512(r, a);
28069 let r = _mm512_mask_fmsubadd_round_ps(
28070 a,
28071 0b00000000_11111111,
28072 b,
28073 c,
28074 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28075 );
28076 let e = _mm512_setr_ps(
28077 -0.99999994,
28078 1.0000001,
28079 -0.99999994,
28080 1.0000001,
28081 -0.99999994,
28082 1.0000001,
28083 -0.99999994,
28084 1.0000001,
28085 0.00000007,
28086 0.00000007,
28087 0.00000007,
28088 0.00000007,
28089 0.00000007,
28090 0.00000007,
28091 0.00000007,
28092 0.00000007,
28093 );
28094 assert_eq_m512(r, e);
28095 }
28096
28097 #[simd_test(enable = "avx512f")]
28098 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
28099 let a = _mm512_set1_ps(0.00000007);
28100 let b = _mm512_set1_ps(1.);
28101 let c = _mm512_set1_ps(-1.);
28102 let r = _mm512_maskz_fmsubadd_round_ps(
28103 0,
28104 a,
28105 b,
28106 c,
28107 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28108 );
28109 assert_eq_m512(r, _mm512_setzero_ps());
28110 let r = _mm512_maskz_fmsubadd_round_ps(
28111 0b00000000_11111111,
28112 a,
28113 b,
28114 c,
28115 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28116 );
28117 let e = _mm512_setr_ps(
28118 -0.99999994,
28119 1.0000001,
28120 -0.99999994,
28121 1.0000001,
28122 -0.99999994,
28123 1.0000001,
28124 -0.99999994,
28125 1.0000001,
28126 0.,
28127 0.,
28128 0.,
28129 0.,
28130 0.,
28131 0.,
28132 0.,
28133 0.,
28134 );
28135 assert_eq_m512(r, e);
28136 }
28137
28138 #[simd_test(enable = "avx512f")]
28139 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
28140 let a = _mm512_set1_ps(0.00000007);
28141 let b = _mm512_set1_ps(1.);
28142 let c = _mm512_set1_ps(-1.);
28143 let r = _mm512_mask3_fmsubadd_round_ps(
28144 a,
28145 b,
28146 c,
28147 0,
28148 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28149 );
28150 assert_eq_m512(r, c);
28151 let r = _mm512_mask3_fmsubadd_round_ps(
28152 a,
28153 b,
28154 c,
28155 0b00000000_11111111,
28156 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28157 );
28158 let e = _mm512_setr_ps(
28159 -0.99999994,
28160 1.0000001,
28161 -0.99999994,
28162 1.0000001,
28163 -0.99999994,
28164 1.0000001,
28165 -0.99999994,
28166 1.0000001,
28167 -1.,
28168 -1.,
28169 -1.,
28170 -1.,
28171 -1.,
28172 -1.,
28173 -1.,
28174 -1.,
28175 );
28176 assert_eq_m512(r, e);
28177 }
28178
28179 #[simd_test(enable = "avx512f")]
28180 unsafe fn test_mm512_fnmadd_round_ps() {
28181 let a = _mm512_set1_ps(0.00000007);
28182 let b = _mm512_set1_ps(1.);
28183 let c = _mm512_set1_ps(1.);
28184 let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28185 let e = _mm512_set1_ps(0.99999994);
28186 assert_eq_m512(r, e);
28187 let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
28188 let e = _mm512_set1_ps(0.9999999);
28189 assert_eq_m512(r, e);
28190 }
28191
28192 #[simd_test(enable = "avx512f")]
28193 unsafe fn test_mm512_mask_fnmadd_round_ps() {
28194 let a = _mm512_set1_ps(0.00000007);
28195 let b = _mm512_set1_ps(1.);
28196 let c = _mm512_set1_ps(1.);
28197 let r =
28198 _mm512_mask_fnmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28199 assert_eq_m512(r, a);
28200 let r = _mm512_mask_fnmadd_round_ps(
28201 a,
28202 0b00000000_11111111,
28203 b,
28204 c,
28205 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28206 );
28207 let e = _mm512_setr_ps(
28208 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
28209 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
28210 0.00000007, 0.00000007,
28211 );
28212 assert_eq_m512(r, e);
28213 }
28214
28215 #[simd_test(enable = "avx512f")]
28216 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
28217 let a = _mm512_set1_ps(0.00000007);
28218 let b = _mm512_set1_ps(1.);
28219 let c = _mm512_set1_ps(1.);
28220 let r =
28221 _mm512_maskz_fnmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28222 assert_eq_m512(r, _mm512_setzero_ps());
28223 let r = _mm512_maskz_fnmadd_round_ps(
28224 0b00000000_11111111,
28225 a,
28226 b,
28227 c,
28228 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28229 );
28230 let e = _mm512_setr_ps(
28231 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
28232 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
28233 );
28234 assert_eq_m512(r, e);
28235 }
28236
28237 #[simd_test(enable = "avx512f")]
28238 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
28239 let a = _mm512_set1_ps(0.00000007);
28240 let b = _mm512_set1_ps(1.);
28241 let c = _mm512_set1_ps(1.);
28242 let r =
28243 _mm512_mask3_fnmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28244 assert_eq_m512(r, c);
28245 let r = _mm512_mask3_fnmadd_round_ps(
28246 a,
28247 b,
28248 c,
28249 0b00000000_11111111,
28250 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28251 );
28252 let e = _mm512_setr_ps(
28253 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
28254 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
28255 );
28256 assert_eq_m512(r, e);
28257 }
28258
28259 #[simd_test(enable = "avx512f")]
28260 unsafe fn test_mm512_fnmsub_round_ps() {
28261 let a = _mm512_set1_ps(0.00000007);
28262 let b = _mm512_set1_ps(1.);
28263 let c = _mm512_set1_ps(-1.);
28264 let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28265 let e = _mm512_set1_ps(0.99999994);
28266 assert_eq_m512(r, e);
28267 let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
28268 let e = _mm512_set1_ps(0.9999999);
28269 assert_eq_m512(r, e);
28270 }
28271
28272 #[simd_test(enable = "avx512f")]
28273 unsafe fn test_mm512_mask_fnmsub_round_ps() {
28274 let a = _mm512_set1_ps(0.00000007);
28275 let b = _mm512_set1_ps(1.);
28276 let c = _mm512_set1_ps(-1.);
28277 let r =
28278 _mm512_mask_fnmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28279 assert_eq_m512(r, a);
28280 let r = _mm512_mask_fnmsub_round_ps(
28281 a,
28282 0b00000000_11111111,
28283 b,
28284 c,
28285 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28286 );
28287 let e = _mm512_setr_ps(
28288 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
28289 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
28290 0.00000007, 0.00000007,
28291 );
28292 assert_eq_m512(r, e);
28293 }
28294
28295 #[simd_test(enable = "avx512f")]
28296 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
28297 let a = _mm512_set1_ps(0.00000007);
28298 let b = _mm512_set1_ps(1.);
28299 let c = _mm512_set1_ps(-1.);
28300 let r =
28301 _mm512_maskz_fnmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28302 assert_eq_m512(r, _mm512_setzero_ps());
28303 let r = _mm512_maskz_fnmsub_round_ps(
28304 0b00000000_11111111,
28305 a,
28306 b,
28307 c,
28308 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28309 );
28310 let e = _mm512_setr_ps(
28311 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
28312 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
28313 );
28314 assert_eq_m512(r, e);
28315 }
28316
28317 #[simd_test(enable = "avx512f")]
28318 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
28319 let a = _mm512_set1_ps(0.00000007);
28320 let b = _mm512_set1_ps(1.);
28321 let c = _mm512_set1_ps(-1.);
28322 let r =
28323 _mm512_mask3_fnmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28324 assert_eq_m512(r, c);
28325 let r = _mm512_mask3_fnmsub_round_ps(
28326 a,
28327 b,
28328 c,
28329 0b00000000_11111111,
28330 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28331 );
28332 let e = _mm512_setr_ps(
28333 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
28334 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
28335 );
28336 assert_eq_m512(r, e);
28337 }
28338
28339 #[simd_test(enable = "avx512f")]
28340 unsafe fn test_mm512_max_round_ps() {
28341 let a = _mm512_setr_ps(
28342 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28343 );
28344 let b = _mm512_setr_ps(
28345 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
28346 );
28347 let r = _mm512_max_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
28348 let e = _mm512_setr_ps(
28349 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
28350 );
28351 assert_eq_m512(r, e);
28352 }
28353
28354 #[simd_test(enable = "avx512f")]
28355 unsafe fn test_mm512_mask_max_round_ps() {
28356 let a = _mm512_setr_ps(
28357 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28358 );
28359 let b = _mm512_setr_ps(
28360 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
28361 );
28362 let r = _mm512_mask_max_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
28363 assert_eq_m512(r, a);
28364 let r = _mm512_mask_max_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
28365 let e = _mm512_setr_ps(
28366 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
28367 );
28368 assert_eq_m512(r, e);
28369 }
28370
28371 #[simd_test(enable = "avx512f")]
28372 unsafe fn test_mm512_maskz_max_round_ps() {
28373 let a = _mm512_setr_ps(
28374 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28375 );
28376 let b = _mm512_setr_ps(
28377 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
28378 );
28379 let r = _mm512_maskz_max_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
28380 assert_eq_m512(r, _mm512_setzero_ps());
28381 let r = _mm512_maskz_max_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
28382 let e = _mm512_setr_ps(
28383 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
28384 );
28385 assert_eq_m512(r, e);
28386 }
28387
28388 #[simd_test(enable = "avx512f")]
28389 unsafe fn test_mm512_min_round_ps() {
28390 let a = _mm512_setr_ps(
28391 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28392 );
28393 let b = _mm512_setr_ps(
28394 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
28395 );
28396 let r = _mm512_min_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
28397 let e = _mm512_setr_ps(
28398 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
28399 );
28400 assert_eq_m512(r, e);
28401 }
28402
28403 #[simd_test(enable = "avx512f")]
28404 unsafe fn test_mm512_mask_min_round_ps() {
28405 let a = _mm512_setr_ps(
28406 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28407 );
28408 let b = _mm512_setr_ps(
28409 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
28410 );
28411 let r = _mm512_mask_min_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
28412 assert_eq_m512(r, a);
28413 let r = _mm512_mask_min_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
28414 let e = _mm512_setr_ps(
28415 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28416 );
28417 assert_eq_m512(r, e);
28418 }
28419
28420 #[simd_test(enable = "avx512f")]
28421 unsafe fn test_mm512_maskz_min_round_ps() {
28422 let a = _mm512_setr_ps(
28423 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28424 );
28425 let b = _mm512_setr_ps(
28426 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
28427 );
28428 let r = _mm512_maskz_min_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
28429 assert_eq_m512(r, _mm512_setzero_ps());
28430 let r = _mm512_maskz_min_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
28431 let e = _mm512_setr_ps(
28432 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
28433 );
28434 assert_eq_m512(r, e);
28435 }
28436
28437 #[simd_test(enable = "avx512f")]
28438 unsafe fn test_mm512_getexp_round_ps() {
28439 let a = _mm512_set1_ps(3.);
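// getexp extracts the unbiased base-2 exponent as a float: 3.0 = 1.5 * 2^1, hence 1.0.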
28440 let r = _mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION);
28441 let e = _mm512_set1_ps(1.);
28442 assert_eq_m512(r, e);
28443 let r = _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC);
28444 let e = _mm512_set1_ps(1.);
28445 assert_eq_m512(r, e);
28446 }
28447
28448 #[simd_test(enable = "avx512f")]
28449 unsafe fn test_mm512_mask_getexp_round_ps() {
28450 let a = _mm512_set1_ps(3.);
28451 let r = _mm512_mask_getexp_round_ps(a, 0, a, _MM_FROUND_CUR_DIRECTION);
28452 assert_eq_m512(r, a);
28453 let r = _mm512_mask_getexp_round_ps(a, 0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
28454 let e = _mm512_setr_ps(
28455 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
28456 );
28457 assert_eq_m512(r, e);
28458 }
28459
28460 #[simd_test(enable = "avx512f")]
28461 unsafe fn test_mm512_maskz_getexp_round_ps() {
28462 let a = _mm512_set1_ps(3.);
28463 let r = _mm512_maskz_getexp_round_ps(0, a, _MM_FROUND_CUR_DIRECTION);
28464 assert_eq_m512(r, _mm512_setzero_ps());
28465 let r = _mm512_maskz_getexp_round_ps(0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
28466 let e = _mm512_setr_ps(
28467 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
28468 );
28469 assert_eq_m512(r, e);
28470 }
28471
28472 #[simd_test(enable = "avx512f")]
28473 unsafe fn test_mm512_roundscale_round_ps() {
28474 let a = _mm512_set1_ps(1.1);
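// An imm8 of 0 requests rounding to whole numbers (granularity 2^0), so 1.1 rounds to 1.0.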
28475 let r = _mm512_roundscale_round_ps(a, 0, _MM_FROUND_CUR_DIRECTION);
28476 let e = _mm512_set1_ps(1.0);
28477 assert_eq_m512(r, e);
28478 }
28479
28480 #[simd_test(enable = "avx512f")]
28481 unsafe fn test_mm512_mask_roundscale_round_ps() {
28482 let a = _mm512_set1_ps(1.1);
28483 let r = _mm512_mask_roundscale_round_ps(a, 0, a, 0, _MM_FROUND_CUR_DIRECTION);
28484 let e = _mm512_set1_ps(1.1);
28485 assert_eq_m512(r, e);
28486 let r =
28487 _mm512_mask_roundscale_round_ps(a, 0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
28488 let e = _mm512_set1_ps(1.0);
28489 assert_eq_m512(r, e);
28490 }
28491
28492 #[simd_test(enable = "avx512f")]
28493 unsafe fn test_mm512_maskz_roundscale_round_ps() {
28494 let a = _mm512_set1_ps(1.1);
28495 let r = _mm512_maskz_roundscale_round_ps(0, a, 0, _MM_FROUND_CUR_DIRECTION);
28496 assert_eq_m512(r, _mm512_setzero_ps());
28497 let r =
28498 _mm512_maskz_roundscale_round_ps(0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
28499 let e = _mm512_set1_ps(1.0);
28500 assert_eq_m512(r, e);
28501 }
28502
28503 #[simd_test(enable = "avx512f")]
28504 unsafe fn test_mm512_scalef_round_ps() {
28505 let a = _mm512_set1_ps(1.);
28506 let b = _mm512_set1_ps(3.);
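// scalef multiplies each lane of `a` by 2 raised to the floor of `b`: 1.0 * 2^3 = 8.0.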
28507 let r = _mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28508 let e = _mm512_set1_ps(8.);
28509 assert_eq_m512(r, e);
28510 }
28511
28512 #[simd_test(enable = "avx512f")]
28513 unsafe fn test_mm512_mask_scalef_round_ps() {
28514 let a = _mm512_set1_ps(1.);
28515 let b = _mm512_set1_ps(3.);
28516 let r =
28517 _mm512_mask_scalef_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28518 assert_eq_m512(r, a);
28519 let r = _mm512_mask_scalef_round_ps(
28520 a,
28521 0b11111111_00000000,
28522 a,
28523 b,
28524 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28525 );
28526 let e = _mm512_set_ps(
28527 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
28528 );
28529 assert_eq_m512(r, e);
28530 }
28531
28532 #[simd_test(enable = "avx512f")]
28533 unsafe fn test_mm512_maskz_scalef_round_ps() {
28534 let a = _mm512_set1_ps(1.);
28535 let b = _mm512_set1_ps(3.);
28536 let r =
28537 _mm512_maskz_scalef_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
28538 assert_eq_m512(r, _mm512_setzero_ps());
28539 let r = _mm512_maskz_scalef_round_ps(
28540 0b11111111_00000000,
28541 a,
28542 b,
28543 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
28544 );
28545 let e = _mm512_set_ps(
28546 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
28547 );
28548 assert_eq_m512(r, e);
28549 }
28550
28551 #[simd_test(enable = "avx512f")]
28552 unsafe fn test_mm512_fixupimm_round_ps() {
28553 let a = _mm512_set1_ps(f32::NAN);
28554 let b = _mm512_set1_ps(f32::MAX);
28555 let c = _mm512_set1_epi32(i32::MAX);
28556 let r = _mm512_fixupimm_round_ps(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
28557 let e = _mm512_set1_ps(0.0);
28558 assert_eq_m512(r, e);
28559 }
28560
28561 #[simd_test(enable = "avx512f")]
28562 unsafe fn test_mm512_mask_fixupimm_round_ps() {
28563 let a = _mm512_set_ps(
28564 f32::NAN,
28565 f32::NAN,
28566 f32::NAN,
28567 f32::NAN,
28568 f32::NAN,
28569 f32::NAN,
28570 f32::NAN,
28571 f32::NAN,
28572 1.,
28573 1.,
28574 1.,
28575 1.,
28576 1.,
28577 1.,
28578 1.,
28579 1.,
28580 );
28581 let b = _mm512_set1_ps(f32::MAX);
28582 let c = _mm512_set1_epi32(i32::MAX);
28583 let r = _mm512_mask_fixupimm_round_ps(
28584 a,
28585 0b11111111_00000000,
28586 b,
28587 c,
28588 5,
28589 _MM_FROUND_CUR_DIRECTION,
28590 );
28591 let e = _mm512_set_ps(
28592 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
28593 );
28594 assert_eq_m512(r, e);
28595 }
28596
28597 #[simd_test(enable = "avx512f")]
28598 unsafe fn test_mm512_maskz_fixupimm_round_ps() {
28599 let a = _mm512_set_ps(
28600 f32::NAN,
28601 f32::NAN,
28602 f32::NAN,
28603 f32::NAN,
28604 f32::NAN,
28605 f32::NAN,
28606 f32::NAN,
28607 f32::NAN,
28608 1.,
28609 1.,
28610 1.,
28611 1.,
28612 1.,
28613 1.,
28614 1.,
28615 1.,
28616 );
28617 let b = _mm512_set1_ps(f32::MAX);
28618 let c = _mm512_set1_epi32(i32::MAX);
28619 let r = _mm512_maskz_fixupimm_round_ps(
28620 0b11111111_00000000,
28621 a,
28622 b,
28623 c,
28624 5,
28625 _MM_FROUND_CUR_DIRECTION,
28626 );
28627 let e = _mm512_set_ps(
28628 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
28629 );
28630 assert_eq_m512(r, e);
28631 }
28632
28633 #[simd_test(enable = "avx512f")]
28634 unsafe fn test_mm512_getmant_round_ps() {
28635 let a = _mm512_set1_ps(10.);
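// With _MM_MANT_NORM_1_2 the mantissa is normalized into [1, 2): 10.0 = 1.25 * 2^3,
// so every lane yields 1.25.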
28636 let r = _mm512_getmant_round_ps(
28637 a,
28638 _MM_MANT_NORM_1_2,
28639 _MM_MANT_SIGN_SRC,
28640 _MM_FROUND_CUR_DIRECTION,
28641 );
28642 let e = _mm512_set1_ps(1.25);
28643 assert_eq_m512(r, e);
28644 }
28645
28646 #[simd_test(enable = "avx512f")]
28647 unsafe fn test_mm512_mask_getmant_round_ps() {
28648 let a = _mm512_set1_ps(10.);
28649 let r = _mm512_mask_getmant_round_ps(
28650 a,
28651 0,
28652 a,
28653 _MM_MANT_NORM_1_2,
28654 _MM_MANT_SIGN_SRC,
28655 _MM_FROUND_CUR_DIRECTION,
28656 );
28657 assert_eq_m512(r, a);
28658 let r = _mm512_mask_getmant_round_ps(
28659 a,
28660 0b11111111_00000000,
28661 a,
28662 _MM_MANT_NORM_1_2,
28663 _MM_MANT_SIGN_SRC,
28664 _MM_FROUND_CUR_DIRECTION,
28665 );
28666 let e = _mm512_setr_ps(
28667 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
28668 );
28669 assert_eq_m512(r, e);
28670 }
28671
28672 #[simd_test(enable = "avx512f")]
28673 unsafe fn test_mm512_maskz_getmant_round_ps() {
28674 let a = _mm512_set1_ps(10.);
28675 let r = _mm512_maskz_getmant_round_ps(
28676 0,
28677 a,
28678 _MM_MANT_NORM_1_2,
28679 _MM_MANT_SIGN_SRC,
28680 _MM_FROUND_CUR_DIRECTION,
28681 );
28682 assert_eq_m512(r, _mm512_setzero_ps());
28683 let r = _mm512_maskz_getmant_round_ps(
28684 0b11111111_00000000,
28685 a,
28686 _MM_MANT_NORM_1_2,
28687 _MM_MANT_SIGN_SRC,
28688 _MM_FROUND_CUR_DIRECTION,
28689 );
28690 let e = _mm512_setr_ps(
28691 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
28692 );
28693 assert_eq_m512(r, e);
28694 }
28695
28696 #[simd_test(enable = "avx512f")]
28697 unsafe fn test_mm512_cvtps_epi32() {
28698 let a = _mm512_setr_ps(
28699 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
28700 );
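// The default rounding mode is nearest with ties to even, so the .5 inputs land on the even
// neighbour: -3.5 -> -4, 9.5 -> 10, 11.5 -> 12, and so on.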
28701 let r = _mm512_cvtps_epi32(a);
28702 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
28703 assert_eq_m512i(r, e);
28704 }
28705
28706 #[simd_test(enable = "avx512f")]
28707 unsafe fn test_mm512_mask_cvtps_epi32() {
28708 let a = _mm512_setr_ps(
28709 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
28710 );
28711 let src = _mm512_set1_epi32(0);
28712 let r = _mm512_mask_cvtps_epi32(src, 0, a);
28713 assert_eq_m512i(r, src);
28714 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
28715 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
28716 assert_eq_m512i(r, e);
28717 }
28718
28719 #[simd_test(enable = "avx512f")]
28720 unsafe fn test_mm512_maskz_cvtps_epi32() {
28721 let a = _mm512_setr_ps(
28722 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
28723 );
28724 let r = _mm512_maskz_cvtps_epi32(0, a);
28725 assert_eq_m512i(r, _mm512_setzero_si512());
28726 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
28727 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
28728 assert_eq_m512i(r, e);
28729 }
28730
28731 #[simd_test(enable = "avx512f")]
28732 unsafe fn test_mm512_cvtps_epu32() {
28733 let a = _mm512_setr_ps(
28734 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
28735 );
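// Negative inputs are out of range for an unsigned conversion and come back as u32::MAX
// (0xFFFF_FFFF), which shows up as -1 through the signed `_mm512_setr_epi32` expectation.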
28736 let r = _mm512_cvtps_epu32(a);
28737 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
28738 assert_eq_m512i(r, e);
28739 }
28740
28741 #[simd_test(enable = "avx512f")]
28742 unsafe fn test_mm512_mask_cvtps_epu32() {
28743 let a = _mm512_setr_ps(
28744 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
28745 );
28746 let src = _mm512_set1_epi32(0);
28747 let r = _mm512_mask_cvtps_epu32(src, 0, a);
28748 assert_eq_m512i(r, src);
28749 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
28750 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
28751 assert_eq_m512i(r, e);
28752 }
28753
28754 #[simd_test(enable = "avx512f")]
28755 unsafe fn test_mm512_maskz_cvtps_epu32() {
28756 let a = _mm512_setr_ps(
28757 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
28758 );
28759 let r = _mm512_maskz_cvtps_epu32(0, a);
28760 assert_eq_m512i(r, _mm512_setzero_si512());
28761 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
28762 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
28763 assert_eq_m512i(r, e);
28764 }
28765
28766 #[simd_test(enable = "avx512f")]
28767 unsafe fn test_mm512_cvtepi8_epi32() {
28768 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28769 let r = _mm512_cvtepi8_epi32(a);
28770 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28771 assert_eq_m512i(r, e);
28772 }
28773
28774 #[simd_test(enable = "avx512f")]
28775 unsafe fn test_mm512_mask_cvtepi8_epi32() {
28776 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28777 let src = _mm512_set1_epi32(-1);
28778 let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
28779 assert_eq_m512i(r, src);
28780 let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
28781 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28782 assert_eq_m512i(r, e);
28783 }
28784
28785 #[simd_test(enable = "avx512f")]
28786 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
28787 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28788 let r = _mm512_maskz_cvtepi8_epi32(0, a);
28789 assert_eq_m512i(r, _mm512_setzero_si512());
28790 let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
28791 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
28792 assert_eq_m512i(r, e);
28793 }
28794
28795 #[simd_test(enable = "avx512f")]
28796 unsafe fn test_mm512_cvtepu8_epi32() {
28797 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28798 let r = _mm512_cvtepu8_epi32(a);
28799 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28800 assert_eq_m512i(r, e);
28801 }
28802
28803 #[simd_test(enable = "avx512f")]
28804 unsafe fn test_mm512_mask_cvtepu8_epi32() {
28805 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28806 let src = _mm512_set1_epi32(-1);
28807 let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
28808 assert_eq_m512i(r, src);
28809 let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
28810 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28811 assert_eq_m512i(r, e);
28812 }
28813
28814 #[simd_test(enable = "avx512f")]
28815 unsafe fn test_mm512_maskz_cvtepu8_epi32() {
28816 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28817 let r = _mm512_maskz_cvtepu8_epi32(0, a);
28818 assert_eq_m512i(r, _mm512_setzero_si512());
28819 let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
28820 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
28821 assert_eq_m512i(r, e);
28822 }
28823
28824 #[simd_test(enable = "avx512f")]
28825 unsafe fn test_mm512_cvtepi16_epi32() {
28826 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28827 let r = _mm512_cvtepi16_epi32(a);
28828 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28829 assert_eq_m512i(r, e);
28830 }
28831
28832 #[simd_test(enable = "avx512f")]
28833 unsafe fn test_mm512_mask_cvtepi16_epi32() {
28834 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28835 let src = _mm512_set1_epi32(-1);
28836 let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
28837 assert_eq_m512i(r, src);
28838 let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
28839 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28840 assert_eq_m512i(r, e);
28841 }
28842
28843 #[simd_test(enable = "avx512f")]
28844 unsafe fn test_mm512_maskz_cvtepi16_epi32() {
28845 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28846 let r = _mm512_maskz_cvtepi16_epi32(0, a);
28847 assert_eq_m512i(r, _mm512_setzero_si512());
28848 let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
28849 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
28850 assert_eq_m512i(r, e);
28851 }
28852
28853 #[simd_test(enable = "avx512f")]
28854 unsafe fn test_mm512_cvtepu16_epi32() {
28855 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28856 let r = _mm512_cvtepu16_epi32(a);
28857 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28858 assert_eq_m512i(r, e);
28859 }
28860
28861 #[simd_test(enable = "avx512f")]
28862 unsafe fn test_mm512_mask_cvtepu16_epi32() {
28863 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28864 let src = _mm512_set1_epi32(-1);
28865 let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
28866 assert_eq_m512i(r, src);
28867 let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
28868 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28869 assert_eq_m512i(r, e);
28870 }
28871
28872 #[simd_test(enable = "avx512f")]
28873 unsafe fn test_mm512_maskz_cvtepu16_epi32() {
28874 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28875 let r = _mm512_maskz_cvtepu16_epi32(0, a);
28876 assert_eq_m512i(r, _mm512_setzero_si512());
28877 let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
28878 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
28879 assert_eq_m512i(r, e);
28880 }
28881
28882 #[simd_test(enable = "avx512f")]
28883 unsafe fn test_mm512_cvtepi32_ps() {
28884 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28885 let r = _mm512_cvtepi32_ps(a);
28886 let e = _mm512_set_ps(
28887 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28888 );
28889 assert_eq_m512(r, e);
28890 }
28891
28892 #[simd_test(enable = "avx512f")]
28893 unsafe fn test_mm512_mask_cvtepi32_ps() {
28894 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28895 let src = _mm512_set1_ps(-1.);
28896 let r = _mm512_mask_cvtepi32_ps(src, 0, a);
28897 assert_eq_m512(r, src);
28898 let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
28899 let e = _mm512_set_ps(
28900 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
28901 );
28902 assert_eq_m512(r, e);
28903 }
28904
28905 #[simd_test(enable = "avx512f")]
28906 unsafe fn test_mm512_maskz_cvtepi32_ps() {
28907 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28908 let r = _mm512_maskz_cvtepi32_ps(0, a);
28909 assert_eq_m512(r, _mm512_setzero_ps());
28910 let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
28911 let e = _mm512_set_ps(
28912 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
28913 );
28914 assert_eq_m512(r, e);
28915 }
28916
28917 #[simd_test(enable = "avx512f")]
28918 unsafe fn test_mm512_cvtepu32_ps() {
28919 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28920 let r = _mm512_cvtepu32_ps(a);
28921 let e = _mm512_set_ps(
28922 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
28923 );
28924 assert_eq_m512(r, e);
28925 }
28926
28927 #[simd_test(enable = "avx512f")]
28928 unsafe fn test_mm512_mask_cvtepu32_ps() {
28929 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28930 let src = _mm512_set1_ps(-1.);
28931 let r = _mm512_mask_cvtepu32_ps(src, 0, a);
28932 assert_eq_m512(r, src);
28933 let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
28934 let e = _mm512_set_ps(
28935 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
28936 );
28937 assert_eq_m512(r, e);
28938 }
28939
28940 #[simd_test(enable = "avx512f")]
28941 unsafe fn test_mm512_maskz_cvtepu32_ps() {
28942 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28943 let r = _mm512_maskz_cvtepu32_ps(0, a);
28944 assert_eq_m512(r, _mm512_setzero_ps());
28945 let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
28946 let e = _mm512_set_ps(
28947 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
28948 );
28949 assert_eq_m512(r, e);
28950 }
28951
28952 #[simd_test(enable = "avx512f")]
28953 unsafe fn test_mm512_cvtepi32_epi16() {
28954 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28955 let r = _mm512_cvtepi32_epi16(a);
28956 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28957 assert_eq_m256i(r, e);
28958 }
28959
28960 #[simd_test(enable = "avx512f")]
28961 unsafe fn test_mm512_mask_cvtepi32_epi16() {
28962 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28963 let src = _mm256_set1_epi16(-1);
28964 let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
28965 assert_eq_m256i(r, src);
28966 let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
28967 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28968 assert_eq_m256i(r, e);
28969 }
28970
28971 #[simd_test(enable = "avx512f")]
28972 unsafe fn test_mm512_maskz_cvtepi32_epi16() {
28973 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28974 let r = _mm512_maskz_cvtepi32_epi16(0, a);
28975 assert_eq_m256i(r, _mm256_setzero_si256());
28976 let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
28977 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
28978 assert_eq_m256i(r, e);
28979 }
28980
28981 #[simd_test(enable = "avx512f")]
28982 unsafe fn test_mm512_cvtepi32_epi8() {
28983 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28984 let r = _mm512_cvtepi32_epi8(a);
28985 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28986 assert_eq_m128i(r, e);
28987 }
28988
28989 #[simd_test(enable = "avx512f")]
28990 unsafe fn test_mm512_mask_cvtepi32_epi8() {
28991 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28992 let src = _mm_set1_epi8(-1);
28993 let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
28994 assert_eq_m128i(r, src);
28995 let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
28996 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28997 assert_eq_m128i(r, e);
28998 }
28999
29000 #[simd_test(enable = "avx512f")]
29001 unsafe fn test_mm512_maskz_cvtepi32_epi8() {
29002 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
29003 let r = _mm512_maskz_cvtepi32_epi8(0, a);
29004 assert_eq_m128i(r, _mm_setzero_si128());
29005 let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
29006 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
29007 assert_eq_m128i(r, e);
29008 }
29009
29010 #[simd_test(enable = "avx512f")]
29011 unsafe fn test_mm512_cvtsepi32_epi16() {
29012 let a = _mm512_set_epi32(
29013 0,
29014 1,
29015 2,
29016 3,
29017 4,
29018 5,
29019 6,
29020 7,
29021 8,
29022 9,
29023 10,
29024 11,
29025 12,
29026 13,
29027 i32::MIN,
29028 i32::MAX,
29029 );
29030 let r = _mm512_cvtsepi32_epi16(a);
29031 let e = _mm256_set_epi16(
29032 0,
29033 1,
29034 2,
29035 3,
29036 4,
29037 5,
29038 6,
29039 7,
29040 8,
29041 9,
29042 10,
29043 11,
29044 12,
29045 13,
29046 i16::MIN,
29047 i16::MAX,
29048 );
29049 assert_eq_m256i(r, e);
29050 }
29051
29052 #[simd_test(enable = "avx512f")]
29053 unsafe fn test_mm512_mask_cvtsepi32_epi16() {
29054 let a = _mm512_set_epi32(
29055 0,
29056 1,
29057 2,
29058 3,
29059 4,
29060 5,
29061 6,
29062 7,
29063 8,
29064 9,
29065 10,
29066 11,
29067 12,
29068 13,
29069 i32::MIN,
29070 i32::MAX,
29071 );
29072 let src = _mm256_set1_epi16(-1);
29073 let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
29074 assert_eq_m256i(r, src);
29075 let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
29076 let e = _mm256_set_epi16(
29077 -1,
29078 -1,
29079 -1,
29080 -1,
29081 -1,
29082 -1,
29083 -1,
29084 -1,
29085 8,
29086 9,
29087 10,
29088 11,
29089 12,
29090 13,
29091 i16::MIN,
29092 i16::MAX,
29093 );
29094 assert_eq_m256i(r, e);
29095 }
29096
29097 #[simd_test(enable = "avx512f")]
29098 unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
29099 let a = _mm512_set_epi32(
29100 0,
29101 1,
29102 2,
29103 3,
29104 4,
29105 5,
29106 6,
29107 7,
29108 8,
29109 9,
29110 10,
29111 11,
29112 12,
29113 13,
29114 i32::MIN,
29115 i32::MAX,
29116 );
29117 let r = _mm512_maskz_cvtsepi32_epi16(0, a);
29118 assert_eq_m256i(r, _mm256_setzero_si256());
29119 let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
29120 let e = _mm256_set_epi16(
29121 0,
29122 0,
29123 0,
29124 0,
29125 0,
29126 0,
29127 0,
29128 0,
29129 8,
29130 9,
29131 10,
29132 11,
29133 12,
29134 13,
29135 i16::MIN,
29136 i16::MAX,
29137 );
29138 assert_eq_m256i(r, e);
29139 }
29140
29141 #[simd_test(enable = "avx512f")]
29142 unsafe fn test_mm512_cvtsepi32_epi8() {
29143 let a = _mm512_set_epi32(
29144 0,
29145 1,
29146 2,
29147 3,
29148 4,
29149 5,
29150 6,
29151 7,
29152 8,
29153 9,
29154 10,
29155 11,
29156 12,
29157 13,
29158 i32::MIN,
29159 i32::MAX,
29160 );
29161 let r = _mm512_cvtsepi32_epi8(a);
29162 let e = _mm_set_epi8(
29163 0,
29164 1,
29165 2,
29166 3,
29167 4,
29168 5,
29169 6,
29170 7,
29171 8,
29172 9,
29173 10,
29174 11,
29175 12,
29176 13,
29177 i8::MIN,
29178 i8::MAX,
29179 );
29180 assert_eq_m128i(r, e);
29181 }
29182
29183 #[simd_test(enable = "avx512f")]
29184 unsafe fn test_mm512_mask_cvtsepi32_epi8() {
29185 let a = _mm512_set_epi32(
29186 0,
29187 1,
29188 2,
29189 3,
29190 4,
29191 5,
29192 6,
29193 7,
29194 8,
29195 9,
29196 10,
29197 11,
29198 12,
29199 13,
29200 i32::MIN,
29201 i32::MAX,
29202 );
29203 let src = _mm_set1_epi8(-1);
29204 let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
29205 assert_eq_m128i(r, src);
29206 let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
29207 let e = _mm_set_epi8(
29208 -1,
29209 -1,
29210 -1,
29211 -1,
29212 -1,
29213 -1,
29214 -1,
29215 -1,
29216 8,
29217 9,
29218 10,
29219 11,
29220 12,
29221 13,
29222 i8::MIN,
29223 i8::MAX,
29224 );
29225 assert_eq_m128i(r, e);
29226 }
29227
29228 #[simd_test(enable = "avx512f")]
29229 unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
29230 let a = _mm512_set_epi32(
29231 0,
29232 1,
29233 2,
29234 3,
29235 4,
29236 5,
29237 6,
29238 7,
29239 8,
29240 9,
29241 10,
29242 11,
29243 12,
29244 13,
29245 i32::MIN,
29246 i32::MAX,
29247 );
29248 let r = _mm512_maskz_cvtsepi32_epi8(0, a);
29249 assert_eq_m128i(r, _mm_setzero_si128());
29250 let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
29251 let e = _mm_set_epi8(
29252 0,
29253 0,
29254 0,
29255 0,
29256 0,
29257 0,
29258 0,
29259 0,
29260 8,
29261 9,
29262 10,
29263 11,
29264 12,
29265 13,
29266 i8::MIN,
29267 i8::MAX,
29268 );
29269 assert_eq_m128i(r, e);
29270 }
29271
29272 #[simd_test(enable = "avx512f")]
29273 unsafe fn test_mm512_cvtusepi32_epi16() {
29274 let a = _mm512_set_epi32(
29275 0,
29276 1,
29277 2,
29278 3,
29279 4,
29280 5,
29281 6,
29282 7,
29283 8,
29284 9,
29285 10,
29286 11,
29287 12,
29288 13,
29289 i32::MIN,
29290 i32::MIN,
29291 );
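// Unsigned saturation treats i32::MIN as the u32 value 2_147_483_648, which saturates to
// u16::MAX; the trailing -1 values in the expectation are that all-ones bit pattern.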
29292 let r = _mm512_cvtusepi32_epi16(a);
29293 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
29294 assert_eq_m256i(r, e);
29295 }
29296
29297 #[simd_test(enable = "avx512f")]
29298 unsafe fn test_mm512_mask_cvtusepi32_epi16() {
29299 let a = _mm512_set_epi32(
29300 0,
29301 1,
29302 2,
29303 3,
29304 4,
29305 5,
29306 6,
29307 7,
29308 8,
29309 9,
29310 10,
29311 11,
29312 12,
29313 13,
29314 i32::MIN,
29315 i32::MIN,
29316 );
29317 let src = _mm256_set1_epi16(-1);
29318 let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
29319 assert_eq_m256i(r, src);
29320 let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
29321 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
29322 assert_eq_m256i(r, e);
29323 }
29324
29325 #[simd_test(enable = "avx512f")]
29326 unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
29327 let a = _mm512_set_epi32(
29328 0,
29329 1,
29330 2,
29331 3,
29332 4,
29333 5,
29334 6,
29335 7,
29336 8,
29337 9,
29338 10,
29339 11,
29340 12,
29341 13,
29342 i32::MIN,
29343 i32::MIN,
29344 );
29345 let r = _mm512_maskz_cvtusepi32_epi16(0, a);
29346 assert_eq_m256i(r, _mm256_setzero_si256());
29347 let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
29348 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
29349 assert_eq_m256i(r, e);
29350 }
29351
29352 #[simd_test(enable = "avx512f")]
29353 unsafe fn test_mm512_cvtusepi32_epi8() {
29354 let a = _mm512_set_epi32(
29355 0,
29356 1,
29357 2,
29358 3,
29359 4,
29360 5,
29361 6,
29362 7,
29363 8,
29364 9,
29365 10,
29366 11,
29367 12,
29368 13,
29369 i32::MIN,
29370 i32::MIN,
29371 );
29372 let r = _mm512_cvtusepi32_epi8(a);
29373 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
29374 assert_eq_m128i(r, e);
29375 }
29376
29377 #[simd_test(enable = "avx512f")]
29378 unsafe fn test_mm512_mask_cvtusepi32_epi8() {
29379 let a = _mm512_set_epi32(
29380 0,
29381 1,
29382 2,
29383 3,
29384 4,
29385 5,
29386 6,
29387 7,
29388 8,
29389 9,
29390 10,
29391 11,
29392 12,
29393 13,
29394 i32::MIN,
29395 i32::MIN,
29396 );
29397 let src = _mm_set1_epi8(-1);
29398 let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
29399 assert_eq_m128i(r, src);
29400 let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
29401 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
29402 assert_eq_m128i(r, e);
29403 }
29404
29405 #[simd_test(enable = "avx512f")]
29406 unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
29407 let a = _mm512_set_epi32(
29408 0,
29409 1,
29410 2,
29411 3,
29412 4,
29413 5,
29414 6,
29415 7,
29416 8,
29417 9,
29418 10,
29419 11,
29420 12,
29421 13,
29422 i32::MIN,
29423 i32::MIN,
29424 );
29425 let r = _mm512_maskz_cvtusepi32_epi8(0, a);
29426 assert_eq_m128i(r, _mm_setzero_si128());
29427 let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
29428 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
29429 assert_eq_m128i(r, e);
29430 }
29431
29432 #[simd_test(enable = "avx512f")]
29433 unsafe fn test_mm512_cvt_roundps_epi32() {
29434 let a = _mm512_setr_ps(
29435 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29436 );
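// The same input is converted twice to contrast the rounding operands: nearest-even turns
// 9.5 into 10, while round-toward-negative-infinity turns it into 9 (and -1.5 into -2 in both).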
29437 let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29438 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
29439 assert_eq_m512i(r, e);
29440 let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
29441 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
29442 assert_eq_m512i(r, e);
29443 }
29444
29445 #[simd_test(enable = "avx512f")]
29446 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
29447 let a = _mm512_setr_ps(
29448 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29449 );
29450 let src = _mm512_set1_epi32(0);
29451 let r =
29452 _mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29453 assert_eq_m512i(r, src);
29454 let r = _mm512_mask_cvt_roundps_epi32(
29455 src,
29456 0b00000000_11111111,
29457 a,
29458 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29459 );
29460 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
29461 assert_eq_m512i(r, e);
29462 }
29463
29464 #[simd_test(enable = "avx512f")]
29465 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
29466 let a = _mm512_setr_ps(
29467 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29468 );
29469 let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29470 assert_eq_m512i(r, _mm512_setzero_si512());
29471 let r = _mm512_maskz_cvt_roundps_epi32(
29472 0b00000000_11111111,
29473 a,
29474 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29475 );
29476 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
29477 assert_eq_m512i(r, e);
29478 }
29479
29480 #[simd_test(enable = "avx512f")]
29481 unsafe fn test_mm512_cvt_roundps_epu32() {
29482 let a = _mm512_setr_ps(
29483 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29484 );
29485 let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29486 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
29487 assert_eq_m512i(r, e);
29488 let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
29489 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
29490 assert_eq_m512i(r, e);
29491 }
29492
29493 #[simd_test(enable = "avx512f")]
29494 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
29495 let a = _mm512_setr_ps(
29496 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29497 );
29498 let src = _mm512_set1_epi32(0);
29499 let r =
29500 _mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29501 assert_eq_m512i(r, src);
29502 let r = _mm512_mask_cvt_roundps_epu32(
29503 src,
29504 0b00000000_11111111,
29505 a,
29506 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29507 );
29508 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
29509 assert_eq_m512i(r, e);
29510 }
29511
29512 #[simd_test(enable = "avx512f")]
29513 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
29514 let a = _mm512_setr_ps(
29515 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29516 );
29517 let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29518 assert_eq_m512i(r, _mm512_setzero_si512());
29519 let r = _mm512_maskz_cvt_roundps_epu32(
29520 0b00000000_11111111,
29521 a,
29522 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29523 );
29524 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
29525 assert_eq_m512i(r, e);
29526 }
29527
29528 #[simd_test(enable = "avx512f")]
29529 unsafe fn test_mm512_cvt_roundepi32_ps() {
29530 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
29531 let r = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29532 let e = _mm512_setr_ps(
29533 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
29534 );
29535 assert_eq_m512(r, e);
29536 }
29537
29538 #[simd_test(enable = "avx512f")]
29539 unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
29540 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
29541 let src = _mm512_set1_ps(0.);
29542 let r =
29543 _mm512_mask_cvt_roundepi32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29544 assert_eq_m512(r, src);
29545 let r = _mm512_mask_cvt_roundepi32_ps(
29546 src,
29547 0b00000000_11111111,
29548 a,
29549 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29550 );
29551 let e = _mm512_setr_ps(
29552 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
29553 );
29554 assert_eq_m512(r, e);
29555 }
29556
29557 #[simd_test(enable = "avx512f")]
29558 unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
29559 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
29560 let r = _mm512_maskz_cvt_roundepi32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29561 assert_eq_m512(r, _mm512_setzero_ps());
29562 let r = _mm512_maskz_cvt_roundepi32_ps(
29563 0b00000000_11111111,
29564 a,
29565 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29566 );
29567 let e = _mm512_setr_ps(
29568 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
29569 );
29570 assert_eq_m512(r, e);
29571 }
29572
29573 #[simd_test(enable = "avx512f")]
29574 unsafe fn test_mm512_cvt_roundepu32_ps() {
29575 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
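// -2 reinterpreted as u32 is 4_294_967_294; the nearest f32 is 2^32, and the literal
// 4294967300. in the expectation parses to that same value.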
29576 let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29577 let e = _mm512_setr_ps(
29578 0.,
29579 4294967300.,
29580 2.,
29581 4294967300.,
29582 4.,
29583 4294967300.,
29584 6.,
29585 4294967300.,
29586 8.,
29587 10.,
29588 10.,
29589 12.,
29590 12.,
29591 14.,
29592 14.,
29593 16.,
29594 );
29595 assert_eq_m512(r, e);
29596 }
29597
29598 #[simd_test(enable = "avx512f")]
29599 unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
29600 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
29601 let src = _mm512_set1_ps(0.);
29602 let r =
29603 _mm512_mask_cvt_roundepu32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29604 assert_eq_m512(r, src);
29605 let r = _mm512_mask_cvt_roundepu32_ps(
29606 src,
29607 0b00000000_11111111,
29608 a,
29609 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29610 );
29611 let e = _mm512_setr_ps(
29612 0.,
29613 4294967300.,
29614 2.,
29615 4294967300.,
29616 4.,
29617 4294967300.,
29618 6.,
29619 4294967300.,
29620 0.,
29621 0.,
29622 0.,
29623 0.,
29624 0.,
29625 0.,
29626 0.,
29627 0.,
29628 );
29629 assert_eq_m512(r, e);
29630 }
29631
29632 #[simd_test(enable = "avx512f")]
29633 unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
29634 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
29635 let r = _mm512_maskz_cvt_roundepu32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
29636 assert_eq_m512(r, _mm512_setzero_ps());
29637 let r = _mm512_maskz_cvt_roundepu32_ps(
29638 0b00000000_11111111,
29639 a,
29640 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
29641 );
29642 let e = _mm512_setr_ps(
29643 0.,
29644 4294967300.,
29645 2.,
29646 4294967300.,
29647 4.,
29648 4294967300.,
29649 6.,
29650 4294967300.,
29651 0.,
29652 0.,
29653 0.,
29654 0.,
29655 0.,
29656 0.,
29657 0.,
29658 0.,
29659 );
29660 assert_eq_m512(r, e);
29661 }
29662
29663 #[simd_test(enable = "avx512f")]
29664 unsafe fn test_mm512_cvt_roundps_ph() {
29665 let a = _mm512_set1_ps(1.);
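// 4323521613979991040 is 0x3C00_3C00_3C00_3C00: four packed IEEE 754 half-precision
// encodings of 1.0 per 64-bit lane.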
29666 let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC);
29667 let e = _mm256_setr_epi64x(
29668 4323521613979991040,
29669 4323521613979991040,
29670 4323521613979991040,
29671 4323521613979991040,
29672 );
29673 assert_eq_m256i(r, e);
29674 }
29675
29676 #[simd_test(enable = "avx512f")]
29677 unsafe fn test_mm512_mask_cvt_roundps_ph() {
29678 let a = _mm512_set1_ps(1.);
29679 let src = _mm256_set1_epi16(0);
29680 let r = _mm512_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC);
29681 assert_eq_m256i(r, src);
29682 let r = _mm512_mask_cvt_roundps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29683 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
29684 assert_eq_m256i(r, e);
29685 }
29686
29687 #[simd_test(enable = "avx512f")]
29688 unsafe fn test_mm512_maskz_cvt_roundps_ph() {
29689 let a = _mm512_set1_ps(1.);
29690 let r = _mm512_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC);
29691 assert_eq_m256i(r, _mm256_setzero_si256());
29692 let r = _mm512_maskz_cvt_roundps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29693 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
29694 assert_eq_m256i(r, e);
29695 }
29696
29697 #[simd_test(enable = "avx512f")]
29698 unsafe fn test_mm512_cvtps_ph() {
29699 let a = _mm512_set1_ps(1.);
29700 let r = _mm512_cvtps_ph(a, _MM_FROUND_NO_EXC);
29701 let e = _mm256_setr_epi64x(
29702 4323521613979991040,
29703 4323521613979991040,
29704 4323521613979991040,
29705 4323521613979991040,
29706 );
29707 assert_eq_m256i(r, e);
29708 }
29709
29710 #[simd_test(enable = "avx512f")]
29711 unsafe fn test_mm512_mask_cvtps_ph() {
29712 let a = _mm512_set1_ps(1.);
29713 let src = _mm256_set1_epi16(0);
29714 let r = _mm512_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
29715 assert_eq_m256i(r, src);
29716 let r = _mm512_mask_cvtps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29717 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
29718 assert_eq_m256i(r, e);
29719 }
29720
29721 #[simd_test(enable = "avx512f")]
29722 unsafe fn test_mm512_maskz_cvtps_ph() {
29723 let a = _mm512_set1_ps(1.);
29724 let r = _mm512_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
29725 assert_eq_m256i(r, _mm256_setzero_si256());
29726 let r = _mm512_maskz_cvtps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29727 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
29728 assert_eq_m256i(r, e);
29729 }
29730
29731 #[simd_test(enable = "avx512f")]
29732 unsafe fn test_mm512_cvt_roundph_ps() {
29733 let a = _mm256_setr_epi64x(
29734 4323521613979991040,
29735 4323521613979991040,
29736 4323521613979991040,
29737 4323521613979991040,
29738 );
29739 let r = _mm512_cvt_roundph_ps(a, _MM_FROUND_NO_EXC);
29740 let e = _mm512_set1_ps(1.);
29741 assert_eq_m512(r, e);
29742 }
29743
29744 #[simd_test(enable = "avx512f")]
29745 unsafe fn test_mm512_mask_cvt_roundph_ps() {
29746 let a = _mm256_setr_epi64x(
29747 4323521613979991040,
29748 4323521613979991040,
29749 4323521613979991040,
29750 4323521613979991040,
29751 );
29752 let src = _mm512_set1_ps(0.);
29753 let r = _mm512_mask_cvt_roundph_ps(src, 0, a, _MM_FROUND_NO_EXC);
29754 assert_eq_m512(r, src);
29755 let r = _mm512_mask_cvt_roundph_ps(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29756 let e = _mm512_setr_ps(
29757 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
29758 );
29759 assert_eq_m512(r, e);
29760 }
29761
29762 #[simd_test(enable = "avx512f")]
29763 unsafe fn test_mm512_maskz_cvt_roundph_ps() {
29764 let a = _mm256_setr_epi64x(
29765 4323521613979991040,
29766 4323521613979991040,
29767 4323521613979991040,
29768 4323521613979991040,
29769 );
29770 let r = _mm512_maskz_cvt_roundph_ps(0, a, _MM_FROUND_NO_EXC);
29771 assert_eq_m512(r, _mm512_setzero_ps());
29772 let r = _mm512_maskz_cvt_roundph_ps(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29773 let e = _mm512_setr_ps(
29774 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
29775 );
29776 assert_eq_m512(r, e);
29777 }
29778
29779 #[simd_test(enable = "avx512f")]
29780 unsafe fn test_mm512_cvtph_ps() {
29781 let a = _mm256_setr_epi64x(
29782 4323521613979991040,
29783 4323521613979991040,
29784 4323521613979991040,
29785 4323521613979991040,
29786 );
29787 let r = _mm512_cvtph_ps(a);
29788 let e = _mm512_set1_ps(1.);
29789 assert_eq_m512(r, e);
29790 }
29791
29792 #[simd_test(enable = "avx512f")]
29793 unsafe fn test_mm512_mask_cvtph_ps() {
29794 let a = _mm256_setr_epi64x(
29795 4323521613979991040,
29796 4323521613979991040,
29797 4323521613979991040,
29798 4323521613979991040,
29799 );
29800 let src = _mm512_set1_ps(0.);
29801 let r = _mm512_mask_cvtph_ps(src, 0, a);
29802 assert_eq_m512(r, src);
29803 let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
29804 let e = _mm512_setr_ps(
29805 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
29806 );
29807 assert_eq_m512(r, e);
29808 }
29809
29810 #[simd_test(enable = "avx512f")]
29811 unsafe fn test_mm512_maskz_cvtph_ps() {
29812 let a = _mm256_setr_epi64x(
29813 4323521613979991040,
29814 4323521613979991040,
29815 4323521613979991040,
29816 4323521613979991040,
29817 );
29818 let r = _mm512_maskz_cvtph_ps(0, a);
29819 assert_eq_m512(r, _mm512_setzero_ps());
29820 let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
29821 let e = _mm512_setr_ps(
29822 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
29823 );
29824 assert_eq_m512(r, e);
29825 }
29826
29827 #[simd_test(enable = "avx512f")]
29828 unsafe fn test_mm512_cvtt_roundps_epi32() {
29829 let a = _mm512_setr_ps(
29830 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29831 );
29832 let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
29833 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
29834 assert_eq_m512i(r, e);
29835 }
29836
29837 #[simd_test(enable = "avx512f")]
29838 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
29839 let a = _mm512_setr_ps(
29840 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29841 );
29842 let src = _mm512_set1_epi32(0);
29843 let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC);
29844 assert_eq_m512i(r, src);
29845 let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29846 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
29847 assert_eq_m512i(r, e);
29848 }
29849
29850 #[simd_test(enable = "avx512f")]
29851 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
29852 let a = _mm512_setr_ps(
29853 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29854 );
29855 let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC);
29856 assert_eq_m512i(r, _mm512_setzero_si512());
29857 let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29858 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
29859 assert_eq_m512i(r, e);
29860 }
29861
29862 #[simd_test(enable = "avx512f")]
29863 unsafe fn test_mm512_cvtt_roundps_epu32() {
29864 let a = _mm512_setr_ps(
29865 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29866 );
29867 let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC);
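// Negative inputs are out of range for u32, so truncation yields 0xFFFF_FFFF, which reads as -1 through the i32 view used by _mm512_setr_epi32.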
29868 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
29869 assert_eq_m512i(r, e);
29870 }
29871
29872 #[simd_test(enable = "avx512f")]
29873 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
29874 let a = _mm512_setr_ps(
29875 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29876 );
29877 let src = _mm512_set1_epi32(0);
29878 let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC);
29879 assert_eq_m512i(r, src);
29880 let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29881 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
29882 assert_eq_m512i(r, e);
29883 }
29884
29885 #[simd_test(enable = "avx512f")]
29886 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
29887 let a = _mm512_setr_ps(
29888 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29889 );
29890 let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC);
29891 assert_eq_m512i(r, _mm512_setzero_si512());
29892 let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
29893 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
29894 assert_eq_m512i(r, e);
29895 }
29896
29897 #[simd_test(enable = "avx512f")]
29898 unsafe fn test_mm512_cvttps_epi32() {
29899 let a = _mm512_setr_ps(
29900 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29901 );
29902 let r = _mm512_cvttps_epi32(a);
29903 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
29904 assert_eq_m512i(r, e);
29905 }
29906
29907 #[simd_test(enable = "avx512f")]
29908 unsafe fn test_mm512_mask_cvttps_epi32() {
29909 let a = _mm512_setr_ps(
29910 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29911 );
29912 let src = _mm512_set1_epi32(0);
29913 let r = _mm512_mask_cvttps_epi32(src, 0, a);
29914 assert_eq_m512i(r, src);
29915 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
29916 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
29917 assert_eq_m512i(r, e);
29918 }
29919
29920 #[simd_test(enable = "avx512f")]
29921 unsafe fn test_mm512_maskz_cvttps_epi32() {
29922 let a = _mm512_setr_ps(
29923 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29924 );
29925 let r = _mm512_maskz_cvttps_epi32(0, a);
29926 assert_eq_m512i(r, _mm512_setzero_si512());
29927 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
29928 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
29929 assert_eq_m512i(r, e);
29930 }
29931
29932 #[simd_test(enable = "avx512f")]
29933 unsafe fn test_mm512_cvttps_epu32() {
29934 let a = _mm512_setr_ps(
29935 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29936 );
29937 let r = _mm512_cvttps_epu32(a);
29938 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
29939 assert_eq_m512i(r, e);
29940 }
29941
29942 #[simd_test(enable = "avx512f")]
29943 unsafe fn test_mm512_mask_cvttps_epu32() {
29944 let a = _mm512_setr_ps(
29945 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29946 );
29947 let src = _mm512_set1_epi32(0);
29948 let r = _mm512_mask_cvttps_epu32(src, 0, a);
29949 assert_eq_m512i(r, src);
29950 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
29951 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
29952 assert_eq_m512i(r, e);
29953 }
29954
29955 #[simd_test(enable = "avx512f")]
29956 unsafe fn test_mm512_maskz_cvttps_epu32() {
29957 let a = _mm512_setr_ps(
29958 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
29959 );
29960 let r = _mm512_maskz_cvttps_epu32(0, a);
29961 assert_eq_m512i(r, _mm512_setzero_si512());
29962 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
29963 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
29964 assert_eq_m512i(r, e);
29965 }
29966
29967 #[simd_test(enable = "avx512f")]
29968 unsafe fn test_mm512_i32gather_ps() {
29969 let mut arr = [0f32; 256];
29970 for i in 0..256 {
29971 arr[i] = i as f32;
29972 }
29973 // A scale of 4 addresses 32-bit (dword) elements
29974 #[rustfmt::skip]
29975 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
29976 120, 128, 136, 144, 152, 160, 168, 176);
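// arr[i] == i as f32, so each gathered lane equals its index value.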
29977 let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
29978 #[rustfmt::skip]
29979 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
29980 120., 128., 136., 144., 152., 160., 168., 176.));
29981 }
29982
29983 #[simd_test(enable = "avx512f")]
29984 unsafe fn test_mm512_mask_i32gather_ps() {
29985 let mut arr = [0f32; 256];
29986 for i in 0..256 {
29987 arr[i] = i as f32;
29988 }
29989 let src = _mm512_set1_ps(2.);
29990 let mask = 0b10101010_10101010;
29991 #[rustfmt::skip]
29992 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
29993 120, 128, 136, 144, 152, 160, 168, 176);
29994 // A scale of 4 addresses 32-bit (dword) elements
29995 let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
29996 #[rustfmt::skip]
29997 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
29998 2., 128., 2., 144., 2., 160., 2., 176.));
29999 }
30000
30001 #[simd_test(enable = "avx512f")]
30002 unsafe fn test_mm512_i32gather_epi32() {
30003 let mut arr = [0i32; 256];
30004 for i in 0..256 {
30005 arr[i] = i as i32;
30006 }
30007 // A scale of 4 addresses 32-bit (dword) elements
30008 #[rustfmt::skip]
30009 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30010 120, 128, 136, 144, 152, 160, 168, 176);
30011 let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
30012 #[rustfmt::skip]
30013 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30014 120, 128, 136, 144, 152, 160, 168, 176));
30015 }
30016
30017 #[simd_test(enable = "avx512f")]
30018 unsafe fn test_mm512_mask_i32gather_epi32() {
30019 let mut arr = [0i32; 256];
30020 for i in 0..256 {
30021 arr[i] = i as i32;
30022 }
30023 let src = _mm512_set1_epi32(2);
30024 let mask = 0b10101010_10101010;
30025 #[rustfmt::skip]
30026 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30027 128, 144, 160, 176, 192, 208, 224, 240);
30028 // A scale of 4 addresses 32-bit (dword) elements
30029 let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
30030 #[rustfmt::skip]
30031 assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112,
30032 2, 144, 2, 176, 2, 208, 2, 240));
30033 }
30034
30035 #[simd_test(enable = "avx512f")]
30036 unsafe fn test_mm512_i32scatter_ps() {
30037 let mut arr = [0f32; 256];
30038 #[rustfmt::skip]
30039 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30040 128, 144, 160, 176, 192, 208, 224, 240);
30041 let src = _mm512_setr_ps(
30042 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
30043 );
30044 // A scale of 4 addresses 32-bit (dword) elements
30045 _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
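// src lane i holds (i + 1) and is stored at byte offset index[i] * 4, i.e. arr[i * 16].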
30046 let mut expected = [0f32; 256];
30047 for i in 0..16 {
30048 expected[i * 16] = (i + 1) as f32;
30049 }
30050 assert_eq!(&arr[..], &expected[..]);
30051 }
30052
30053 #[simd_test(enable = "avx512f")]
30054 unsafe fn test_mm512_mask_i32scatter_ps() {
30055 let mut arr = [0f32; 256];
30056 let mask = 0b10101010_10101010;
30057 #[rustfmt::skip]
30058 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30059 128, 144, 160, 176, 192, 208, 224, 240);
30060 let src = _mm512_setr_ps(
30061 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
30062 );
30063 // A scale of 4 addresses 32-bit (dword) elements
30064 _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
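// Only the odd lanes are written (mask 0b10101010_10101010): values 2, 4, ..., 16 land at arr[16], arr[48], ..., arr[240].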
30065 let mut expected = [0f32; 256];
30066 for i in 0..8 {
30067 expected[i * 32 + 16] = 2. * (i + 1) as f32;
30068 }
30069 assert_eq!(&arr[..], &expected[..]);
30070 }
30071
30072 #[simd_test(enable = "avx512f")]
30073 unsafe fn test_mm512_i32scatter_epi32() {
30074 let mut arr = [0i32; 256];
30075
30076 #[rustfmt::skip]
30077 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30078 128, 144, 160, 176, 192, 208, 224, 240);
30079 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
30080 // A scale of 4 addresses 32-bit (dword) elements
30081 _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
30082 let mut expected = [0i32; 256];
30083 for i in 0..16 {
30084 expected[i * 16] = (i + 1) as i32;
30085 }
30086 assert_eq!(&arr[..], &expected[..]);
30087 }
30088
30089 #[simd_test(enable = "avx512f")]
30090 unsafe fn test_mm512_mask_i32scatter_epi32() {
30091 let mut arr = [0i32; 256];
30092 let mask = 0b10101010_10101010;
30093 #[rustfmt::skip]
30094 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
30095 128, 144, 160, 176, 192, 208, 224, 240);
30096 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
30097 // A scale of 4 addresses 32-bit (dword) elements
30098 _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
30099 let mut expected = [0i32; 256];
30100 for i in 0..8 {
30101 expected[i * 32 + 16] = 2 * (i + 1) as i32;
30102 }
30103 assert_eq!(&arr[..], &expected[..]);
30104 }
30105
30106 #[simd_test(enable = "avx512f")]
30107 unsafe fn test_mm512_cmplt_ps_mask() {
30108 #[rustfmt::skip]
30109 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30110 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30111 let b = _mm512_set1_ps(-1.);
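// NaN lanes compare false under the ordered less-than predicate, so only the f32::MIN and -100. lanes are below -1.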
30112 let m = _mm512_cmplt_ps_mask(a, b);
30113 assert_eq!(m, 0b00000101_00000101);
30114 }
30115
30116 #[simd_test(enable = "avx512f")]
30117 unsafe fn test_mm512_mask_cmplt_ps_mask() {
30118 #[rustfmt::skip]
30119 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30120 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30121 let b = _mm512_set1_ps(-1.);
30122 let mask = 0b01100110_01100110;
30123 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
30124 assert_eq!(r, 0b00000100_00000100);
30125 }
30126
30127 #[simd_test(enable = "avx512f")]
30128 unsafe fn test_mm512_cmpnlt_ps_mask() {
30129 #[rustfmt::skip]
30130 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30131 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30132 let b = _mm512_set1_ps(-1.);
30133 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
30134 }
30135
30136 #[simd_test(enable = "avx512f")]
30137 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
30138 #[rustfmt::skip]
30139 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30140 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30141 let b = _mm512_set1_ps(-1.);
30142 let mask = 0b01111010_01111010;
30143 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
30144 }
30145
30146 #[simd_test(enable = "avx512f")]
30147 unsafe fn test_mm512_cmpnle_ps_mask() {
30148 #[rustfmt::skip]
30149 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30150 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30151 let b = _mm512_set1_ps(-1.);
30152 let m = _mm512_cmpnle_ps_mask(b, a);
30153 assert_eq!(m, 0b00001101_00001101);
30154 }
30155
30156 #[simd_test(enable = "avx512f")]
30157 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
30158 #[rustfmt::skip]
30159 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30160 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30161 let b = _mm512_set1_ps(-1.);
30162 let mask = 0b01100110_01100110;
30163 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
30164 assert_eq!(r, 0b00000100_00000100);
30165 }
30166
30167 #[simd_test(enable = "avx512f")]
30168 unsafe fn test_mm512_cmple_ps_mask() {
30169 #[rustfmt::skip]
30170 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30171 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30172 let b = _mm512_set1_ps(-1.);
30173 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
30174 }
30175
30176 #[simd_test(enable = "avx512f")]
30177 unsafe fn test_mm512_mask_cmple_ps_mask() {
30178 #[rustfmt::skip]
30179 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
30180 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
30181 let b = _mm512_set1_ps(-1.);
30182 let mask = 0b01111010_01111010;
30183 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
30184 }
30185
30186 #[simd_test(enable = "avx512f")]
30187 unsafe fn test_mm512_cmpeq_ps_mask() {
30188 #[rustfmt::skip]
30189 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
30190 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
30191 #[rustfmt::skip]
30192 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
30193 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
30194 let m = _mm512_cmpeq_ps_mask(b, a);
30195 assert_eq!(m, 0b11001101_11001101);
30196 }
30197
30198 #[simd_test(enable = "avx512f")]
30199 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
30200 #[rustfmt::skip]
30201 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
30202 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
30203 #[rustfmt::skip]
30204 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
30205 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
30206 let mask = 0b01111010_01111010;
30207 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
30208 assert_eq!(r, 0b01001000_01001000);
30209 }
30210
30211 #[simd_test(enable = "avx512f")]
30212 unsafe fn test_mm512_cmpneq_ps_mask() {
30213 #[rustfmt::skip]
30214 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
30215 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
30216 #[rustfmt::skip]
30217 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
30218 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
30219 let m = _mm512_cmpneq_ps_mask(b, a);
30220 assert_eq!(m, 0b00110010_00110010);
30221 }
30222
30223 #[simd_test(enable = "avx512f")]
30224 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
30225 #[rustfmt::skip]
30226 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
30227 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
30228 #[rustfmt::skip]
30229 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
30230 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
30231 let mask = 0b01111010_01111010;
30232 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
30233 assert_eq!(r, 0b00110010_00110010)
30234 }
30235
30236 #[simd_test(enable = "avx512f")]
30237 unsafe fn test_mm512_cmp_ps_mask() {
30238 #[rustfmt::skip]
30239 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
30240 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
30241 let b = _mm512_set1_ps(-1.);
30242 let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
30243 assert_eq!(m, 0b00000101_00000101);
30244 }
30245
30246 #[simd_test(enable = "avx512f")]
30247 unsafe fn test_mm512_mask_cmp_ps_mask() {
30248 #[rustfmt::skip]
30249 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
30250 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
30251 let b = _mm512_set1_ps(-1.);
30252 let mask = 0b01100110_01100110;
30253 let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
30254 assert_eq!(r, 0b00000100_00000100);
30255 }
30256
30257 #[simd_test(enable = "avx512f")]
30258 unsafe fn test_mm512_cmp_round_ps_mask() {
30259 #[rustfmt::skip]
30260 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
30261 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
30262 let b = _mm512_set1_ps(-1.);
30263 let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
30264 assert_eq!(m, 0b00000101_00000101);
30265 }
30266
30267 #[simd_test(enable = "avx512f")]
30268 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
30269 #[rustfmt::skip]
30270 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
30271 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
30272 let b = _mm512_set1_ps(-1.);
30273 let mask = 0b01100110_01100110;
30274 let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
30275 assert_eq!(r, 0b00000100_00000100);
30276 }
30277
30278 #[simd_test(enable = "avx512f")]
30279 unsafe fn test_mm512_cmpord_ps_mask() {
30280 #[rustfmt::skip]
30281 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
30282 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
30283 #[rustfmt::skip]
30284 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
30285 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
30286 let m = _mm512_cmpord_ps_mask(a, b);
30287 assert_eq!(m, 0b00000101_00000101);
30288 }
30289
30290 #[simd_test(enable = "avx512f")]
30291 unsafe fn test_mm512_mask_cmpord_ps_mask() {
30292 #[rustfmt::skip]
30293 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
30294 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
30295 #[rustfmt::skip]
30296 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
30297 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
30298 let mask = 0b11000011_11000011;
30299 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
30300 assert_eq!(m, 0b00000001_00000001);
30301 }
30302
30303 #[simd_test(enable = "avx512f")]
30304 unsafe fn test_mm512_cmpunord_ps_mask() {
30305 #[rustfmt::skip]
30306 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
30307 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
30308 #[rustfmt::skip]
30309 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
30310 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
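// An unordered comparison is true whenever either operand is NaN.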
30311 let m = _mm512_cmpunord_ps_mask(a, b);
30312
30313 assert_eq!(m, 0b11111010_11111010);
30314 }
30315
30316 #[simd_test(enable = "avx512f")]
30317 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
30318 #[rustfmt::skip]
30319 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
30320 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
30321 #[rustfmt::skip]
30322 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
30323 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
30324 let mask = 0b00001111_00001111;
30325 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
30326 assert_eq!(m, 0b00001010_00001010);
30327 }
30328
30329 #[simd_test(enable = "avx512f")]
30330 unsafe fn test_mm_cmp_ss_mask() {
30331 let a = _mm_setr_ps(2., 1., 1., 1.);
30332 let b = _mm_setr_ps(1., 2., 2., 2.);
30333 let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
30334 assert_eq!(m, 1);
30335 }
30336
30337 #[simd_test(enable = "avx512f")]
30338 unsafe fn test_mm_mask_cmp_ss_mask() {
30339 let a = _mm_setr_ps(2., 1., 1., 1.);
30340 let b = _mm_setr_ps(1., 2., 2., 2.);
30341 let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
30342 assert_eq!(m, 0);
30343 let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
30344 assert_eq!(m, 1);
30345 }
30346
30347 #[simd_test(enable = "avx512f")]
30348 unsafe fn test_mm_cmp_round_ss_mask() {
30349 let a = _mm_setr_ps(2., 1., 1., 1.);
30350 let b = _mm_setr_ps(1., 2., 2., 2.);
30351 let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
30352 assert_eq!(m, 1);
30353 }
30354
30355 #[simd_test(enable = "avx512f")]
30356 unsafe fn test_mm_mask_cmp_round_ss_mask() {
30357 let a = _mm_setr_ps(2., 1., 1., 1.);
30358 let b = _mm_setr_ps(1., 2., 2., 2.);
30359 let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
30360 assert_eq!(m, 0);
30361 let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
30362 assert_eq!(m, 1);
30363 }
30364
30365 #[simd_test(enable = "avx512f")]
30366 unsafe fn test_mm_cmp_sd_mask() {
30367 let a = _mm_setr_pd(2., 1.);
30368 let b = _mm_setr_pd(1., 2.);
30369 let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
30370 assert_eq!(m, 1);
30371 }
30372
30373 #[simd_test(enable = "avx512f")]
30374 unsafe fn test_mm_mask_cmp_sd_mask() {
30375 let a = _mm_setr_pd(2., 1.);
30376 let b = _mm_setr_pd(1., 2.);
30377 let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
30378 assert_eq!(m, 0);
30379 let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
30380 assert_eq!(m, 1);
30381 }
30382
30383 #[simd_test(enable = "avx512f")]
30384 unsafe fn test_mm_cmp_round_sd_mask() {
30385 let a = _mm_setr_pd(2., 1.);
30386 let b = _mm_setr_pd(1., 2.);
30387 let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
30388 assert_eq!(m, 1);
30389 }
30390
30391 #[simd_test(enable = "avx512f")]
30392 unsafe fn test_mm_mask_cmp_round_sd_mask() {
30393 let a = _mm_setr_pd(2., 1.);
30394 let b = _mm_setr_pd(1., 2.);
30395 let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
30396 assert_eq!(m, 0);
30397 let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
30398 assert_eq!(m, 1);
30399 }
30400
30401 #[simd_test(enable = "avx512f")]
30402 unsafe fn test_mm512_cmplt_epu32_mask() {
30403 #[rustfmt::skip]
30404 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30405 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30406 let b = _mm512_set1_epi32(-1);
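// Compared as unsigned, the -1 and u32::MAX lanes equal u32::MAX (= b), so only those lanes fail the less-than test.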
30407 let m = _mm512_cmplt_epu32_mask(a, b);
30408 assert_eq!(m, 0b11001111_11001111);
30409 }
30410
30411 #[simd_test(enable = "avx512f")]
30412 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
30413 #[rustfmt::skip]
30414 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30415 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30416 let b = _mm512_set1_epi32(-1);
30417 let mask = 0b01111010_01111010;
30418 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
30419 assert_eq!(r, 0b01001010_01001010);
30420 }
30421
30422 #[simd_test(enable = "avx512f")]
30423 unsafe fn test_mm512_cmpgt_epu32_mask() {
30424 #[rustfmt::skip]
30425 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30426 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30427 let b = _mm512_set1_epi32(-1);
30428 let m = _mm512_cmpgt_epu32_mask(b, a);
30429 assert_eq!(m, 0b11001111_11001111);
30430 }
30431
30432 #[simd_test(enable = "avx512f")]
30433 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
30434 #[rustfmt::skip]
30435 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30436 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30437 let b = _mm512_set1_epi32(-1);
30438 let mask = 0b01111010_01111010;
30439 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
30440 assert_eq!(r, 0b01001010_01001010);
30441 }
30442
30443 #[simd_test(enable = "avx512f")]
30444 unsafe fn test_mm512_cmple_epu32_mask() {
30445 #[rustfmt::skip]
30446 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30447 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30448 let b = _mm512_set1_epi32(-1);
30449 assert_eq!(
30450 _mm512_cmple_epu32_mask(a, b),
30451 !_mm512_cmpgt_epu32_mask(a, b)
30452 )
30453 }
30454
30455 #[simd_test(enable = "avx512f")]
30456 unsafe fn test_mm512_mask_cmple_epu32_mask() {
30457 #[rustfmt::skip]
30458 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30459 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30460 let b = _mm512_set1_epi32(-1);
30461 let mask = 0b01111010_01111010;
30462 assert_eq!(
30463 _mm512_mask_cmple_epu32_mask(mask, a, b),
30464 0b01111010_01111010
30465 );
30466 }
30467
30468 #[simd_test(enable = "avx512f")]
30469 unsafe fn test_mm512_cmpge_epu32_mask() {
30470 #[rustfmt::skip]
30471 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30472 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30473 let b = _mm512_set1_epi32(-1);
30474 assert_eq!(
30475 _mm512_cmpge_epu32_mask(a, b),
30476 !_mm512_cmplt_epu32_mask(a, b)
30477 )
30478 }
30479
30480 #[simd_test(enable = "avx512f")]
30481 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
30482 #[rustfmt::skip]
30483 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30484 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30485 let b = _mm512_set1_epi32(-1);
30486 let mask = 0b01111010_01111010;
30487 assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
30488 }
30489
30490 #[simd_test(enable = "avx512f")]
30491 unsafe fn test_mm512_cmpeq_epu32_mask() {
30492 #[rustfmt::skip]
30493 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30494 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30495 #[rustfmt::skip]
30496 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30497 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30498 let m = _mm512_cmpeq_epu32_mask(b, a);
30499 assert_eq!(m, 0b11001111_11001111);
30500 }
30501
30502 #[simd_test(enable = "avx512f")]
30503 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
30504 #[rustfmt::skip]
30505 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30506 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30507 #[rustfmt::skip]
30508 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30509 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30510 let mask = 0b01111010_01111010;
30511 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
30512 assert_eq!(r, 0b01001010_01001010);
30513 }
30514
30515 #[simd_test(enable = "avx512f")]
30516 unsafe fn test_mm512_cmpneq_epu32_mask() {
30517 #[rustfmt::skip]
30518 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30519 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30520 #[rustfmt::skip]
30521 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30522 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30523 let m = _mm512_cmpneq_epu32_mask(b, a);
30524 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
30525 }
30526
30527 #[simd_test(enable = "avx512f")]
30528 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
30529 #[rustfmt::skip]
30530 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
30531 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
30532 #[rustfmt::skip]
30533 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30534 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30535 let mask = 0b01111010_01111010;
30536 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
30537 assert_eq!(r, 0b00110010_00110010);
30538 }
30539
30540 #[simd_test(enable = "avx512f")]
30541 unsafe fn test_mm512_cmp_epu32_mask() {
30542 #[rustfmt::skip]
30543 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30544 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30545 let b = _mm512_set1_epi32(-1);
30546 let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
30547 assert_eq!(m, 0b11001111_11001111);
30548 }
30549
30550 #[simd_test(enable = "avx512f")]
30551 unsafe fn test_mm512_mask_cmp_epu32_mask() {
30552 #[rustfmt::skip]
30553 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30554 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30555 let b = _mm512_set1_epi32(-1);
30556 let mask = 0b01111010_01111010;
30557 let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
30558 assert_eq!(r, 0b01001010_01001010);
30559 }
30560
30561 #[simd_test(enable = "avx512f")]
30562 unsafe fn test_mm512_cmplt_epi32_mask() {
30563 #[rustfmt::skip]
30564 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30565 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30566 let b = _mm512_set1_epi32(-1);
30567 let m = _mm512_cmplt_epi32_mask(a, b);
30568 assert_eq!(m, 0b00000101_00000101);
30569 }
30570
30571 #[simd_test(enable = "avx512f")]
30572 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
30573 #[rustfmt::skip]
30574 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30575 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30576 let b = _mm512_set1_epi32(-1);
30577 let mask = 0b01100110_01100110;
30578 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
30579 assert_eq!(r, 0b00000100_00000100);
30580 }
30581
30582 #[simd_test(enable = "avx512f")]
30583 unsafe fn test_mm512_cmpgt_epi32_mask() {
30584 #[rustfmt::skip]
30585 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30586 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30587 let b = _mm512_set1_epi32(-1);
30588 let m = _mm512_cmpgt_epi32_mask(b, a);
30589 assert_eq!(m, 0b00000101_00000101);
30590 }
30591
30592 #[simd_test(enable = "avx512f")]
30593 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
30594 #[rustfmt::skip]
30595 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30596 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30597 let b = _mm512_set1_epi32(-1);
30598 let mask = 0b01100110_01100110;
30599 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
30600 assert_eq!(r, 0b00000100_00000100);
30601 }
30602
30603 #[simd_test(enable = "avx512f")]
30604 unsafe fn test_mm512_cmple_epi32_mask() {
30605 #[rustfmt::skip]
30606 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30607 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30608 let b = _mm512_set1_epi32(-1);
30609 assert_eq!(
30610 _mm512_cmple_epi32_mask(a, b),
30611 !_mm512_cmpgt_epi32_mask(a, b)
30612 )
30613 }
30614
30615 #[simd_test(enable = "avx512f")]
30616 unsafe fn test_mm512_mask_cmple_epi32_mask() {
30617 #[rustfmt::skip]
30618 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30619 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30620 let b = _mm512_set1_epi32(-1);
30621 let mask = 0b01111010_01111010;
30622 assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
30623 }
30624
30625 #[simd_test(enable = "avx512f")]
30626 unsafe fn test_mm512_cmpge_epi32_mask() {
30627 #[rustfmt::skip]
30628 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30629 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30630 let b = _mm512_set1_epi32(-1);
30631 assert_eq!(
30632 _mm512_cmpge_epi32_mask(a, b),
30633 !_mm512_cmplt_epi32_mask(a, b)
30634 )
30635 }
30636
30637 #[simd_test(enable = "avx512f")]
30638 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
30639 #[rustfmt::skip]
30640 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
30641 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
30642 let b = _mm512_set1_epi32(-1);
30643 let mask = 0b01111010_01111010;
30644 assert_eq!(
30645 _mm512_mask_cmpge_epi32_mask(mask, a, b),
30646 0b01111010_01111010
30647 );
30648 }
30649
30650 #[simd_test(enable = "avx512f")]
30651 unsafe fn test_mm512_cmpeq_epi32_mask() {
30652 #[rustfmt::skip]
30653 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30654 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30655 #[rustfmt::skip]
30656 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30657 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30658 let m = _mm512_cmpeq_epi32_mask(b, a);
30659 assert_eq!(m, 0b11001111_11001111);
30660 }
30661
30662 #[simd_test(enable = "avx512f")]
30663 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
30664 #[rustfmt::skip]
30665 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30666 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30667 #[rustfmt::skip]
30668 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30669 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30670 let mask = 0b01111010_01111010;
30671 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
30672 assert_eq!(r, 0b01001010_01001010);
30673 }
30674
30675 #[simd_test(enable = "avx512f")]
30676 unsafe fn test_mm512_cmpneq_epi32_mask() {
30677 #[rustfmt::skip]
30678 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30679 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30680 #[rustfmt::skip]
30681 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30682 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30683 let m = _mm512_cmpneq_epi32_mask(b, a);
30684 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
30685 }
30686
30687 #[simd_test(enable = "avx512f")]
30688 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
30689 #[rustfmt::skip]
30690 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
30691 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
30692 #[rustfmt::skip]
30693 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
30694 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
30695 let mask = 0b01111010_01111010;
30696 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
30697 assert_eq!(r, 0b00110010_00110010)
30698 }
30699
30700 #[simd_test(enable = "avx512f")]
30701 unsafe fn test_mm512_cmp_epi32_mask() {
30702 #[rustfmt::skip]
30703 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30704 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30705 let b = _mm512_set1_epi32(-1);
30706 let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
30707 assert_eq!(m, 0b00000101_00000101);
30708 }
30709
30710 #[simd_test(enable = "avx512f")]
30711 unsafe fn test_mm512_mask_cmp_epi32_mask() {
30712 #[rustfmt::skip]
30713 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
30714 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
30715 let b = _mm512_set1_epi32(-1);
30716 let mask = 0b01100110_01100110;
30717 let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
30718 assert_eq!(r, 0b00000100_00000100);
30719 }
30720
30721 #[simd_test(enable = "avx512f")]
30722 unsafe fn test_mm512_set_epi8() {
30723 let r = _mm512_set1_epi8(2);
30724 assert_eq_m512i(
30725 r,
30726 _mm512_set_epi8(
30727 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30728 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30729 2, 2, 2, 2, 2, 2, 2, 2,
30730 ),
30731 )
30732 }
30733
30734 #[simd_test(enable = "avx512f")]
30735 unsafe fn test_mm512_set_epi16() {
30736 let r = _mm512_set1_epi16(2);
30737 assert_eq_m512i(
30738 r,
30739 _mm512_set_epi16(
30740 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30741 2, 2, 2, 2,
30742 ),
30743 )
30744 }
30745
30746 #[simd_test(enable = "avx512f")]
30747 unsafe fn test_mm512_set_epi32() {
30748 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
30749 assert_eq_m512i(
30750 r,
30751 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
30752 )
30753 }
30754
30755 #[simd_test(enable = "avx512f")]
30756 unsafe fn test_mm512_setr_epi32() {
30757 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
30758 assert_eq_m512i(
30759 r,
30760 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
30761 )
30762 }
30763
30764 #[simd_test(enable = "avx512f")]
30765 unsafe fn test_mm512_set1_epi8() {
30766 let r = _mm512_set_epi8(
30767 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30768 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30769 2, 2, 2, 2, 2, 2,
30770 );
30771 assert_eq_m512i(r, _mm512_set1_epi8(2));
30772 }
30773
30774 #[simd_test(enable = "avx512f")]
30775 unsafe fn test_mm512_set1_epi16() {
30776 let r = _mm512_set_epi16(
30777 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
30778 2, 2, 2,
30779 );
30780 assert_eq_m512i(r, _mm512_set1_epi16(2));
30781 }
30782
30783 #[simd_test(enable = "avx512f")]
30784 unsafe fn test_mm512_set1_epi32() {
30785 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30786 assert_eq_m512i(r, _mm512_set1_epi32(2));
30787 }
30788
30789 #[simd_test(enable = "avx512f")]
30790 unsafe fn test_mm512_setzero_si512() {
30791 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
30792 }
30793
30794 #[simd_test(enable = "avx512f")]
30795 unsafe fn test_mm512_setzero_epi32() {
30796 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
30797 }
30798
30799 #[simd_test(enable = "avx512f")]
30800 unsafe fn test_mm512_set_ps() {
30801 let r = _mm512_setr_ps(
30802 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
30803 );
30804 assert_eq_m512(
30805 r,
30806 _mm512_set_ps(
30807 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
30808 ),
30809 )
30810 }
30811
30812 #[simd_test(enable = "avx512f")]
30813 unsafe fn test_mm512_setr_ps() {
30814 let r = _mm512_set_ps(
30815 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
30816 );
30817 assert_eq_m512(
30818 r,
30819 _mm512_setr_ps(
30820 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
30821 ),
30822 )
30823 }
30824
30825 #[simd_test(enable = "avx512f")]
30826 unsafe fn test_mm512_set1_ps() {
30827 #[rustfmt::skip]
30828 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
30829 2., 2., 2., 2., 2., 2., 2., 2.);
30830 assert_eq_m512(expected, _mm512_set1_ps(2.));
30831 }
30832
30833 #[simd_test(enable = "avx512f")]
30834 unsafe fn test_mm512_set4_epi32() {
30835 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
30836 assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
30837 }
30838
30839 #[simd_test(enable = "avx512f")]
30840 unsafe fn test_mm512_set4_ps() {
30841 let r = _mm512_set_ps(
30842 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
30843 );
30844 assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
30845 }
30846
30847 #[simd_test(enable = "avx512f")]
30848 unsafe fn test_mm512_setr4_epi32() {
30849 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
30850 assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
30851 }
30852
30853 #[simd_test(enable = "avx512f")]
30854 unsafe fn test_mm512_setr4_ps() {
30855 let r = _mm512_set_ps(
30856 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
30857 );
30858 assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
30859 }
30860
30861 #[simd_test(enable = "avx512f")]
30862 unsafe fn test_mm512_setzero_ps() {
30863 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
30864 }
30865
30866 #[simd_test(enable = "avx512f")]
30867 unsafe fn test_mm512_setzero() {
30868 assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
30869 }
30870
30871 #[simd_test(enable = "avx512f")]
30872 unsafe fn test_mm512_loadu_pd() {
30873 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
30874 let p = a.as_ptr();
30875 let r = _mm512_loadu_pd(black_box(p));
30876 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
30877 assert_eq_m512d(r, e);
30878 }
30879
30880 #[simd_test(enable = "avx512f")]
30881 unsafe fn test_mm512_storeu_pd() {
30882 let a = _mm512_set1_pd(9.);
30883 let mut r = _mm512_undefined_pd();
30884 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
30885 assert_eq_m512d(r, a);
30886 }
30887
30888 #[simd_test(enable = "avx512f")]
30889 unsafe fn test_mm512_loadu_ps() {
30890 let a = &[
30891 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
30892 ];
30893 let p = a.as_ptr();
30894 let r = _mm512_loadu_ps(black_box(p));
30895 let e = _mm512_setr_ps(
30896 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
30897 );
30898 assert_eq_m512(r, e);
30899 }
30900
30901 #[simd_test(enable = "avx512f")]
30902 unsafe fn test_mm512_storeu_ps() {
30903 let a = _mm512_set1_ps(9.);
30904 let mut r = _mm512_undefined_ps();
30905 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
30906 assert_eq_m512(r, a);
30907 }
30908
30909 #[simd_test(enable = "avx512f")]
30910 unsafe fn test_mm512_setr_pd() {
30911 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
30912 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
30913 }
30914
30915 #[simd_test(enable = "avx512f")]
30916 unsafe fn test_mm512_set_pd() {
30917 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
30918 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
30919 }
30920
30921 #[simd_test(enable = "avx512f")]
30922 unsafe fn test_mm512_rol_epi32() {
30923 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
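// Rotating left by 1 wraps bit 31 of the first lane back around to bit 0.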
30924 let r = _mm512_rol_epi32(a, 1);
30925 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30926 assert_eq_m512i(r, e);
30927 }
30928
30929 #[simd_test(enable = "avx512f")]
30930 unsafe fn test_mm512_mask_rol_epi32() {
30931 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
30932 let r = _mm512_mask_rol_epi32(a, 0, a, 1);
30933 assert_eq_m512i(r, a);
30934
30935 let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1);
30936 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30937 assert_eq_m512i(r, e);
30938 }
30939
30940 #[simd_test(enable = "avx512f")]
30941 unsafe fn test_mm512_maskz_rol_epi32() {
30942 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
30943 let r = _mm512_maskz_rol_epi32(0, a, 1);
30944 assert_eq_m512i(r, _mm512_setzero_si512());
30945
30946 let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
30947 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
30948 assert_eq_m512i(r, e);
30949 }
30950
30951 #[simd_test(enable = "avx512f")]
30952 unsafe fn test_mm512_ror_epi32() {
30953 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30954 let r = _mm512_ror_epi32(a, 1);
30955 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
30956 assert_eq_m512i(r, e);
30957 }
30958
30959 #[simd_test(enable = "avx512f")]
30960 unsafe fn test_mm512_mask_ror_epi32() {
30961 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30962 let r = _mm512_mask_ror_epi32(a, 0, a, 1);
30963 assert_eq_m512i(r, a);
30964
30965 let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1);
30966 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
30967 assert_eq_m512i(r, e);
30968 }
30969
30970 #[simd_test(enable = "avx512f")]
30971 unsafe fn test_mm512_maskz_ror_epi32() {
30972 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
30973 let r = _mm512_maskz_ror_epi32(0, a, 1);
30974 assert_eq_m512i(r, _mm512_setzero_si512());
30975
30976 let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1);
30977 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
30978 assert_eq_m512i(r, e);
30979 }
30980
30981 #[simd_test(enable = "avx512f")]
30982 unsafe fn test_mm512_slli_epi32() {
30983 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
30984 let r = _mm512_slli_epi32(a, 1);
30985 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30986 assert_eq_m512i(r, e);
30987 }
30988
30989 #[simd_test(enable = "avx512f")]
30990 unsafe fn test_mm512_mask_slli_epi32() {
30991 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
30992 let r = _mm512_mask_slli_epi32(a, 0, a, 1);
30993 assert_eq_m512i(r, a);
30994
30995 let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1);
30996 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
30997 assert_eq_m512i(r, e);
30998 }
30999
31000 #[simd_test(enable = "avx512f")]
31001 unsafe fn test_mm512_maskz_slli_epi32() {
31002 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
31003 let r = _mm512_maskz_slli_epi32(0, a, 1);
31004 assert_eq_m512i(r, _mm512_setzero_si512());
31005
31006 let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1);
31007 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
31008 assert_eq_m512i(r, e);
31009 }
31010
31011 #[simd_test(enable = "avx512f")]
31012 unsafe fn test_mm512_srli_epi32() {
31013 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31014 let r = _mm512_srli_epi32(a, 1);
31015 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31016 assert_eq_m512i(r, e);
31017 }
31018
31019 #[simd_test(enable = "avx512f")]
31020 unsafe fn test_mm512_mask_srli_epi32() {
31021 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31022 let r = _mm512_mask_srli_epi32(a, 0, a, 1);
31023 assert_eq_m512i(r, a);
31024
31025 let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1);
31026 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31027 assert_eq_m512i(r, e);
31028 }
31029
31030 #[simd_test(enable = "avx512f")]
31031 unsafe fn test_mm512_maskz_srli_epi32() {
31032 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
31033 let r = _mm512_maskz_srli_epi32(0, a, 1);
31034 assert_eq_m512i(r, _mm512_setzero_si512());
31035
31036 let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1);
31037 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
31038 assert_eq_m512i(r, e);
31039 }
31040
31041 #[simd_test(enable = "avx512f")]
31042 unsafe fn test_mm512_rolv_epi32() {
31043 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31044 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31045
31046 let r = _mm512_rolv_epi32(a, b);
31047
31048 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31049 assert_eq_m512i(r, e);
31050 }
31051
31052 #[simd_test(enable = "avx512f")]
31053 unsafe fn test_mm512_mask_rolv_epi32() {
31054 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31055 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31056
31057 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
31058 assert_eq_m512i(r, a);
31059
31060 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
31061
31062 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31063 assert_eq_m512i(r, e);
31064 }
31065
31066 #[simd_test(enable = "avx512f")]
31067 unsafe fn test_mm512_maskz_rolv_epi32() {
31068 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
31069 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31070
31071 let r = _mm512_maskz_rolv_epi32(0, a, b);
31072 assert_eq_m512i(r, _mm512_setzero_si512());
31073
31074 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
31075
31076 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
31077 assert_eq_m512i(r, e);
31078 }
31079
31080 #[simd_test(enable = "avx512f")]
31081 unsafe fn test_mm512_rorv_epi32() {
31082 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31083 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31084
31085 let r = _mm512_rorv_epi32(a, b);
31086
31087 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31088 assert_eq_m512i(r, e);
31089 }
31090
31091 #[simd_test(enable = "avx512f")]
31092 unsafe fn test_mm512_mask_rorv_epi32() {
31093 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31094 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31095
31096 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
31097 assert_eq_m512i(r, a);
31098
31099 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
31100
31101 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31102 assert_eq_m512i(r, e);
31103 }
31104
31105 #[simd_test(enable = "avx512f")]
31106 unsafe fn test_mm512_maskz_rorv_epi32() {
31107 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
31108 let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31109
31110 let r = _mm512_maskz_rorv_epi32(0, a, b);
31111 assert_eq_m512i(r, _mm512_setzero_si512());
31112
31113 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
31114
31115 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
31116 assert_eq_m512i(r, e);
31117 }
31118
31119 #[simd_test(enable = "avx512f")]
31120 unsafe fn test_mm512_sllv_epi32() {
31121 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31122 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31123
31124 let r = _mm512_sllv_epi32(a, count);
31125
31126 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31127 assert_eq_m512i(r, e);
31128 }
31129
31130 #[simd_test(enable = "avx512f")]
31131 unsafe fn test_mm512_mask_sllv_epi32() {
31132 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31133 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31134
31135 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
31136 assert_eq_m512i(r, a);
31137
31138 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
31139
31140 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31141 assert_eq_m512i(r, e);
31142 }
31143
31144 #[simd_test(enable = "avx512f")]
31145 unsafe fn test_mm512_maskz_sllv_epi32() {
31146 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
31147 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31148
31149 let r = _mm512_maskz_sllv_epi32(0, a, count);
31150 assert_eq_m512i(r, _mm512_setzero_si512());
31151
31152 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
31153
31154 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
31155 assert_eq_m512i(r, e);
31156 }
31157
31158 #[simd_test(enable = "avx512f")]
31159 unsafe fn test_mm512_srlv_epi32() {
31160 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31161 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31162
31163 let r = _mm512_srlv_epi32(a, count);
31164
31165 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31166 assert_eq_m512i(r, e);
31167 }
31168
31169 #[simd_test(enable = "avx512f")]
31170 unsafe fn test_mm512_mask_srlv_epi32() {
31171 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
31172 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31173
31174 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
31175 assert_eq_m512i(r, a);
31176
31177 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
31178
31179 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31180 assert_eq_m512i(r, e);
31181 }
31182
31183 #[simd_test(enable = "avx512f")]
31184 unsafe fn test_mm512_maskz_srlv_epi32() {
31185 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
31186 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
31187
31188 let r = _mm512_maskz_srlv_epi32(0, a, count);
31189 assert_eq_m512i(r, _mm512_setzero_si512());
31190
31191 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
31192
31193 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
31194 assert_eq_m512i(r, e);
31195 }
31196
31197 #[simd_test(enable = "avx512f")]
31198 unsafe fn test_mm512_sll_epi32() {
31199 let a = _mm512_set_epi32(
31200 1 << 31,
31201 1 << 0,
31202 1 << 1,
31203 1 << 2,
31204 0,
31205 0,
31206 0,
31207 0,
31208 0,
31209 0,
31210 0,
31211 0,
31212 0,
31213 0,
31214 0,
31215 0,
31216 );
31217 let count = _mm_set_epi32(0, 0, 0, 2);
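// The shift amount is the low 64 bits of `count`, i.e. 2.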
31218 let r = _mm512_sll_epi32(a, count);
31219 let e = _mm512_set_epi32(
31220 0,
31221 1 << 2,
31222 1 << 3,
31223 1 << 4,
31224 0,
31225 0,
31226 0,
31227 0,
31228 0,
31229 0,
31230 0,
31231 0,
31232 0,
31233 0,
31234 0,
31235 0,
31236 );
31237 assert_eq_m512i(r, e);
31238 }
31239
31240 #[simd_test(enable = "avx512f")]
31241 unsafe fn test_mm512_mask_sll_epi32() {
31242 let a = _mm512_set_epi32(
31243 1 << 31,
31244 1 << 0,
31245 1 << 1,
31246 1 << 2,
31247 0,
31248 0,
31249 0,
31250 0,
31251 0,
31252 0,
31253 0,
31254 0,
31255 0,
31256 0,
31257 0,
31258 0,
31259 );
31260 let count = _mm_set_epi32(0, 0, 0, 2);
31261 let r = _mm512_mask_sll_epi32(a, 0, a, count);
31262 assert_eq_m512i(r, a);
31263
31264 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
31265 let e = _mm512_set_epi32(
31266 0,
31267 1 << 2,
31268 1 << 3,
31269 1 << 4,
31270 0,
31271 0,
31272 0,
31273 0,
31274 0,
31275 0,
31276 0,
31277 0,
31278 0,
31279 0,
31280 0,
31281 0,
31282 );
31283 assert_eq_m512i(r, e);
31284 }
31285
31286 #[simd_test(enable = "avx512f")]
31287 unsafe fn test_mm512_maskz_sll_epi32() {
31288 let a = _mm512_set_epi32(
31289 1 << 31,
31290 1 << 0,
31291 1 << 1,
31292 1 << 2,
31293 0,
31294 0,
31295 0,
31296 0,
31297 0,
31298 0,
31299 0,
31300 0,
31301 0,
31302 0,
31303 0,
31304 1 << 31,
31305 );
31306 let count = _mm_set_epi32(2, 0, 0, 2);
31307 let r = _mm512_maskz_sll_epi32(0, a, count);
31308 assert_eq_m512i(r, _mm512_setzero_si512());
31309
31310 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
31311 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
31312 assert_eq_m512i(r, e);
31313 }
31314
31315 #[simd_test(enable = "avx512f")]
31316 unsafe fn test_mm512_srl_epi32() {
31317 let a = _mm512_set_epi32(
31318 1 << 31,
31319 1 << 0,
31320 1 << 1,
31321 1 << 2,
31322 0,
31323 0,
31324 0,
31325 0,
31326 0,
31327 0,
31328 0,
31329 0,
31330 0,
31331 0,
31332 0,
31333 0,
31334 );
31335 let count = _mm_set_epi32(0, 0, 0, 2);
31336 let r = _mm512_srl_epi32(a, count);
31337 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
31338 assert_eq_m512i(r, e);
31339 }
31340
31341 #[simd_test(enable = "avx512f")]
31342 unsafe fn test_mm512_mask_srl_epi32() {
31343 let a = _mm512_set_epi32(
31344 1 << 31,
31345 1 << 0,
31346 1 << 1,
31347 1 << 2,
31348 0,
31349 0,
31350 0,
31351 0,
31352 0,
31353 0,
31354 0,
31355 0,
31356 0,
31357 0,
31358 0,
31359 0,
31360 );
31361 let count = _mm_set_epi32(0, 0, 0, 2);
31362 let r = _mm512_mask_srl_epi32(a, 0, a, count);
31363 assert_eq_m512i(r, a);
31364
31365 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
31366 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
31367 assert_eq_m512i(r, e);
31368 }
31369
31370 #[simd_test(enable = "avx512f")]
31371 unsafe fn test_mm512_maskz_srl_epi32() {
31372 let a = _mm512_set_epi32(
31373 1 << 31,
31374 1 << 0,
31375 1 << 1,
31376 1 << 2,
31377 0,
31378 0,
31379 0,
31380 0,
31381 0,
31382 0,
31383 0,
31384 0,
31385 0,
31386 0,
31387 0,
31388 1 << 31,
31389 );
31390 let count = _mm_set_epi32(2, 0, 0, 2);
31391 let r = _mm512_maskz_srl_epi32(0, a, count);
31392 assert_eq_m512i(r, _mm512_setzero_si512());
31393
31394 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
31395 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
31396 assert_eq_m512i(r, e);
31397 }
31398
31399 #[simd_test(enable = "avx512f")]
31400 unsafe fn test_mm512_sra_epi32() {
31401 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
31402 let count = _mm_set_epi32(1, 0, 0, 2);
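// Only the low 64 bits of `count` matter, so every lane is shifted right arithmetically by 2;
// the sign is kept and rounding is toward negative infinity, e.g. -15 >> 2 == -4.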
31403 let r = _mm512_sra_epi32(a, count);
31404 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
31405 assert_eq_m512i(r, e);
31406 }
31407
31408 #[simd_test(enable = "avx512f")]
31409 unsafe fn test_mm512_mask_sra_epi32() {
31410 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
31411 let count = _mm_set_epi32(0, 0, 0, 2);
31412 let r = _mm512_mask_sra_epi32(a, 0, a, count);
31413 assert_eq_m512i(r, a);
31414
31415 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
31416 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
31417 assert_eq_m512i(r, e);
31418 }
31419
31420 #[simd_test(enable = "avx512f")]
31421 unsafe fn test_mm512_maskz_sra_epi32() {
31422 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
31423 let count = _mm_set_epi32(2, 0, 0, 2);
31424 let r = _mm512_maskz_sra_epi32(0, a, count);
31425 assert_eq_m512i(r, _mm512_setzero_si512());
31426
31427 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
31428 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
31429 assert_eq_m512i(r, e);
31430 }
31431
31432 #[simd_test(enable = "avx512f")]
31433 unsafe fn test_mm512_srav_epi32() {
31434 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
31435 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
31436 let r = _mm512_srav_epi32(a, count);
31437 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
31438 assert_eq_m512i(r, e);
31439 }
31440
31441 #[simd_test(enable = "avx512f")]
31442 unsafe fn test_mm512_mask_srav_epi32() {
31443 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
31444 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
31445 let r = _mm512_mask_srav_epi32(a, 0, a, count);
31446 assert_eq_m512i(r, a);
31447
31448 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
31449 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
31450 assert_eq_m512i(r, e);
31451 }
31452
31453 #[simd_test(enable = "avx512f")]
31454 unsafe fn test_mm512_maskz_srav_epi32() {
31455 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
31456 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
31457 let r = _mm512_maskz_srav_epi32(0, a, count);
31458 assert_eq_m512i(r, _mm512_setzero_si512());
31459
31460 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
31461 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
31462 assert_eq_m512i(r, e);
31463 }
31464
31465 #[simd_test(enable = "avx512f")]
31466 unsafe fn test_mm512_srai_epi32() {
31467 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
31468 let r = _mm512_srai_epi32(a, 2);
31469 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
31470 assert_eq_m512i(r, e);
31471 }
31472
31473 #[simd_test(enable = "avx512f")]
31474 unsafe fn test_mm512_mask_srai_epi32() {
31475 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
31476 let r = _mm512_mask_srai_epi32(a, 0, a, 2);
31477 assert_eq_m512i(r, a);
31478
31479 let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2);
31480 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
31481 assert_eq_m512i(r, e);
31482 }
31483
31484 #[simd_test(enable = "avx512f")]
31485 unsafe fn test_mm512_maskz_srai_epi32() {
31486 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
31487 let r = _mm512_maskz_srai_epi32(0, a, 2);
31488 assert_eq_m512i(r, _mm512_setzero_si512());
31489
31490 let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2);
31491 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
31492 assert_eq_m512i(r, e);
31493 }
31494
31495 #[simd_test(enable = "avx512f")]
31496 unsafe fn test_mm512_permute_ps() {
31497 let a = _mm512_set_ps(
31498 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31499 );
31500 let r = _mm512_permute_ps(a, 1);
31501 let e = _mm512_set_ps(
31502 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
31503 );
31504 assert_eq_m512(r, e);
31505 }
31506
31507 #[simd_test(enable = "avx512f")]
31508 unsafe fn test_mm512_mask_permute_ps() {
31509 let a = _mm512_set_ps(
31510 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31511 );
31512 let r = _mm512_mask_permute_ps(a, 0b00000000_00000000, a, 1);
31513 assert_eq_m512(r, a);
31514 let r = _mm512_mask_permute_ps(a, 0b11111111_11111111, a, 1);
31515 let e = _mm512_set_ps(
31516 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
31517 );
31518 assert_eq_m512(r, e);
31519 }
31520
31521 #[simd_test(enable = "avx512f")]
31522 unsafe fn test_mm512_maskz_permute_ps() {
31523 let a = _mm512_set_ps(
31524 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31525 );
31526 let r = _mm512_maskz_permute_ps(0, a, 1);
31527 assert_eq_m512(r, _mm512_setzero_ps());
31528 let r = _mm512_maskz_permute_ps(0b00000000_11111111, a, 1);
31529 let e = _mm512_set_ps(
31530 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
31531 );
31532 assert_eq_m512(r, e);
31533 }
31534
31535 #[simd_test(enable = "avx512f")]
31536 unsafe fn test_mm512_permutevar_epi32() {
31537 let idx = _mm512_set1_epi32(1);
31538 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
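// _mm512_set_epi32 lists lanes from high to low, so lane 1 of `a` holds 14; an all-ones
// index therefore broadcasts 14 to every lane.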
31539 let r = _mm512_permutevar_epi32(idx, a);
31540 let e = _mm512_set1_epi32(14);
31541 assert_eq_m512i(r, e);
31542 }
31543
31544 #[simd_test(enable = "avx512f")]
31545 unsafe fn test_mm512_mask_permutevar_epi32() {
31546 let idx = _mm512_set1_epi32(1);
31547 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31548 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
31549 assert_eq_m512i(r, a);
31550 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
31551 let e = _mm512_set1_epi32(14);
31552 assert_eq_m512i(r, e);
31553 }
31554
31555 #[simd_test(enable = "avx512f")]
31556 unsafe fn test_mm512_permutevar_ps() {
31557 let a = _mm512_set_ps(
31558 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31559 );
31560 let b = _mm512_set1_epi32(1);
31561 let r = _mm512_permutevar_ps(a, b);
31562 let e = _mm512_set_ps(
31563 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
31564 );
31565 assert_eq_m512(r, e);
31566 }
31567
31568 #[simd_test(enable = "avx512f")]
31569 unsafe fn test_mm512_mask_permutevar_ps() {
31570 let a = _mm512_set_ps(
31571 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31572 );
31573 let b = _mm512_set1_epi32(1);
31574 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
31575 assert_eq_m512(r, a);
31576 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
31577 let e = _mm512_set_ps(
31578 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
31579 );
31580 assert_eq_m512(r, e);
31581 }
31582
31583 #[simd_test(enable = "avx512f")]
31584 unsafe fn test_mm512_maskz_permutevar_ps() {
31585 let a = _mm512_set_ps(
31586 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31587 );
31588 let b = _mm512_set1_epi32(1);
31589 let r = _mm512_maskz_permutevar_ps(0, a, b);
31590 assert_eq_m512(r, _mm512_setzero_ps());
31591 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
31592 let e = _mm512_set_ps(
31593 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
31594 );
31595 assert_eq_m512(r, e);
31596 }
31597
31598 #[simd_test(enable = "avx512f")]
31599 unsafe fn test_mm512_permutexvar_epi32() {
31600 let idx = _mm512_set1_epi32(1);
31601 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31602 let r = _mm512_permutexvar_epi32(idx, a);
31603 let e = _mm512_set1_epi32(14);
31604 assert_eq_m512i(r, e);
31605 }
31606
31607 #[simd_test(enable = "avx512f")]
31608 unsafe fn test_mm512_mask_permutexvar_epi32() {
31609 let idx = _mm512_set1_epi32(1);
31610 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31611 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
31612 assert_eq_m512i(r, a);
31613 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
31614 let e = _mm512_set1_epi32(14);
31615 assert_eq_m512i(r, e);
31616 }
31617
31618 #[simd_test(enable = "avx512f")]
31619 unsafe fn test_mm512_maskz_permutexvar_epi32() {
31620 let idx = _mm512_set1_epi32(1);
31621 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31622 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
31623 assert_eq_m512i(r, _mm512_setzero_si512());
31624 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
31625 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
31626 assert_eq_m512i(r, e);
31627 }
31628
31629 #[simd_test(enable = "avx512f")]
31630 unsafe fn test_mm512_permutexvar_ps() {
31631 let idx = _mm512_set1_epi32(1);
31632 let a = _mm512_set_ps(
31633 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31634 );
31635 let r = _mm512_permutexvar_ps(idx, a);
31636 let e = _mm512_set1_ps(14.);
31637 assert_eq_m512(r, e);
31638 }
31639
31640 #[simd_test(enable = "avx512f")]
31641 unsafe fn test_mm512_mask_permutexvar_ps() {
31642 let idx = _mm512_set1_epi32(1);
31643 let a = _mm512_set_ps(
31644 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31645 );
31646 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
31647 assert_eq_m512(r, a);
31648 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
31649 let e = _mm512_set1_ps(14.);
31650 assert_eq_m512(r, e);
31651 }
31652
31653 #[simd_test(enable = "avx512f")]
31654 unsafe fn test_mm512_maskz_permutexvar_ps() {
31655 let idx = _mm512_set1_epi32(1);
31656 let a = _mm512_set_ps(
31657 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31658 );
31659 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
31660 assert_eq_m512(r, _mm512_setzero_ps());
31661 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
31662 let e = _mm512_set_ps(
31663 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
31664 );
31665 assert_eq_m512(r, e);
31666 }
31667
31668 #[simd_test(enable = "avx512f")]
31669 unsafe fn test_mm512_permutex2var_epi32() {
31670 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31671 let idx = _mm512_set_epi32(
31672 1,
31673 1 << 4,
31674 2,
31675 1 << 4,
31676 3,
31677 1 << 4,
31678 4,
31679 1 << 4,
31680 5,
31681 1 << 4,
31682 6,
31683 1 << 4,
31684 7,
31685 1 << 4,
31686 8,
31687 1 << 4,
31688 );
31689 let b = _mm512_set1_epi32(100);
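// Index bit 4 (1 << 4) selects the lane from `b`; indices without it pick from `a` using
// their low four bits.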
31690 let r = _mm512_permutex2var_epi32(a, idx, b);
31691 let e = _mm512_set_epi32(
31692 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
31693 );
31694 assert_eq_m512i(r, e);
31695 }
31696
31697 #[simd_test(enable = "avx512f")]
31698 unsafe fn test_mm512_mask_permutex2var_epi32() {
31699 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31700 let idx = _mm512_set_epi32(
31701 1,
31702 1 << 4,
31703 2,
31704 1 << 4,
31705 3,
31706 1 << 4,
31707 4,
31708 1 << 4,
31709 5,
31710 1 << 4,
31711 6,
31712 1 << 4,
31713 7,
31714 1 << 4,
31715 8,
31716 1 << 4,
31717 );
31718 let b = _mm512_set1_epi32(100);
31719 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
31720 assert_eq_m512i(r, a);
31721 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
31722 let e = _mm512_set_epi32(
31723 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
31724 );
31725 assert_eq_m512i(r, e);
31726 }
31727
31728 #[simd_test(enable = "avx512f")]
31729 unsafe fn test_mm512_maskz_permutex2var_epi32() {
31730 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31731 let idx = _mm512_set_epi32(
31732 1,
31733 1 << 4,
31734 2,
31735 1 << 4,
31736 3,
31737 1 << 4,
31738 4,
31739 1 << 4,
31740 5,
31741 1 << 4,
31742 6,
31743 1 << 4,
31744 7,
31745 1 << 4,
31746 8,
31747 1 << 4,
31748 );
31749 let b = _mm512_set1_epi32(100);
31750 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
31751 assert_eq_m512i(r, _mm512_setzero_si512());
31752 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
31753 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
31754 assert_eq_m512i(r, e);
31755 }
31756
31757 #[simd_test(enable = "avx512f")]
31758 unsafe fn test_mm512_mask2_permutex2var_epi32() {
31759 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
31760 let idx = _mm512_set_epi32(
31761 1000,
31762 1 << 4,
31763 2000,
31764 1 << 4,
31765 3000,
31766 1 << 4,
31767 4000,
31768 1 << 4,
31769 5,
31770 1 << 4,
31771 6,
31772 1 << 4,
31773 7,
31774 1 << 4,
31775 8,
31776 1 << 4,
31777 );
31778 let b = _mm512_set1_epi32(100);
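// The mask2 variant copies unselected lanes from `idx` itself, so an all-zero mask returns
// `idx` and the 1000/2000/... sentinels survive in the upper half below.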
31779 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
31780 assert_eq_m512i(r, idx);
31781 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
31782 let e = _mm512_set_epi32(
31783 1000,
31784 1 << 4,
31785 2000,
31786 1 << 4,
31787 3000,
31788 1 << 4,
31789 4000,
31790 1 << 4,
31791 10,
31792 100,
31793 9,
31794 100,
31795 8,
31796 100,
31797 7,
31798 100,
31799 );
31800 assert_eq_m512i(r, e);
31801 }
31802
31803 #[simd_test(enable = "avx512f")]
31804 unsafe fn test_mm512_permutex2var_ps() {
31805 let a = _mm512_set_ps(
31806 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31807 );
31808 let idx = _mm512_set_epi32(
31809 1,
31810 1 << 4,
31811 2,
31812 1 << 4,
31813 3,
31814 1 << 4,
31815 4,
31816 1 << 4,
31817 5,
31818 1 << 4,
31819 6,
31820 1 << 4,
31821 7,
31822 1 << 4,
31823 8,
31824 1 << 4,
31825 );
31826 let b = _mm512_set1_ps(100.);
31827 let r = _mm512_permutex2var_ps(a, idx, b);
31828 let e = _mm512_set_ps(
31829 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
31830 );
31831 assert_eq_m512(r, e);
31832 }
31833
31834 #[simd_test(enable = "avx512f")]
31835 unsafe fn test_mm512_mask_permutex2var_ps() {
31836 let a = _mm512_set_ps(
31837 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31838 );
31839 let idx = _mm512_set_epi32(
31840 1,
31841 1 << 4,
31842 2,
31843 1 << 4,
31844 3,
31845 1 << 4,
31846 4,
31847 1 << 4,
31848 5,
31849 1 << 4,
31850 6,
31851 1 << 4,
31852 7,
31853 1 << 4,
31854 8,
31855 1 << 4,
31856 );
31857 let b = _mm512_set1_ps(100.);
31858 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
31859 assert_eq_m512(r, a);
31860 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
31861 let e = _mm512_set_ps(
31862 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
31863 );
31864 assert_eq_m512(r, e);
31865 }
31866
31867 #[simd_test(enable = "avx512f")]
31868 unsafe fn test_mm512_maskz_permutex2var_ps() {
31869 let a = _mm512_set_ps(
31870 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31871 );
31872 let idx = _mm512_set_epi32(
31873 1,
31874 1 << 4,
31875 2,
31876 1 << 4,
31877 3,
31878 1 << 4,
31879 4,
31880 1 << 4,
31881 5,
31882 1 << 4,
31883 6,
31884 1 << 4,
31885 7,
31886 1 << 4,
31887 8,
31888 1 << 4,
31889 );
31890 let b = _mm512_set1_ps(100.);
31891 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
31892 assert_eq_m512(r, _mm512_setzero_ps());
31893 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
31894 let e = _mm512_set_ps(
31895 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
31896 );
31897 assert_eq_m512(r, e);
31898 }
31899
31900 #[simd_test(enable = "avx512f")]
31901 unsafe fn test_mm512_mask2_permutex2var_ps() {
31902 let a = _mm512_set_ps(
31903 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
31904 );
31905 let idx = _mm512_set_epi32(
31906 1,
31907 1 << 4,
31908 2,
31909 1 << 4,
31910 3,
31911 1 << 4,
31912 4,
31913 1 << 4,
31914 5,
31915 1 << 4,
31916 6,
31917 1 << 4,
31918 7,
31919 1 << 4,
31920 8,
31921 1 << 4,
31922 );
31923 let b = _mm512_set1_ps(100.);
31924 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
31925 assert_eq_m512(r, _mm512_setzero_ps());
31926 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b00000000_11111111, b);
31927 let e = _mm512_set_ps(
31928 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
31929 );
31930 assert_eq_m512(r, e);
31931 }
31932
31933 #[simd_test(enable = "avx512f")]
31934 unsafe fn test_mm512_shuffle_epi32() {
31935 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
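// _MM_PERM_AADD encodes the selector 0b00_00_11_11: within each 128-bit lane, result
// positions 0 and 1 take element 3 and positions 2 and 3 take element 0.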
31936 let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD);
31937 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
31938 assert_eq_m512i(r, e);
31939 }
31940
31941 #[simd_test(enable = "avx512f")]
31942 unsafe fn test_mm512_mask_shuffle_epi32() {
31943 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
31944 let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
31945 assert_eq_m512i(r, a);
31946 let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD);
31947 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
31948 assert_eq_m512i(r, e);
31949 }
31950
31951 #[simd_test(enable = "avx512f")]
31952 unsafe fn test_mm512_maskz_shuffle_epi32() {
31953 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
31954 let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
31955 assert_eq_m512i(r, _mm512_setzero_si512());
31956 let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD);
31957 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
31958 assert_eq_m512i(r, e);
31959 }
31960
31961 #[simd_test(enable = "avx512f")]
31962 unsafe fn test_mm512_shuffle_ps() {
31963 let a = _mm512_setr_ps(
31964 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
31965 );
31966 let b = _mm512_setr_ps(
31967 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
31968 );
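// With control 0x0F each 128-bit lane of the result is { a[3], a[3], b[0], b[0] }: the low
// two elements come from `a`, the high two from `b`.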
31969 let r = _mm512_shuffle_ps(a, b, 0x0F);
31970 let e = _mm512_setr_ps(
31971 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
31972 );
31973 assert_eq_m512(r, e);
31974 }
31975
31976 #[simd_test(enable = "avx512f")]
31977 unsafe fn test_mm512_mask_shuffle_ps() {
31978 let a = _mm512_setr_ps(
31979 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
31980 );
31981 let b = _mm512_setr_ps(
31982 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
31983 );
31984 let r = _mm512_mask_shuffle_ps(a, 0, a, b, 0x0F);
31985 assert_eq_m512(r, a);
31986 let r = _mm512_mask_shuffle_ps(a, 0b11111111_11111111, a, b, 0x0F);
31987 let e = _mm512_setr_ps(
31988 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
31989 );
31990 assert_eq_m512(r, e);
31991 }
31992
31993 #[simd_test(enable = "avx512f")]
31994 unsafe fn test_mm512_maskz_shuffle_ps() {
31995 let a = _mm512_setr_ps(
31996 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
31997 );
31998 let b = _mm512_setr_ps(
31999 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
32000 );
32001 let r = _mm512_maskz_shuffle_ps(0, a, b, 0x0F);
32002 assert_eq_m512(r, _mm512_setzero_ps());
32003 let r = _mm512_maskz_shuffle_ps(0b00000000_11111111, a, b, 0x0F);
32004 let e = _mm512_setr_ps(
32005 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
32006 );
32007 assert_eq_m512(r, e);
32008 }
32009
32010 #[simd_test(enable = "avx512f")]
32011 unsafe fn test_mm512_shuffle_i32x4() {
32012 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
32013 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
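// The immediate selects whole 128-bit lanes: its low two fields pick lanes of `a` for the
// lower half of the result, the high two pick lanes of `b` for the upper half, so
// 0b00000000 repeats lane 0 of each source.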
32014 let r = _mm512_shuffle_i32x4(a, b, 0b00000000);
32015 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
32016 assert_eq_m512i(r, e);
32017 }
32018
32019 #[simd_test(enable = "avx512f")]
32020 unsafe fn test_mm512_mask_shuffle_i32x4() {
32021 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
32022 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
32023 let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b00000000);
32024 assert_eq_m512i(r, a);
32025 let r = _mm512_mask_shuffle_i32x4(a, 0b11111111_11111111, a, b, 0b00000000);
32026 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
32027 assert_eq_m512i(r, e);
32028 }
32029
32030 #[simd_test(enable = "avx512f")]
32031 unsafe fn test_mm512_maskz_shuffle_i32x4() {
32032 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
32033 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
32034 let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b00000000);
32035 assert_eq_m512i(r, _mm512_setzero_si512());
32036 let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b00000000);
32037 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
32038 assert_eq_m512i(r, e);
32039 }
32040
32041 #[simd_test(enable = "avx512f")]
32042 unsafe fn test_mm512_shuffle_f32x4() {
32043 let a = _mm512_setr_ps(
32044 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
32045 );
32046 let b = _mm512_setr_ps(
32047 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
32048 );
32049 let r = _mm512_shuffle_f32x4(a, b, 0b00000000);
32050 let e = _mm512_setr_ps(
32051 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
32052 );
32053 assert_eq_m512(r, e);
32054 }
32055
32056 #[simd_test(enable = "avx512f")]
32057 unsafe fn test_mm512_mask_shuffle_f32x4() {
32058 let a = _mm512_setr_ps(
32059 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
32060 );
32061 let b = _mm512_setr_ps(
32062 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
32063 );
32064 let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000);
32065 assert_eq_m512(r, a);
32066 let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000);
32067 let e = _mm512_setr_ps(
32068 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
32069 );
32070 assert_eq_m512(r, e);
32071 }
32072
32073 #[simd_test(enable = "avx512f")]
32074 unsafe fn test_mm512_maskz_shuffle_f32x4() {
32075 let a = _mm512_setr_ps(
32076 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
32077 );
32078 let b = _mm512_setr_ps(
32079 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
32080 );
32081 let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000);
32082 assert_eq_m512(r, _mm512_setzero_ps());
32083 let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000);
32084 let e = _mm512_setr_ps(
32085 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
32086 );
32087 assert_eq_m512(r, e);
32088 }
32089
32090 #[simd_test(enable = "avx512f")]
32091 unsafe fn test_mm512_extractf32x4_ps() {
32092 let a = _mm512_setr_ps(
32093 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32094 );
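// Index 0x1 extracts the second 128-bit lane, i.e. elements 4..7 in _mm512_setr_ps order.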
32095 let r = _mm512_extractf32x4_ps(a, 0x1);
32096 let e = _mm_setr_ps(5., 6., 7., 8.);
32097 assert_eq_m128(r, e);
32098 }
32099
32100 #[simd_test(enable = "avx512f")]
32101 unsafe fn test_mm512_mask_extractf32x4_ps() {
32102 let a = _mm512_setr_ps(
32103 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32104 );
32105 let src = _mm_set1_ps(100.);
32106 let r = _mm512_mask_extractf32x4_ps(src, 0, a, 0x1);
32107 assert_eq_m128(r, src);
32108 let r = _mm512_mask_extractf32x4_ps(src, 0b11111111, a, 0x1);
32109 let e = _mm_setr_ps(5., 6., 7., 8.);
32110 assert_eq_m128(r, e);
32111 }
32112
32113 #[simd_test(enable = "avx512f")]
32114 unsafe fn test_mm512_maskz_extractf32x4_ps() {
32115 let a = _mm512_setr_ps(
32116 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32117 );
32118 let r = _mm512_maskz_extractf32x4_ps(0, a, 0x1);
32119 assert_eq_m128(r, _mm_setzero_ps());
32120 let r = _mm512_maskz_extractf32x4_ps(0b00000001, a, 0x1);
32121 let e = _mm_setr_ps(5., 0., 0., 0.);
32122 assert_eq_m128(r, e);
32123 }
32124
32125 #[simd_test(enable = "avx512f")]
32126 unsafe fn test_mm512_extracti32x4_epi32() {
32127 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32128 let r = _mm512_extracti32x4_epi32(a, 0x1);
32129 let e = _mm_setr_epi32(5, 6, 7, 8);
32130 assert_eq_m128i(r, e);
32131 }
32132
32133 #[simd_test(enable = "avx512f")]
32134 unsafe fn test_mm512_mask_extracti32x4_epi32() {
32135 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32136 let src = _mm_set1_epi32(100);
32137 let r = _mm512_mask_extracti32x4_epi32(src, 0, a, 0x1);
32138 assert_eq_m128i(r, src);
32139 let r = _mm512_mask_extracti32x4_epi32(src, 0b11111111, a, 0x1);
32140 let e = _mm_setr_epi32(5, 6, 7, 8);
32141 assert_eq_m128i(r, e);
32142 }
32143
32144 #[simd_test(enable = "avx512f")]
32145 unsafe fn test_mm512_maskz_extracti32x4_epi32() {
32146 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32147 let r = _mm512_maskz_extracti32x4_epi32(0, a, 0x1);
32148 assert_eq_m128i(r, _mm_setzero_si128());
32149 let r = _mm512_maskz_extracti32x4_epi32(0b00000001, a, 0x1);
32150 let e = _mm_setr_epi32(5, 0, 0, 0);
32151 assert_eq_m128i(r, e);
32152 }
32153
32154 #[simd_test(enable = "avx512f")]
32155 unsafe fn test_mm512_moveldup_ps() {
32156 let a = _mm512_setr_ps(
32157 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32158 );
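// moveldup copies each even-indexed element into the odd position above it (movehdup below
// does the opposite, duplicating the odd-indexed elements).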
32159 let r = _mm512_moveldup_ps(a);
32160 let e = _mm512_setr_ps(
32161 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
32162 );
32163 assert_eq_m512(r, e);
32164 }
32165
32166 #[simd_test(enable = "avx512f")]
32167 unsafe fn test_mm512_mask_moveldup_ps() {
32168 let a = _mm512_setr_ps(
32169 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32170 );
32171 let r = _mm512_mask_moveldup_ps(a, 0, a);
32172 assert_eq_m512(r, a);
32173 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
32174 let e = _mm512_setr_ps(
32175 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
32176 );
32177 assert_eq_m512(r, e);
32178 }
32179
32180 #[simd_test(enable = "avx512f")]
32181 unsafe fn test_mm512_maskz_moveldup_ps() {
32182 let a = _mm512_setr_ps(
32183 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32184 );
32185 let r = _mm512_maskz_moveldup_ps(0, a);
32186 assert_eq_m512(r, _mm512_setzero_ps());
32187 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
32188 let e = _mm512_setr_ps(
32189 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
32190 );
32191 assert_eq_m512(r, e);
32192 }
32193
32194 #[simd_test(enable = "avx512f")]
32195 unsafe fn test_mm512_movehdup_ps() {
32196 let a = _mm512_setr_ps(
32197 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32198 );
32199 let r = _mm512_movehdup_ps(a);
32200 let e = _mm512_setr_ps(
32201 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
32202 );
32203 assert_eq_m512(r, e);
32204 }
32205
32206 #[simd_test(enable = "avx512f")]
32207 unsafe fn test_mm512_mask_movehdup_ps() {
32208 let a = _mm512_setr_ps(
32209 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32210 );
32211 let r = _mm512_mask_movehdup_ps(a, 0, a);
32212 assert_eq_m512(r, a);
32213 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
32214 let e = _mm512_setr_ps(
32215 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
32216 );
32217 assert_eq_m512(r, e);
32218 }
32219
32220 #[simd_test(enable = "avx512f")]
32221 unsafe fn test_mm512_maskz_movehdup_ps() {
32222 let a = _mm512_setr_ps(
32223 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32224 );
32225 let r = _mm512_maskz_movehdup_ps(0, a);
32226 assert_eq_m512(r, _mm512_setzero_ps());
32227 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
32228 let e = _mm512_setr_ps(
32229 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
32230 );
32231 assert_eq_m512(r, e);
32232 }
32233
32234 #[simd_test(enable = "avx512f")]
32235 unsafe fn test_mm512_inserti32x4() {
32236 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32237 let b = _mm_setr_epi32(17, 18, 19, 20);
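// Index 0 replaces the lowest 128-bit lane of `a` with `b`; the remaining lanes are untouched.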
32238 let r = _mm512_inserti32x4(a, b, 0);
32239 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32240 assert_eq_m512i(r, e);
32241 }
32242
32243 #[simd_test(enable = "avx512f")]
32244 unsafe fn test_mm512_mask_inserti32x4() {
32245 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32246 let b = _mm_setr_epi32(17, 18, 19, 20);
32247 let r = _mm512_mask_inserti32x4(a, 0, a, b, 0);
32248 assert_eq_m512i(r, a);
32249 let r = _mm512_mask_inserti32x4(a, 0b11111111_11111111, a, b, 0);
32250 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32251 assert_eq_m512i(r, e);
32252 }
32253
32254 #[simd_test(enable = "avx512f")]
32255 unsafe fn test_mm512_maskz_inserti32x4() {
32256 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32257 let b = _mm_setr_epi32(17, 18, 19, 20);
32258 let r = _mm512_maskz_inserti32x4(0, a, b, 0);
32259 assert_eq_m512i(r, _mm512_setzero_si512());
32260 let r = _mm512_maskz_inserti32x4(0b00000000_11111111, a, b, 0);
32261 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
32262 assert_eq_m512i(r, e);
32263 }
32264
32265 #[simd_test(enable = "avx512f")]
32266 unsafe fn test_mm512_insertf32x4() {
32267 let a = _mm512_setr_ps(
32268 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32269 );
32270 let b = _mm_setr_ps(17., 18., 19., 20.);
32271 let r = _mm512_insertf32x4(a, b, 0);
32272 let e = _mm512_setr_ps(
32273 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32274 );
32275 assert_eq_m512(r, e);
32276 }
32277
32278 #[simd_test(enable = "avx512f")]
32279 unsafe fn test_mm512_mask_insertf32x4() {
32280 let a = _mm512_setr_ps(
32281 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32282 );
32283 let b = _mm_setr_ps(17., 18., 19., 20.);
32284 let r = _mm512_mask_insertf32x4(a, 0, a, b, 0);
32285 assert_eq_m512(r, a);
32286 let r = _mm512_mask_insertf32x4(a, 0b11111111_11111111, a, b, 0);
32287 let e = _mm512_setr_ps(
32288 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32289 );
32290 assert_eq_m512(r, e);
32291 }
32292
32293 #[simd_test(enable = "avx512f")]
32294 unsafe fn test_mm512_maskz_insertf32x4() {
32295 let a = _mm512_setr_ps(
32296 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32297 );
32298 let b = _mm_setr_ps(17., 18., 19., 20.);
32299 let r = _mm512_maskz_insertf32x4(0, a, b, 0);
32300 assert_eq_m512(r, _mm512_setzero_ps());
32301 let r = _mm512_maskz_insertf32x4(0b00000000_11111111, a, b, 0);
32302 let e = _mm512_setr_ps(
32303 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
32304 );
32305 assert_eq_m512(r, e);
32306 }
32307
32308 #[simd_test(enable = "avx512f")]
32309 unsafe fn test_mm512_castps128_ps512() {
32310 let a = _mm_setr_ps(17., 18., 19., 20.);
32311 let r = _mm512_castps128_ps512(a);
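// Intel leaves the upper 384 bits of this cast undefined; the -1.0 values expected below
// simply pin down what this implementation happens to fill them with.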
32312 let e = _mm512_setr_ps(
32313 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
32314 );
32315 assert_eq_m512(r, e);
32316 }
32317
32318 #[simd_test(enable = "avx512f")]
32319 unsafe fn test_mm512_castps256_ps512() {
32320 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
32321 let r = _mm512_castps256_ps512(a);
32322 let e = _mm512_setr_ps(
32323 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
32324 );
32325 assert_eq_m512(r, e);
32326 }
32327
32328 #[simd_test(enable = "avx512f")]
32329 unsafe fn test_mm512_zextps128_ps512() {
32330 let a = _mm_setr_ps(17., 18., 19., 20.);
32331 let r = _mm512_zextps128_ps512(a);
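// Unlike the plain cast above, zext guarantees zeroed upper lanes, so the whole result is
// well defined.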
32332 let e = _mm512_setr_ps(
32333 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
32334 );
32335 assert_eq_m512(r, e);
32336 }
32337
32338 #[simd_test(enable = "avx512f")]
32339 unsafe fn test_mm512_zextps256_ps512() {
32340 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
32341 let r = _mm512_zextps256_ps512(a);
32342 let e = _mm512_setr_ps(
32343 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
32344 );
32345 assert_eq_m512(r, e);
32346 }
32347
32348 #[simd_test(enable = "avx512f")]
32349 unsafe fn test_mm512_castps512_ps128() {
32350 let a = _mm512_setr_ps(
32351 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
32352 );
32353 let r = _mm512_castps512_ps128(a);
32354 let e = _mm_setr_ps(17., 18., 19., 20.);
32355 assert_eq_m128(r, e);
32356 }
32357
32358 #[simd_test(enable = "avx512f")]
32359 unsafe fn test_mm512_castps512_ps256() {
32360 let a = _mm512_setr_ps(
32361 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
32362 );
32363 let r = _mm512_castps512_ps256(a);
32364 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
32365 assert_eq_m256(r, e);
32366 }
32367
32368 #[simd_test(enable = "avx512f")]
32369 unsafe fn test_mm512_castps_pd() {
32370 let a = _mm512_set1_ps(1.);
32371 let r = _mm512_castps_pd(a);
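// Pure bit reinterpretation: each pair of 1.0f32 values (0x3F80_0000) forms the f64 bit
// pattern 0x3F80_0000_3F80_0000, which is about 0.0078125018.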
32372 let e = _mm512_set1_pd(0.007812501848093234);
32373 assert_eq_m512d(r, e);
32374 }
32375
32376 #[simd_test(enable = "avx512f")]
32377 unsafe fn test_mm512_castps_si512() {
32378 let a = _mm512_set1_ps(1.);
32379 let r = _mm512_castps_si512(a);
32380 let e = _mm512_set1_epi32(1065353216);
32381 assert_eq_m512i(r, e);
32382 }
32383
32384 #[simd_test(enable = "avx512f")]
32385 unsafe fn test_mm512_broadcastd_epi32() {
32386 let a = _mm_set_epi32(17, 18, 19, 20);
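// Lane 0 of `a` (the last argument, 20) is broadcast to all sixteen lanes.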
32387 let r = _mm512_broadcastd_epi32(a);
32388 let e = _mm512_set1_epi32(20);
32389 assert_eq_m512i(r, e);
32390 }
32391
32392 #[simd_test(enable = "avx512f")]
32393 unsafe fn test_mm512_mask_broadcastd_epi32() {
32394 let src = _mm512_set1_epi32(20);
32395 let a = _mm_set_epi32(17, 18, 19, 20);
32396 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
32397 assert_eq_m512i(r, src);
32398 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
32399 let e = _mm512_set1_epi32(20);
32400 assert_eq_m512i(r, e);
32401 }
32402
32403 #[simd_test(enable = "avx512f")]
32404 unsafe fn test_mm512_maskz_broadcastd_epi32() {
32405 let a = _mm_set_epi32(17, 18, 19, 20);
32406 let r = _mm512_maskz_broadcastd_epi32(0, a);
32407 assert_eq_m512i(r, _mm512_setzero_si512());
32408 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
32409 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
32410 assert_eq_m512i(r, e);
32411 }
32412
32413 #[simd_test(enable = "avx512f")]
32414 unsafe fn test_mm512_broadcastss_ps() {
32415 let a = _mm_set_ps(17., 18., 19., 20.);
32416 let r = _mm512_broadcastss_ps(a);
32417 let e = _mm512_set1_ps(20.);
32418 assert_eq_m512(r, e);
32419 }
32420
32421 #[simd_test(enable = "avx512f")]
32422 unsafe fn test_mm512_mask_broadcastss_ps() {
32423 let src = _mm512_set1_ps(20.);
32424 let a = _mm_set_ps(17., 18., 19., 20.);
32425 let r = _mm512_mask_broadcastss_ps(src, 0, a);
32426 assert_eq_m512(r, src);
32427 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
32428 let e = _mm512_set1_ps(20.);
32429 assert_eq_m512(r, e);
32430 }
32431
32432 #[simd_test(enable = "avx512f")]
32433 unsafe fn test_mm512_maskz_broadcastss_ps() {
32434 let a = _mm_set_ps(17., 18., 19., 20.);
32435 let r = _mm512_maskz_broadcastss_ps(0, a);
32436 assert_eq_m512(r, _mm512_setzero_ps());
32437 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
32438 let e = _mm512_setr_ps(
32439 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
32440 );
32441 assert_eq_m512(r, e);
32442 }
32443
32444 #[simd_test(enable = "avx512f")]
32445 unsafe fn test_mm512_broadcast_i32x4() {
32446 let a = _mm_set_epi32(17, 18, 19, 20);
32447 let r = _mm512_broadcast_i32x4(a);
32448 let e = _mm512_set_epi32(
32449 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
32450 );
32451 assert_eq_m512i(r, e);
32452 }
32453
32454 #[simd_test(enable = "avx512f")]
32455 unsafe fn test_mm512_mask_broadcast_i32x4() {
32456 let src = _mm512_set1_epi32(20);
32457 let a = _mm_set_epi32(17, 18, 19, 20);
32458 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
32459 assert_eq_m512i(r, src);
32460 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
32461 let e = _mm512_set_epi32(
32462 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
32463 );
32464 assert_eq_m512i(r, e);
32465 }
32466
32467 #[simd_test(enable = "avx512f")]
32468 unsafe fn test_mm512_maskz_broadcast_i32x4() {
32469 let a = _mm_set_epi32(17, 18, 19, 20);
32470 let r = _mm512_maskz_broadcast_i32x4(0, a);
32471 assert_eq_m512i(r, _mm512_setzero_si512());
32472 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
32473 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
32474 assert_eq_m512i(r, e);
32475 }
32476
32477 #[simd_test(enable = "avx512f")]
32478 unsafe fn test_mm512_broadcast_f32x4() {
32479 let a = _mm_set_ps(17., 18., 19., 20.);
32480 let r = _mm512_broadcast_f32x4(a);
32481 let e = _mm512_set_ps(
32482 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
32483 );
32484 assert_eq_m512(r, e);
32485 }
32486
32487 #[simd_test(enable = "avx512f")]
32488 unsafe fn test_mm512_mask_broadcast_f32x4() {
32489 let src = _mm512_set1_ps(20.);
32490 let a = _mm_set_ps(17., 18., 19., 20.);
32491 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
32492 assert_eq_m512(r, src);
32493 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
32494 let e = _mm512_set_ps(
32495 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
32496 );
32497 assert_eq_m512(r, e);
32498 }
32499
32500 #[simd_test(enable = "avx512f")]
32501 unsafe fn test_mm512_maskz_broadcast_f32x4() {
32502 let a = _mm_set_ps(17., 18., 19., 20.);
32503 let r = _mm512_maskz_broadcast_f32x4(0, a);
32504 assert_eq_m512(r, _mm512_setzero_ps());
32505 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
32506 let e = _mm512_set_ps(
32507 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
32508 );
32509 assert_eq_m512(r, e);
32510 }
32511
32512 #[simd_test(enable = "avx512f")]
32513 unsafe fn test_mm512_mask_blend_epi32() {
32514 let a = _mm512_set1_epi32(1);
32515 let b = _mm512_set1_epi32(2);
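// blend takes lanes from `b` where the mask bit is set, so 0b11111111_00000000 fills the
// upper eight lanes with 2 and keeps 1 in the lower eight.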
32516 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
32517 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
32518 assert_eq_m512i(r, e);
32519 }
32520
32521 #[simd_test(enable = "avx512f")]
32522 unsafe fn test_mm512_mask_blend_ps() {
32523 let a = _mm512_set1_ps(1.);
32524 let b = _mm512_set1_ps(2.);
32525 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
32526 let e = _mm512_set_ps(
32527 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
32528 );
32529 assert_eq_m512(r, e);
32530 }
32531
32532 #[simd_test(enable = "avx512f")]
32533 unsafe fn test_mm512_unpackhi_epi32() {
32534 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32535 let b = _mm512_set_epi32(
32536 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32537 );
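// unpackhi interleaves the upper two 32-bit elements of each 128-bit lane, alternating a
// then b (unpacklo further down does the same with the lower two elements).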
32538 let r = _mm512_unpackhi_epi32(a, b);
32539 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
32540 assert_eq_m512i(r, e);
32541 }
32542
32543 #[simd_test(enable = "avx512f")]
32544 unsafe fn test_mm512_mask_unpackhi_epi32() {
32545 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32546 let b = _mm512_set_epi32(
32547 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32548 );
32549 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
32550 assert_eq_m512i(r, a);
32551 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
32552 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
32553 assert_eq_m512i(r, e);
32554 }
32555
32556 #[simd_test(enable = "avx512f")]
32557 unsafe fn test_mm512_maskz_unpackhi_epi32() {
32558 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32559 let b = _mm512_set_epi32(
32560 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32561 );
32562 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
32563 assert_eq_m512i(r, _mm512_setzero_si512());
32564 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
32565 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
32566 assert_eq_m512i(r, e);
32567 }
32568
32569 #[simd_test(enable = "avx512f")]
32570 unsafe fn test_mm512_unpackhi_ps() {
32571 let a = _mm512_set_ps(
32572 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32573 );
32574 let b = _mm512_set_ps(
32575 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32576 );
32577 let r = _mm512_unpackhi_ps(a, b);
32578 let e = _mm512_set_ps(
32579 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
32580 );
32581 assert_eq_m512(r, e);
32582 }
32583
32584 #[simd_test(enable = "avx512f")]
32585 unsafe fn test_mm512_mask_unpackhi_ps() {
32586 let a = _mm512_set_ps(
32587 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32588 );
32589 let b = _mm512_set_ps(
32590 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32591 );
32592 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
32593 assert_eq_m512(r, a);
32594 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
32595 let e = _mm512_set_ps(
32596 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
32597 );
32598 assert_eq_m512(r, e);
32599 }
32600
32601 #[simd_test(enable = "avx512f")]
32602 unsafe fn test_mm512_maskz_unpackhi_ps() {
32603 let a = _mm512_set_ps(
32604 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32605 );
32606 let b = _mm512_set_ps(
32607 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32608 );
32609 let r = _mm512_maskz_unpackhi_ps(0, a, b);
32610 assert_eq_m512(r, _mm512_setzero_ps());
32611 let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
32612 let e = _mm512_set_ps(
32613 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
32614 );
32615 assert_eq_m512(r, e);
32616 }
32617
32618 #[simd_test(enable = "avx512f")]
32619 unsafe fn test_mm512_unpacklo_epi32() {
32620 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32621 let b = _mm512_set_epi32(
32622 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32623 );
32624 let r = _mm512_unpacklo_epi32(a, b);
32625 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
32626 assert_eq_m512i(r, e);
32627 }
32628
32629 #[simd_test(enable = "avx512f")]
32630 unsafe fn test_mm512_mask_unpacklo_epi32() {
32631 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32632 let b = _mm512_set_epi32(
32633 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32634 );
32635 let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
32636 assert_eq_m512i(r, a);
32637 let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
32638 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
32639 assert_eq_m512i(r, e);
32640 }
32641
32642 #[simd_test(enable = "avx512f")]
32643 unsafe fn test_mm512_maskz_unpacklo_epi32() {
32644 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32645 let b = _mm512_set_epi32(
32646 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32647 );
32648 let r = _mm512_maskz_unpacklo_epi32(0, a, b);
32649 assert_eq_m512i(r, _mm512_setzero_si512());
32650 let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
32651 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
32652 assert_eq_m512i(r, e);
32653 }
32654
32655 #[simd_test(enable = "avx512f")]
32656 unsafe fn test_mm512_unpacklo_ps() {
32657 let a = _mm512_set_ps(
32658 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32659 );
32660 let b = _mm512_set_ps(
32661 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32662 );
32663 let r = _mm512_unpacklo_ps(a, b);
32664 let e = _mm512_set_ps(
32665 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
32666 );
32667 assert_eq_m512(r, e);
32668 }
32669
32670 #[simd_test(enable = "avx512f")]
32671 unsafe fn test_mm512_mask_unpacklo_ps() {
32672 let a = _mm512_set_ps(
32673 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32674 );
32675 let b = _mm512_set_ps(
32676 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32677 );
32678 let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
32679 assert_eq_m512(r, a);
32680 let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
32681 let e = _mm512_set_ps(
32682 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
32683 );
32684 assert_eq_m512(r, e);
32685 }
32686
32687 #[simd_test(enable = "avx512f")]
32688 unsafe fn test_mm512_maskz_unpacklo_ps() {
32689 let a = _mm512_set_ps(
32690 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32691 );
32692 let b = _mm512_set_ps(
32693 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32694 );
32695 let r = _mm512_maskz_unpacklo_ps(0, a, b);
32696 assert_eq_m512(r, _mm512_setzero_ps());
32697 let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
32698 let e = _mm512_set_ps(
32699 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
32700 );
32701 assert_eq_m512(r, e);
32702 }
32703
32704 #[simd_test(enable = "avx512f")]
32705 unsafe fn test_mm512_alignr_epi32() {
32706 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
32707 let b = _mm512_set_epi32(
32708 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
32709 );
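// alignr concatenates `a` above `b` and shifts right by whole 32-bit lanes; only the low
// four bits of the count are used, which is why both 0 and 16 return `b` unchanged here.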
32710 let r = _mm512_alignr_epi32(a, b, 0);
32711 assert_eq_m512i(r, b);
32712 let r = _mm512_alignr_epi32(a, b, 16);
32713 assert_eq_m512i(r, b);
32714 let r = _mm512_alignr_epi32(a, b, 1);
32715 let e = _mm512_set_epi32(
32716 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
32717 );
32718 assert_eq_m512i(r, e);
32719 }
32720
32721 #[simd_test(enable = "avx512f")]
32722 unsafe fn test_mm512_mask_alignr_epi32() {
32723 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
32724 let b = _mm512_set_epi32(
32725 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
32726 );
32727 let r = _mm512_mask_alignr_epi32(a, 0, a, b, 1);
32728 assert_eq_m512i(r, a);
32729 let r = _mm512_mask_alignr_epi32(a, 0b11111111_11111111, a, b, 1);
32730 let e = _mm512_set_epi32(
32731 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
32732 );
32733 assert_eq_m512i(r, e);
32734 }
32735
32736 #[simd_test(enable = "avx512f")]
32737 unsafe fn test_mm512_maskz_alignr_epi32() {
32738 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
32739 let b = _mm512_set_epi32(
32740 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
32741 );
32742 let r = _mm512_maskz_alignr_epi32(0, a, b, 1);
32743 assert_eq_m512i(r, _mm512_setzero_si512());
32744 let r = _mm512_maskz_alignr_epi32(0b00000000_11111111, a, b, 1);
32745 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
32746 assert_eq_m512i(r, e);
32747 }
32748
32749 #[simd_test(enable = "avx512f")]
32750 unsafe fn test_mm512_and_epi32() {
32751 let a = _mm512_set_epi32(
32752 1 << 1 | 1 << 2,
32753 0,
32754 0,
32755 0,
32756 0,
32757 0,
32758 0,
32759 0,
32760 0,
32761 0,
32762 0,
32763 0,
32764 0,
32765 0,
32766 0,
32767 1 << 1 | 1 << 3,
32768 );
32769 let b = _mm512_set_epi32(
32770 1 << 1,
32771 0,
32772 0,
32773 0,
32774 0,
32775 0,
32776 0,
32777 0,
32778 0,
32779 0,
32780 0,
32781 0,
32782 0,
32783 0,
32784 0,
32785 1 << 3 | 1 << 4,
32786 );
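// Only bit 1 is shared by both top lanes and only bit 3 by both bottom lanes, so those are
// the only bits that survive the AND.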
32787 let r = _mm512_and_epi32(a, b);
32788 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
32789 assert_eq_m512i(r, e);
32790 }
32791
32792 #[simd_test(enable = "avx512f")]
32793 unsafe fn test_mm512_mask_and_epi32() {
32794 let a = _mm512_set_epi32(
32795 1 << 1 | 1 << 2,
32796 0,
32797 0,
32798 0,
32799 0,
32800 0,
32801 0,
32802 0,
32803 0,
32804 0,
32805 0,
32806 0,
32807 0,
32808 0,
32809 0,
32810 1 << 1 | 1 << 3,
32811 );
32812 let b = _mm512_set_epi32(
32813 1 << 1,
32814 0,
32815 0,
32816 0,
32817 0,
32818 0,
32819 0,
32820 0,
32821 0,
32822 0,
32823 0,
32824 0,
32825 0,
32826 0,
32827 0,
32828 1 << 3 | 1 << 4,
32829 );
32830 let r = _mm512_mask_and_epi32(a, 0, a, b);
32831 assert_eq_m512i(r, a);
32832
32833 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
32834 let e = _mm512_set_epi32(
32835 1 << 1 | 1 << 2,
32836 0,
32837 0,
32838 0,
32839 0,
32840 0,
32841 0,
32842 0,
32843 0,
32844 0,
32845 0,
32846 0,
32847 0,
32848 0,
32849 0,
32850 1 << 3,
32851 );
32852 assert_eq_m512i(r, e);
32853 }
32854
32855 #[simd_test(enable = "avx512f")]
32856 unsafe fn test_mm512_maskz_and_epi32() {
32857 let a = _mm512_set_epi32(
32858 1 << 1 | 1 << 2,
32859 0,
32860 0,
32861 0,
32862 0,
32863 0,
32864 0,
32865 0,
32866 0,
32867 0,
32868 0,
32869 0,
32870 0,
32871 0,
32872 0,
32873 1 << 1 | 1 << 3,
32874 );
32875 let b = _mm512_set_epi32(
32876 1 << 1,
32877 0,
32878 0,
32879 0,
32880 0,
32881 0,
32882 0,
32883 0,
32884 0,
32885 0,
32886 0,
32887 0,
32888 0,
32889 0,
32890 0,
32891 1 << 3 | 1 << 4,
32892 );
32893 let r = _mm512_maskz_and_epi32(0, a, b);
32894 assert_eq_m512i(r, _mm512_setzero_si512());
32895
32896 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
32897 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
32898 assert_eq_m512i(r, e);
32899 }
32900
32901 #[simd_test(enable = "avx512f")]
32902 unsafe fn test_mm512_and_si512() {
32903 let a = _mm512_set_epi32(
32904 1 << 1 | 1 << 2,
32905 0,
32906 0,
32907 0,
32908 0,
32909 0,
32910 0,
32911 0,
32912 0,
32913 0,
32914 0,
32915 0,
32916 0,
32917 0,
32918 0,
32919 1 << 1 | 1 << 3,
32920 );
32921 let b = _mm512_set_epi32(
32922 1 << 1,
32923 0,
32924 0,
32925 0,
32926 0,
32927 0,
32928 0,
32929 0,
32930 0,
32931 0,
32932 0,
32933 0,
32934 0,
32935 0,
32936 0,
32937 1 << 3 | 1 << 4,
32938 );
32939 let r = _mm512_and_epi32(a, b);
32940 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
32941 assert_eq_m512i(r, e);
32942 }
32943
32944 #[simd_test(enable = "avx512f")]
32945 unsafe fn test_mm512_or_epi32() {
32946 let a = _mm512_set_epi32(
32947 1 << 1 | 1 << 2,
32948 0,
32949 0,
32950 0,
32951 0,
32952 0,
32953 0,
32954 0,
32955 0,
32956 0,
32957 0,
32958 0,
32959 0,
32960 0,
32961 0,
32962 1 << 1 | 1 << 3,
32963 );
32964 let b = _mm512_set_epi32(
32965 1 << 1,
32966 0,
32967 0,
32968 0,
32969 0,
32970 0,
32971 0,
32972 0,
32973 0,
32974 0,
32975 0,
32976 0,
32977 0,
32978 0,
32979 0,
32980 1 << 3 | 1 << 4,
32981 );
32982 let r = _mm512_or_epi32(a, b);
32983 let e = _mm512_set_epi32(
32984 1 << 1 | 1 << 2,
32985 0,
32986 0,
32987 0,
32988 0,
32989 0,
32990 0,
32991 0,
32992 0,
32993 0,
32994 0,
32995 0,
32996 0,
32997 0,
32998 0,
32999 1 << 1 | 1 << 3 | 1 << 4,
33000 );
33001 assert_eq_m512i(r, e);
33002 }
33003
33004 #[simd_test(enable = "avx512f")]
33005 unsafe fn test_mm512_mask_or_epi32() {
33006 let a = _mm512_set_epi32(
33007 1 << 1 | 1 << 2,
33008 0,
33009 0,
33010 0,
33011 0,
33012 0,
33013 0,
33014 0,
33015 0,
33016 0,
33017 0,
33018 0,
33019 0,
33020 0,
33021 0,
33022 1 << 1 | 1 << 3,
33023 );
33024 let b = _mm512_set_epi32(
33025 1 << 1,
33026 0,
33027 0,
33028 0,
33029 0,
33030 0,
33031 0,
33032 0,
33033 0,
33034 0,
33035 0,
33036 0,
33037 0,
33038 0,
33039 0,
33040 1 << 3 | 1 << 4,
33041 );
33042 let r = _mm512_mask_or_epi32(a, 0, a, b);
33043 assert_eq_m512i(r, a);
33044
33045 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
33046 let e = _mm512_set_epi32(
33047 1 << 1 | 1 << 2,
33048 0,
33049 0,
33050 0,
33051 0,
33052 0,
33053 0,
33054 0,
33055 0,
33056 0,
33057 0,
33058 0,
33059 0,
33060 0,
33061 0,
33062 1 << 1 | 1 << 3 | 1 << 4,
33063 );
33064 assert_eq_m512i(r, e);
33065 }
33066
33067 #[simd_test(enable = "avx512f")]
33068 unsafe fn test_mm512_maskz_or_epi32() {
33069 let a = _mm512_set_epi32(
33070 1 << 1 | 1 << 2,
33071 0,
33072 0,
33073 0,
33074 0,
33075 0,
33076 0,
33077 0,
33078 0,
33079 0,
33080 0,
33081 0,
33082 0,
33083 0,
33084 0,
33085 1 << 1 | 1 << 3,
33086 );
33087 let b = _mm512_set_epi32(
33088 1 << 1,
33089 0,
33090 0,
33091 0,
33092 0,
33093 0,
33094 0,
33095 0,
33096 0,
33097 0,
33098 0,
33099 0,
33100 0,
33101 0,
33102 0,
33103 1 << 3 | 1 << 4,
33104 );
33105 let r = _mm512_maskz_or_epi32(0, a, b);
33106 assert_eq_m512i(r, _mm512_setzero_si512());
33107
33108 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
33109 let e = _mm512_set_epi32(
33110 0,
33111 0,
33112 0,
33113 0,
33114 0,
33115 0,
33116 0,
33117 0,
33118 0,
33119 0,
33120 0,
33121 0,
33122 0,
33123 0,
33124 0,
33125 1 << 1 | 1 << 3 | 1 << 4,
33126 );
33127 assert_eq_m512i(r, e);
33128 }
33129
33130 #[simd_test(enable = "avx512f")]
33131 unsafe fn test_mm512_or_si512() {
33132 let a = _mm512_set_epi32(
33133 1 << 1 | 1 << 2,
33134 0,
33135 0,
33136 0,
33137 0,
33138 0,
33139 0,
33140 0,
33141 0,
33142 0,
33143 0,
33144 0,
33145 0,
33146 0,
33147 0,
33148 1 << 1 | 1 << 3,
33149 );
33150 let b = _mm512_set_epi32(
33151 1 << 1,
33152 0,
33153 0,
33154 0,
33155 0,
33156 0,
33157 0,
33158 0,
33159 0,
33160 0,
33161 0,
33162 0,
33163 0,
33164 0,
33165 0,
33166 1 << 3 | 1 << 4,
33167 );
33168 let r = _mm512_or_si512(a, b);
33169 let e = _mm512_set_epi32(
33170 1 << 1 | 1 << 2,
33171 0,
33172 0,
33173 0,
33174 0,
33175 0,
33176 0,
33177 0,
33178 0,
33179 0,
33180 0,
33181 0,
33182 0,
33183 0,
33184 0,
33185 1 << 1 | 1 << 3 | 1 << 4,
33186 );
33187 assert_eq_m512i(r, e);
33188 }
33189
33190 #[simd_test(enable = "avx512f")]
33191 unsafe fn test_mm512_xor_epi32() {
33192 let a = _mm512_set_epi32(
33193 1 << 1 | 1 << 2,
33194 0,
33195 0,
33196 0,
33197 0,
33198 0,
33199 0,
33200 0,
33201 0,
33202 0,
33203 0,
33204 0,
33205 0,
33206 0,
33207 0,
33208 1 << 1 | 1 << 3,
33209 );
33210 let b = _mm512_set_epi32(
33211 1 << 1,
33212 0,
33213 0,
33214 0,
33215 0,
33216 0,
33217 0,
33218 0,
33219 0,
33220 0,
33221 0,
33222 0,
33223 0,
33224 0,
33225 0,
33226 1 << 3 | 1 << 4,
33227 );
33228 let r = _mm512_xor_epi32(a, b);
33229 let e = _mm512_set_epi32(
33230 1 << 2,
33231 0,
33232 0,
33233 0,
33234 0,
33235 0,
33236 0,
33237 0,
33238 0,
33239 0,
33240 0,
33241 0,
33242 0,
33243 0,
33244 0,
33245 1 << 1 | 1 << 4,
33246 );
33247 assert_eq_m512i(r, e);
33248 }
33249
33250 #[simd_test(enable = "avx512f")]
33251 unsafe fn test_mm512_mask_xor_epi32() {
33252 let a = _mm512_set_epi32(
33253 1 << 1 | 1 << 2,
33254 0,
33255 0,
33256 0,
33257 0,
33258 0,
33259 0,
33260 0,
33261 0,
33262 0,
33263 0,
33264 0,
33265 0,
33266 0,
33267 0,
33268 1 << 1 | 1 << 3,
33269 );
33270 let b = _mm512_set_epi32(
33271 1 << 1,
33272 0,
33273 0,
33274 0,
33275 0,
33276 0,
33277 0,
33278 0,
33279 0,
33280 0,
33281 0,
33282 0,
33283 0,
33284 0,
33285 0,
33286 1 << 3 | 1 << 4,
33287 );
33288 let r = _mm512_mask_xor_epi32(a, 0, a, b);
33289 assert_eq_m512i(r, a);
33290
33291 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
33292 let e = _mm512_set_epi32(
33293 1 << 1 | 1 << 2,
33294 0,
33295 0,
33296 0,
33297 0,
33298 0,
33299 0,
33300 0,
33301 0,
33302 0,
33303 0,
33304 0,
33305 0,
33306 0,
33307 0,
33308 1 << 1 | 1 << 4,
33309 );
33310 assert_eq_m512i(r, e);
33311 }
33312
33313 #[simd_test(enable = "avx512f")]
33314 unsafe fn test_mm512_maskz_xor_epi32() {
33315 let a = _mm512_set_epi32(
33316 1 << 1 | 1 << 2,
33317 0,
33318 0,
33319 0,
33320 0,
33321 0,
33322 0,
33323 0,
33324 0,
33325 0,
33326 0,
33327 0,
33328 0,
33329 0,
33330 0,
33331 1 << 1 | 1 << 3,
33332 );
33333 let b = _mm512_set_epi32(
33334 1 << 1,
33335 0,
33336 0,
33337 0,
33338 0,
33339 0,
33340 0,
33341 0,
33342 0,
33343 0,
33344 0,
33345 0,
33346 0,
33347 0,
33348 0,
33349 1 << 3 | 1 << 4,
33350 );
33351 let r = _mm512_maskz_xor_epi32(0, a, b);
33352 assert_eq_m512i(r, _mm512_setzero_si512());
33353
33354 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
33355 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
33356 assert_eq_m512i(r, e);
33357 }
33358
33359 #[simd_test(enable = "avx512f")]
33360 unsafe fn test_mm512_xor_si512() {
33361 let a = _mm512_set_epi32(
33362 1 << 1 | 1 << 2,
33363 0,
33364 0,
33365 0,
33366 0,
33367 0,
33368 0,
33369 0,
33370 0,
33371 0,
33372 0,
33373 0,
33374 0,
33375 0,
33376 0,
33377 1 << 1 | 1 << 3,
33378 );
33379 let b = _mm512_set_epi32(
33380 1 << 1,
33381 0,
33382 0,
33383 0,
33384 0,
33385 0,
33386 0,
33387 0,
33388 0,
33389 0,
33390 0,
33391 0,
33392 0,
33393 0,
33394 0,
33395 1 << 3 | 1 << 4,
33396 );
33397 let r = _mm512_xor_si512(a, b);
33398 let e = _mm512_set_epi32(
33399 1 << 2,
33400 0,
33401 0,
33402 0,
33403 0,
33404 0,
33405 0,
33406 0,
33407 0,
33408 0,
33409 0,
33410 0,
33411 0,
33412 0,
33413 0,
33414 1 << 1 | 1 << 4,
33415 );
33416 assert_eq_m512i(r, e);
33417 }
33418
33419 #[simd_test(enable = "avx512f")]
33420 unsafe fn test_mm512_andnot_epi32() {
33421 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
33422 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
33423 let r = _mm512_andnot_epi32(a, b);
33424 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
33425 assert_eq_m512i(r, e);
33426 }
33427
33428 #[simd_test(enable = "avx512f")]
33429 unsafe fn test_mm512_mask_andnot_epi32() {
33430 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
33431 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
33432 let r = _mm512_mask_andnot_epi32(a, 0, a, b);
33433 assert_eq_m512i(r, a);
33434
33435 let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
33436 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
33437 assert_eq_m512i(r, e);
33438 }
33439
33440 #[simd_test(enable = "avx512f")]
33441 unsafe fn test_mm512_maskz_andnot_epi32() {
33442 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
33443 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
33444 let r = _mm512_maskz_andnot_epi32(0, a, b);
33445 assert_eq_m512i(r, _mm512_setzero_si512());
33446
33447 let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
33448 let e = _mm512_set_epi32(
33449 0,
33450 0,
33451 0,
33452 0,
33453 0,
33454 0,
33455 0,
33456 0,
33457 1 << 3 | 1 << 4,
33458 1 << 3 | 1 << 4,
33459 1 << 3 | 1 << 4,
33460 1 << 3 | 1 << 4,
33461 1 << 3 | 1 << 4,
33462 1 << 3 | 1 << 4,
33463 1 << 3 | 1 << 4,
33464 1 << 3 | 1 << 4,
33465 );
33466 assert_eq_m512i(r, e);
33467 }
33468
33469 #[simd_test(enable = "avx512f")]
33470 unsafe fn test_mm512_kand() {
33471 let a: u16 = 0b11001100_00110011;
33472 let b: u16 = 0b11001100_00110011;
33473 let r = _mm512_kand(a, b);
33474 let e: u16 = 0b11001100_00110011;
33475 assert_eq!(r, e);
33476 }
33477
33478 #[simd_test(enable = "avx512f")]
33479 unsafe fn test_kand_mask16() {
33480 let a: u16 = 0b11001100_00110011;
33481 let b: u16 = 0b11001100_00110011;
33482 let r = _kand_mask16(a, b);
33483 let e: u16 = 0b11001100_00110011;
33484 assert_eq!(r, e);
33485 }
33486
33487 #[simd_test(enable = "avx512f")]
33488 unsafe fn test_mm512_kor() {
33489 let a: u16 = 0b11001100_00110011;
33490 let b: u16 = 0b00101110_00001011;
33491 let r = _mm512_kor(a, b);
33492 let e: u16 = 0b11101110_00111011;
33493 assert_eq!(r, e);
33494 }
33495
33496 #[simd_test(enable = "avx512f")]
33497 unsafe fn test_kor_mask16() {
33498 let a: u16 = 0b11001100_00110011;
33499 let b: u16 = 0b00101110_00001011;
33500 let r = _kor_mask16(a, b);
33501 let e: u16 = 0b11101110_00111011;
33502 assert_eq!(r, e);
33503 }
33504
33505 #[simd_test(enable = "avx512f")]
33506 unsafe fn test_mm512_kxor() {
33507 let a: u16 = 0b11001100_00110011;
33508 let b: u16 = 0b00101110_00001011;
33509 let r = _mm512_kxor(a, b);
33510 let e: u16 = 0b11100010_00111000;
33511 assert_eq!(r, e);
33512 }
33513
33514 #[simd_test(enable = "avx512f")]
33515 unsafe fn test_kxor_mask16() {
33516 let a: u16 = 0b11001100_00110011;
33517 let b: u16 = 0b00101110_00001011;
33518 let r = _kxor_mask16(a, b);
33519 let e: u16 = 0b11100010_00111000;
33520 assert_eq!(r, e);
33521 }
33522
33523 #[simd_test(enable = "avx512f")]
33524 unsafe fn test_mm512_knot() {
33525 let a: u16 = 0b11001100_00110011;
33526 let r = _mm512_knot(a);
33527 let e: u16 = 0b00110011_11001100;
33528 assert_eq!(r, e);
33529 }
33530
33531 #[simd_test(enable = "avx512f")]
33532 unsafe fn test_knot_mask16() {
33533 let a: u16 = 0b11001100_00110011;
33534 let r = _knot_mask16(a);
33535 let e: u16 = 0b00110011_11001100;
33536 assert_eq!(r, e);
33537 }
33538
33539 #[simd_test(enable = "avx512f")]
33540 unsafe fn test_mm512_kandn() {
33541 let a: u16 = 0b11001100_00110011;
33542 let b: u16 = 0b00101110_00001011;
33543 let r = _mm512_kandn(a, b);
33544 let e: u16 = 0b00100010_00001000;
33545 assert_eq!(r, e);
33546 }
33547
33548 #[simd_test(enable = "avx512f")]
33549 unsafe fn test_kandn_mask16() {
33550 let a: u16 = 0b11001100_00110011;
33551 let b: u16 = 0b00101110_00001011;
33552 let r = _kandn_mask16(a, b);
33553 let e: u16 = 0b00100010_00001000;
33554 assert_eq!(r, e);
33555 }
33556
33557 #[simd_test(enable = "avx512f")]
33558 unsafe fn test_mm512_kxnor() {
33559 let a: u16 = 0b11001100_00110011;
33560 let b: u16 = 0b00101110_00001011;
33561 let r = _mm512_kxnor(a, b);
33562 let e: u16 = 0b00011101_11000111;
33563 assert_eq!(r, e);
33564 }
33565
33566 #[simd_test(enable = "avx512f")]
33567 unsafe fn test_kxnor_mask16() {
33568 let a: u16 = 0b11001100_00110011;
33569 let b: u16 = 0b00101110_00001011;
33570 let r = _kxnor_mask16(a, b);
33571 let e: u16 = 0b00011101_11000111;
33572 assert_eq!(r, e);
33573 }
33574
33575 #[simd_test(enable = "avx512f")]
33576 unsafe fn test_mm512_kmov() {
33577 let a: u16 = 0b11001100_00110011;
33578 let r = _mm512_kmov(a);
33579 let e: u16 = 0b11001100_00110011;
33580 assert_eq!(r, e);
33581 }
33582
33583 #[simd_test(enable = "avx512f")]
33584 unsafe fn test_mm512_int2mask() {
33585 let a: i32 = 0b11001100_00110011;
33586 let r = _mm512_int2mask(a);
33587 let e: u16 = 0b11001100_00110011;
33588 assert_eq!(r, e);
33589 }
33590
33591 #[simd_test(enable = "avx512f")]
33592 unsafe fn test_mm512_mask2int() {
33593 let k1: __mmask16 = 0b11001100_00110011;
33594 let r = _mm512_mask2int(k1);
33595 let e: i32 = 0b11001100_00110011;
33596 assert_eq!(r, e);
33597 }
33598
33599 #[simd_test(enable = "avx512f")]
33600 unsafe fn test_mm512_kunpackb() {
33601 let a: u16 = 0b11001100_00110011;
33602 let b: u16 = 0b00101110_00001011;
33603 let r = _mm512_kunpackb(a, b);
33604 let e: u16 = 0b00101110_00110011;
33605 assert_eq!(r, e);
33606 }
33607
33608 #[simd_test(enable = "avx512f")]
33609 unsafe fn test_mm512_kortestc() {
33610 let a: u16 = 0b11001100_00110011;
33611 let b: u16 = 0b00101110_00001011;
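// kortestc reports the carry flag: 1 only when the OR of the two masks is all ones.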
33612 let r = _mm512_kortestc(a, b);
33613 assert_eq!(r, 0);
33614 let b: u16 = 0b11111111_11111111;
33615 let r = _mm512_kortestc(a, b);
33616 assert_eq!(r, 1);
33617 }
33618
33619 #[simd_test(enable = "avx512f")]
33620 unsafe fn test_mm512_test_epi32_mask() {
33621 let a = _mm512_set1_epi32(1 << 0);
33622 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
33623 let r = _mm512_test_epi32_mask(a, b);
33624 let e: __mmask16 = 0b11111111_11111111;
33625 assert_eq!(r, e);
33626 }
33627
33628 #[simd_test(enable = "avx512f")]
33629 unsafe fn test_mm512_mask_test_epi32_mask() {
33630 let a = _mm512_set1_epi32(1 << 0);
33631 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
33632 let r = _mm512_mask_test_epi32_mask(0, a, b);
33633 assert_eq!(r, 0);
33634 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
33635 let e: __mmask16 = 0b11111111_11111111;
33636 assert_eq!(r, e);
33637 }
33638
33639 #[simd_test(enable = "avx512f")]
33640 unsafe fn test_mm512_testn_epi32_mask() {
33641 let a = _mm512_set1_epi32(1 << 0);
33642 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
33643 let r = _mm512_testn_epi32_mask(a, b);
33644 let e: __mmask16 = 0b00000000_00000000;
33645 assert_eq!(r, e);
33646 }
33647
33648 #[simd_test(enable = "avx512f")]
33649 unsafe fn test_mm512_mask_testn_epi32_mask() {
33650 let a = _mm512_set1_epi32(1 << 0);
33651 let b = _mm512_set1_epi32(1 << 1);
33652 let r = _mm512_mask_testn_epi32_mask(0, a, b);
33653 assert_eq!(r, 0);
33654 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
33655 let e: __mmask16 = 0b11111111_11111111;
33656 assert_eq!(r, e);
33657 }
33658
33659 #[simd_test(enable = "avx512f")]
33660 unsafe fn test_mm512_stream_ps() {
33661 #[repr(align(64))]
33662 struct Memory {
33663 pub data: [f32; 16],
33664 }
33665 let a = _mm512_set1_ps(7.0);
33666 let mut mem = Memory { data: [-1.0; 16] };
33667
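// vmovntps is a non-temporal store; the 512-bit destination must be 64-byte aligned.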
33668 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
33669 for i in 0..16 {
33670 assert_eq!(mem.data[i], get_m512(a, i));
33671 }
33672 }
33673
33674 #[simd_test(enable = "avx512f")]
33675 unsafe fn test_mm512_reduce_add_epi32() {
33676 let a = _mm512_set1_epi32(1);
33677 let e: i32 = _mm512_reduce_add_epi32(a);
33678 assert_eq!(16, e);
33679 }
33680
33681 #[simd_test(enable = "avx512f")]
33682 unsafe fn test_mm512_mask_reduce_add_epi32() {
33683 let a = _mm512_set1_epi32(1);
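// Only the upper eight lanes (mask bits 8..15) take part in the masked reduction.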
33684 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
33685 assert_eq!(8, e);
33686 }
33687
33688 #[simd_test(enable = "avx512f")]
33689 unsafe fn test_mm512_reduce_add_ps() {
33690 let a = _mm512_set1_ps(1.);
33691 let e: f32 = _mm512_reduce_add_ps(a);
33692 assert_eq!(16., e);
33693 }
33694
33695 #[simd_test(enable = "avx512f")]
33696 unsafe fn test_mm512_mask_reduce_add_ps() {
33697 let a = _mm512_set1_ps(1.);
33698 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
33699 assert_eq!(8., e);
33700 }
33701
33702 #[simd_test(enable = "avx512f")]
33703 unsafe fn test_mm512_reduce_mul_epi32() {
33704 let a = _mm512_set1_epi32(2);
33705 let e: i32 = _mm512_reduce_mul_epi32(a);
33706 assert_eq!(65536, e);
33707 }
33708
33709 #[simd_test(enable = "avx512f")]
33710 unsafe fn test_mm512_mask_reduce_mul_epi32() {
33711 let a = _mm512_set1_epi32(2);
33712 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
33713 assert_eq!(256, e);
33714 }
33715
33716 #[simd_test(enable = "avx512f")]
33717 unsafe fn test_mm512_reduce_mul_ps() {
33718 let a = _mm512_set1_ps(2.);
33719 let e: f32 = _mm512_reduce_mul_ps(a);
33720 assert_eq!(65536., e);
33721 }
33722
33723 #[simd_test(enable = "avx512f")]
33724 unsafe fn test_mm512_mask_reduce_mul_ps() {
33725 let a = _mm512_set1_ps(2.);
33726 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
33727 assert_eq!(256., e);
33728 }
33729
33730 #[simd_test(enable = "avx512f")]
33731 unsafe fn test_mm512_reduce_max_epi32() {
33732 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33733 let e: i32 = _mm512_reduce_max_epi32(a);
33734 assert_eq!(15, e);
33735 }
33736
33737 #[simd_test(enable = "avx512f")]
33738 unsafe fn test_mm512_mask_reduce_max_epi32() {
33739 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33740 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
33741 assert_eq!(7, e);
33742 }
33743
33744 #[simd_test(enable = "avx512f")]
33745 unsafe fn test_mm512_reduce_max_epu32() {
33746 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33747 let e: u32 = _mm512_reduce_max_epu32(a);
33748 assert_eq!(15, e);
33749 }
33750
33751 #[simd_test(enable = "avx512f")]
33752 unsafe fn test_mm512_mask_reduce_max_epu32() {
33753 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33754 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
33755 assert_eq!(7, e);
33756 }
33757
33758 #[simd_test(enable = "avx512f")]
33759 unsafe fn test_mm512_reduce_max_ps() {
33760 let a = _mm512_set_ps(
33761 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33762 );
33763 let e: f32 = _mm512_reduce_max_ps(a);
33764 assert_eq!(15., e);
33765 }
33766
33767 #[simd_test(enable = "avx512f")]
33768 unsafe fn test_mm512_mask_reduce_max_ps() {
33769 let a = _mm512_set_ps(
33770 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33771 );
33772 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
33773 assert_eq!(7., e);
33774 }
33775
33776 #[simd_test(enable = "avx512f")]
33777 unsafe fn test_mm512_reduce_min_epi32() {
33778 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33779 let e: i32 = _mm512_reduce_min_epi32(a);
33780 assert_eq!(0, e);
33781 }
33782
33783 #[simd_test(enable = "avx512f")]
33784 unsafe fn test_mm512_mask_reduce_min_epi32() {
33785 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33786 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
33787 assert_eq!(0, e);
33788 }
33789
33790 #[simd_test(enable = "avx512f")]
33791 unsafe fn test_mm512_reduce_min_epu32() {
33792 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33793 let e: u32 = _mm512_reduce_min_epu32(a);
33794 assert_eq!(0, e);
33795 }
33796
33797 #[simd_test(enable = "avx512f")]
33798 unsafe fn test_mm512_mask_reduce_min_epu32() {
33799 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33800 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
33801 assert_eq!(0, e);
33802 }
33803
33804 #[simd_test(enable = "avx512f")]
33805 unsafe fn test_mm512_reduce_min_ps() {
33806 let a = _mm512_set_ps(
33807 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33808 );
33809 let e: f32 = _mm512_reduce_min_ps(a);
33810 assert_eq!(0., e);
33811 }
33812
33813 #[simd_test(enable = "avx512f")]
33814 unsafe fn test_mm512_mask_reduce_min_ps() {
33815 let a = _mm512_set_ps(
33816 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33817 );
33818 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
33819 assert_eq!(0., e);
33820 }
33821
33822 #[simd_test(enable = "avx512f")]
33823 unsafe fn test_mm512_reduce_and_epi32() {
33824 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
33825 let e: i32 = _mm512_reduce_and_epi32(a);
33826 assert_eq!(0, e);
33827 }
33828
33829 #[simd_test(enable = "avx512f")]
33830 unsafe fn test_mm512_mask_reduce_and_epi32() {
33831 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
33832 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
33833 assert_eq!(1, e);
33834 }
33835
33836 #[simd_test(enable = "avx512f")]
33837 unsafe fn test_mm512_reduce_or_epi32() {
33838 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
33839 let e: i32 = _mm512_reduce_or_epi32(a);
33840 assert_eq!(3, e);
33841 }
33842
33843 #[simd_test(enable = "avx512f")]
33844 unsafe fn test_mm512_mask_reduce_or_epi32() {
33845 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
33846 let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
33847 assert_eq!(1, e);
33848 }
33849
33850 #[simd_test(enable = "avx512f")]
33851 unsafe fn test_mm512_mask_compress_epi32() {
33852 let src = _mm512_set1_epi32(200);
33853 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
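// Compress packs the mask-selected elements contiguously into the low lanes
// and passes the remaining lanes through from src.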
33854 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
33855 let e = _mm512_set_epi32(
33856 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
33857 );
33858 assert_eq_m512i(r, e);
33859 }
33860
33861 #[simd_test(enable = "avx512f")]
33862 unsafe fn test_mm512_maskz_compress_epi32() {
33863 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33864 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
33865 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
33866 assert_eq_m512i(r, e);
33867 }
33868
33869 #[simd_test(enable = "avx512f")]
33870 unsafe fn test_mm512_mask_compress_ps() {
33871 let src = _mm512_set1_ps(200.);
33872 let a = _mm512_set_ps(
33873 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33874 );
33875 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
33876 let e = _mm512_set_ps(
33877 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
33878 );
33879 assert_eq_m512(r, e);
33880 }
33881
33882 #[simd_test(enable = "avx512f")]
33883 unsafe fn test_mm512_maskz_compress_ps() {
33884 let a = _mm512_set_ps(
33885 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33886 );
33887 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
33888 let e = _mm512_set_ps(
33889 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
33890 );
33891 assert_eq_m512(r, e);
33892 }
33893
33894 #[simd_test(enable = "avx512f")]
33895 unsafe fn test_mm512_mask_expand_epi32() {
33896 let src = _mm512_set1_epi32(200);
33897 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33898 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
33899 let e = _mm512_set_epi32(
33900 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
33901 );
33902 assert_eq_m512i(r, e);
33903 }
33904
33905 #[simd_test(enable = "avx512f")]
33906 unsafe fn test_mm512_maskz_expand_epi32() {
33907 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
33908 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
33909 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
33910 assert_eq_m512i(r, e);
33911 }
33912
33913 #[simd_test(enable = "avx512f")]
33914 unsafe fn test_mm512_mask_expand_ps() {
33915 let src = _mm512_set1_ps(200.);
33916 let a = _mm512_set_ps(
33917 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33918 );
33919 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
33920 let e = _mm512_set_ps(
33921 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
33922 );
33923 assert_eq_m512(r, e);
33924 }
33925
33926 #[simd_test(enable = "avx512f")]
33927 unsafe fn test_mm512_maskz_expand_ps() {
33928 let a = _mm512_set_ps(
33929 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
33930 );
33931 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
33932 let e = _mm512_set_ps(
33933 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
33934 );
33935 assert_eq_m512(r, e);
33936 }
33937
33938 #[simd_test(enable = "avx512f")]
33939 unsafe fn test_mm512_loadu_epi32() {
33940 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
33941 let p = a.as_ptr();
33942 let r = _mm512_loadu_epi32(black_box(p));
33943 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
33944 assert_eq_m512i(r, e);
33945 }
33946
33947 #[simd_test(enable = "avx512f")]
33948 unsafe fn test_mm512_storeu_epi32() {
33949 let a = _mm512_set1_epi32(9);
33950 let mut r = _mm512_undefined_epi32();
33951 _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
33952 assert_eq_m512i(r, a);
33953 }
33954
33955 #[simd_test(enable = "avx512f")]
33956 unsafe fn test_mm512_loadu_si512() {
33957 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
33958 let p = a.as_ptr();
33959 let r = _mm512_loadu_si512(black_box(p));
33960 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
33961 assert_eq_m512i(r, e);
33962 }
33963
33964 #[simd_test(enable = "avx512f")]
33965 unsafe fn test_mm512_storeu_si512() {
33966 let a = _mm512_set1_epi32(9);
33967 let mut r = _mm512_undefined_epi32();
33968 _mm512_storeu_si512(&mut r as *mut _ as *mut i32, a);
33969 assert_eq_m512i(r, a);
33970 }
33971
33972 #[simd_test(enable = "avx512f")]
33973 unsafe fn test_mm512_load_si512() {
33974 #[repr(align(64))]
33975 struct Align {
33976 data: [i32; 16], // 64 bytes
33977 }
33978 let a = Align {
33979 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
33980 };
33981 let p = (a.data).as_ptr();
33982 let r = _mm512_load_si512(black_box(p));
33983 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
33984 assert_eq_m512i(r, e);
33985 }
33986
33987 #[simd_test(enable = "avx512f")]
33988 unsafe fn test_mm512_store_si512() {
33989 let a = _mm512_set1_epi32(9);
33990 let mut r = _mm512_undefined_epi32();
33991 _mm512_store_si512(&mut r as *mut _ as *mut i32, a);
33992 assert_eq_m512i(r, a);
33993 }
33994
33995 #[simd_test(enable = "avx512f")]
33996 unsafe fn test_mm512_load_epi32() {
33997 #[repr(align(64))]
33998 struct Align {
33999 data: [i32; 16], // 64 bytes
34000 }
34001 let a = Align {
34002 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
34003 };
34004 let p = (a.data).as_ptr();
34005 let r = _mm512_load_epi32(black_box(p));
34006 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
34007 assert_eq_m512i(r, e);
34008 }
34009
34010 #[simd_test(enable = "avx512f")]
34011 unsafe fn test_mm512_store_epi32() {
34012 let a = _mm512_set1_epi32(9);
34013 let mut r = _mm512_undefined_epi32();
34014 _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
34015 assert_eq_m512i(r, a);
34016 }
34017
34018 #[simd_test(enable = "avx512f")]
34019 unsafe fn test_mm512_load_ps() {
34020 #[repr(align(64))]
34021 struct Align {
34022 data: [f32; 16], // 64 bytes
34023 }
34024 let a = Align {
34025 data: [
34026 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
34027 ],
34028 };
34029 let p = (a.data).as_ptr();
34030 let r = _mm512_load_ps(black_box(p));
34031 let e = _mm512_setr_ps(
34032 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
34033 );
34034 assert_eq_m512(r, e);
34035 }
34036
34037 #[simd_test(enable = "avx512f")]
34038 unsafe fn test_mm512_store_ps() {
34039 let a = _mm512_set1_ps(9.);
34040 let mut r = _mm512_undefined_ps();
34041 _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
34042 assert_eq_m512(r, a);
34043 }
34044
34045 #[simd_test(enable = "avx512f")]
34046 unsafe fn test_mm512_mask_set1_epi32() {
34047 let src = _mm512_set1_epi32(2);
34048 let a: i32 = 11;
34049 let r = _mm512_mask_set1_epi32(src, 0, a);
34050 assert_eq_m512i(r, src);
34051 let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
34052 let e = _mm512_set1_epi32(11);
34053 assert_eq_m512i(r, e);
34054 }
34055
34056 #[simd_test(enable = "avx512f")]
34057 unsafe fn test_mm512_maskz_set1_epi32() {
34058 let a: i32 = 11;
34059 let r = _mm512_maskz_set1_epi32(0, a);
34060 assert_eq_m512i(r, _mm512_setzero_si512());
34061 let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
34062 let e = _mm512_set1_epi32(11);
34063 assert_eq_m512i(r, e);
34064 }
34065
34066 #[simd_test(enable = "avx512f")]
34067 unsafe fn test_mm_mask_move_ss() {
34068 let src = _mm_set_ps(10., 11., 100., 110.);
34069 let a = _mm_set_ps(1., 2., 10., 20.);
34070 let b = _mm_set_ps(3., 4., 30., 40.);
34071 let r = _mm_mask_move_ss(src, 0, a, b);
34072 let e = _mm_set_ps(1., 2., 10., 110.);
34073 assert_eq_m128(r, e);
34074 let r = _mm_mask_move_ss(src, 0b11111111, a, b);
34075 let e = _mm_set_ps(1., 2., 10., 40.);
34076 assert_eq_m128(r, e);
34077 }
34078
34079 #[simd_test(enable = "avx512f")]
34080 unsafe fn test_mm_maskz_move_ss() {
34081 let a = _mm_set_ps(1., 2., 10., 20.);
34082 let b = _mm_set_ps(3., 4., 30., 40.);
34083 let r = _mm_maskz_move_ss(0, a, b);
34084 let e = _mm_set_ps(1., 2., 10., 0.);
34085 assert_eq_m128(r, e);
34086 let r = _mm_maskz_move_ss(0b11111111, a, b);
34087 let e = _mm_set_ps(1., 2., 10., 40.);
34088 assert_eq_m128(r, e);
34089 }
34090
34091 #[simd_test(enable = "avx512f")]
34092 unsafe fn test_mm_mask_move_sd() {
34093 let src = _mm_set_pd(10., 11.);
34094 let a = _mm_set_pd(1., 2.);
34095 let b = _mm_set_pd(3., 4.);
34096 let r = _mm_mask_move_sd(src, 0, a, b);
34097 let e = _mm_set_pd(1., 11.);
34098 assert_eq_m128d(r, e);
34099 let r = _mm_mask_move_sd(src, 0b11111111, a, b);
34100 let e = _mm_set_pd(1., 4.);
34101 assert_eq_m128d(r, e);
34102 }
34103
34104 #[simd_test(enable = "avx512f")]
34105 unsafe fn test_mm_maskz_move_sd() {
34106 let a = _mm_set_pd(1., 2.);
34107 let b = _mm_set_pd(3., 4.);
34108 let r = _mm_maskz_move_sd(0, a, b);
34109 let e = _mm_set_pd(1., 0.);
34110 assert_eq_m128d(r, e);
34111 let r = _mm_maskz_move_sd(0b11111111, a, b);
34112 let e = _mm_set_pd(1., 4.);
34113 assert_eq_m128d(r, e);
34114 }
34115
34116 #[simd_test(enable = "avx512f")]
34117 unsafe fn test_mm_mask_add_ss() {
34118 let src = _mm_set_ps(10., 11., 100., 110.);
34119 let a = _mm_set_ps(1., 2., 10., 20.);
34120 let b = _mm_set_ps(3., 4., 30., 40.);
34121 let r = _mm_mask_add_ss(src, 0, a, b);
34122 let e = _mm_set_ps(1., 2., 10., 110.);
34123 assert_eq_m128(r, e);
34124 let r = _mm_mask_add_ss(src, 0b11111111, a, b);
34125 let e = _mm_set_ps(1., 2., 10., 60.);
34126 assert_eq_m128(r, e);
34127 }
34128
34129 #[simd_test(enable = "avx512f")]
34130 unsafe fn test_mm_maskz_add_ss() {
34131 let a = _mm_set_ps(1., 2., 10., 20.);
34132 let b = _mm_set_ps(3., 4., 30., 40.);
34133 let r = _mm_maskz_add_ss(0, a, b);
34134 let e = _mm_set_ps(1., 2., 10., 0.);
34135 assert_eq_m128(r, e);
34136 let r = _mm_maskz_add_ss(0b11111111, a, b);
34137 let e = _mm_set_ps(1., 2., 10., 60.);
34138 assert_eq_m128(r, e);
34139 }
34140
34141 #[simd_test(enable = "avx512f")]
34142 unsafe fn test_mm_mask_add_sd() {
34143 let src = _mm_set_pd(10., 11.);
34144 let a = _mm_set_pd(1., 2.);
34145 let b = _mm_set_pd(3., 4.);
34146 let r = _mm_mask_add_sd(src, 0, a, b);
34147 let e = _mm_set_pd(1., 11.);
34148 assert_eq_m128d(r, e);
34149 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
34150 let e = _mm_set_pd(1., 6.);
34151 assert_eq_m128d(r, e);
34152 }
34153
34154 #[simd_test(enable = "avx512f")]
34155 unsafe fn test_mm_maskz_add_sd() {
34156 let a = _mm_set_pd(1., 2.);
34157 let b = _mm_set_pd(3., 4.);
34158 let r = _mm_maskz_add_sd(0, a, b);
34159 let e = _mm_set_pd(1., 0.);
34160 assert_eq_m128d(r, e);
34161 let r = _mm_maskz_add_sd(0b11111111, a, b);
34162 let e = _mm_set_pd(1., 6.);
34163 assert_eq_m128d(r, e);
34164 }
34165
34166 #[simd_test(enable = "avx512f")]
34167 unsafe fn test_mm_mask_sub_ss() {
34168 let src = _mm_set_ps(10., 11., 100., 110.);
34169 let a = _mm_set_ps(1., 2., 10., 20.);
34170 let b = _mm_set_ps(3., 4., 30., 40.);
34171 let r = _mm_mask_sub_ss(src, 0, a, b);
34172 let e = _mm_set_ps(1., 2., 10., 110.);
34173 assert_eq_m128(r, e);
34174 let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
34175 let e = _mm_set_ps(1., 2., 10., -20.);
34176 assert_eq_m128(r, e);
34177 }
34178
34179 #[simd_test(enable = "avx512f")]
34180 unsafe fn test_mm_maskz_sub_ss() {
34181 let a = _mm_set_ps(1., 2., 10., 20.);
34182 let b = _mm_set_ps(3., 4., 30., 40.);
34183 let r = _mm_maskz_sub_ss(0, a, b);
34184 let e = _mm_set_ps(1., 2., 10., 0.);
34185 assert_eq_m128(r, e);
34186 let r = _mm_maskz_sub_ss(0b11111111, a, b);
34187 let e = _mm_set_ps(1., 2., 10., -20.);
34188 assert_eq_m128(r, e);
34189 }
34190
34191 #[simd_test(enable = "avx512f")]
34192 unsafe fn test_mm_mask_sub_sd() {
34193 let src = _mm_set_pd(10., 11.);
34194 let a = _mm_set_pd(1., 2.);
34195 let b = _mm_set_pd(3., 4.);
34196 let r = _mm_mask_sub_sd(src, 0, a, b);
34197 let e = _mm_set_pd(1., 11.);
34198 assert_eq_m128d(r, e);
34199 let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
34200 let e = _mm_set_pd(1., -2.);
34201 assert_eq_m128d(r, e);
34202 }
34203
34204 #[simd_test(enable = "avx512f")]
34205 unsafe fn test_mm_maskz_sub_sd() {
34206 let a = _mm_set_pd(1., 2.);
34207 let b = _mm_set_pd(3., 4.);
34208 let r = _mm_maskz_sub_sd(0, a, b);
34209 let e = _mm_set_pd(1., 0.);
34210 assert_eq_m128d(r, e);
34211 let r = _mm_maskz_sub_sd(0b11111111, a, b);
34212 let e = _mm_set_pd(1., -2.);
34213 assert_eq_m128d(r, e);
34214 }
34215
34216 #[simd_test(enable = "avx512f")]
34217 unsafe fn test_mm_mask_mul_ss() {
34218 let src = _mm_set_ps(10., 11., 100., 110.);
34219 let a = _mm_set_ps(1., 2., 10., 20.);
34220 let b = _mm_set_ps(3., 4., 30., 40.);
34221 let r = _mm_mask_mul_ss(src, 0, a, b);
34222 let e = _mm_set_ps(1., 2., 10., 110.);
34223 assert_eq_m128(r, e);
34224 let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
34225 let e = _mm_set_ps(1., 2., 10., 800.);
34226 assert_eq_m128(r, e);
34227 }
34228
34229 #[simd_test(enable = "avx512f")]
34230 unsafe fn test_mm_maskz_mul_ss() {
34231 let a = _mm_set_ps(1., 2., 10., 20.);
34232 let b = _mm_set_ps(3., 4., 30., 40.);
34233 let r = _mm_maskz_mul_ss(0, a, b);
34234 let e = _mm_set_ps(1., 2., 10., 0.);
34235 assert_eq_m128(r, e);
34236 let r = _mm_maskz_mul_ss(0b11111111, a, b);
34237 let e = _mm_set_ps(1., 2., 10., 800.);
34238 assert_eq_m128(r, e);
34239 }
34240
34241 #[simd_test(enable = "avx512f")]
34242 unsafe fn test_mm_mask_mul_sd() {
34243 let src = _mm_set_pd(10., 11.);
34244 let a = _mm_set_pd(1., 2.);
34245 let b = _mm_set_pd(3., 4.);
34246 let r = _mm_mask_mul_sd(src, 0, a, b);
34247 let e = _mm_set_pd(1., 11.);
34248 assert_eq_m128d(r, e);
34249 let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
34250 let e = _mm_set_pd(1., 8.);
34251 assert_eq_m128d(r, e);
34252 }
34253
34254 #[simd_test(enable = "avx512f")]
34255 unsafe fn test_mm_maskz_mul_sd() {
34256 let a = _mm_set_pd(1., 2.);
34257 let b = _mm_set_pd(3., 4.);
34258 let r = _mm_maskz_mul_sd(0, a, b);
34259 let e = _mm_set_pd(1., 0.);
34260 assert_eq_m128d(r, e);
34261 let r = _mm_maskz_mul_sd(0b11111111, a, b);
34262 let e = _mm_set_pd(1., 8.);
34263 assert_eq_m128d(r, e);
34264 }
34265
34266 #[simd_test(enable = "avx512f")]
34267 unsafe fn test_mm_mask_div_ss() {
34268 let src = _mm_set_ps(10., 11., 100., 110.);
34269 let a = _mm_set_ps(1., 2., 10., 20.);
34270 let b = _mm_set_ps(3., 4., 30., 40.);
34271 let r = _mm_mask_div_ss(src, 0, a, b);
34272 let e = _mm_set_ps(1., 2., 10., 110.);
34273 assert_eq_m128(r, e);
34274 let r = _mm_mask_div_ss(src, 0b11111111, a, b);
34275 let e = _mm_set_ps(1., 2., 10., 0.5);
34276 assert_eq_m128(r, e);
34277 }
34278
34279 #[simd_test(enable = "avx512f")]
34280 unsafe fn test_mm_maskz_div_ss() {
34281 let a = _mm_set_ps(1., 2., 10., 20.);
34282 let b = _mm_set_ps(3., 4., 30., 40.);
34283 let r = _mm_maskz_div_ss(0, a, b);
34284 let e = _mm_set_ps(1., 2., 10., 0.);
34285 assert_eq_m128(r, e);
34286 let r = _mm_maskz_div_ss(0b11111111, a, b);
34287 let e = _mm_set_ps(1., 2., 10., 0.5);
34288 assert_eq_m128(r, e);
34289 }
34290
34291 #[simd_test(enable = "avx512f")]
34292 unsafe fn test_mm_mask_div_sd() {
34293 let src = _mm_set_pd(10., 11.);
34294 let a = _mm_set_pd(1., 2.);
34295 let b = _mm_set_pd(3., 4.);
34296 let r = _mm_mask_div_sd(src, 0, a, b);
34297 let e = _mm_set_pd(1., 11.);
34298 assert_eq_m128d(r, e);
34299 let r = _mm_mask_div_sd(src, 0b11111111, a, b);
34300 let e = _mm_set_pd(1., 0.5);
34301 assert_eq_m128d(r, e);
34302 }
34303
34304 #[simd_test(enable = "avx512f")]
34305 unsafe fn test_mm_maskz_div_sd() {
34306 let a = _mm_set_pd(1., 2.);
34307 let b = _mm_set_pd(3., 4.);
34308 let r = _mm_maskz_div_sd(0, a, b);
34309 let e = _mm_set_pd(1., 0.);
34310 assert_eq_m128d(r, e);
34311 let r = _mm_maskz_div_sd(0b11111111, a, b);
34312 let e = _mm_set_pd(1., 0.5);
34313 assert_eq_m128d(r, e);
34314 }
34315
34316 #[simd_test(enable = "avx512f")]
34317 unsafe fn test_mm_mask_max_ss() {
34318 let a = _mm_set_ps(0., 1., 2., 3.);
34319 let b = _mm_set_ps(4., 5., 6., 7.);
34320 let r = _mm_mask_max_ss(a, 0, a, b);
34321 let e = _mm_set_ps(0., 1., 2., 3.);
34322 assert_eq_m128(r, e);
34323 let r = _mm_mask_max_ss(a, 0b11111111, a, b);
34324 let e = _mm_set_ps(0., 1., 2., 7.);
34325 assert_eq_m128(r, e);
34326 }
34327
34328 #[simd_test(enable = "avx512f")]
34329 unsafe fn test_mm_maskz_max_ss() {
34330 let a = _mm_set_ps(0., 1., 2., 3.);
34331 let b = _mm_set_ps(4., 5., 6., 7.);
34332 let r = _mm_maskz_max_ss(0, a, b);
34333 let e = _mm_set_ps(0., 1., 2., 0.);
34334 assert_eq_m128(r, e);
34335 let r = _mm_maskz_max_ss(0b11111111, a, b);
34336 let e = _mm_set_ps(0., 1., 2., 7.);
34337 assert_eq_m128(r, e);
34338 }
34339
34340 #[simd_test(enable = "avx512f")]
34341 unsafe fn test_mm_mask_max_sd() {
34342 let a = _mm_set_pd(0., 1.);
34343 let b = _mm_set_pd(2., 3.);
34344 let r = _mm_mask_max_sd(a, 0, a, b);
34345 let e = _mm_set_pd(0., 1.);
34346 assert_eq_m128d(r, e);
34347 let r = _mm_mask_max_sd(a, 0b11111111, a, b);
34348 let e = _mm_set_pd(0., 3.);
34349 assert_eq_m128d(r, e);
34350 }
34351
34352 #[simd_test(enable = "avx512f")]
34353 unsafe fn test_mm_maskz_max_sd() {
34354 let a = _mm_set_pd(0., 1.);
34355 let b = _mm_set_pd(2., 3.);
34356 let r = _mm_maskz_max_sd(0, a, b);
34357 let e = _mm_set_pd(0., 0.);
34358 assert_eq_m128d(r, e);
34359 let r = _mm_maskz_max_sd(0b11111111, a, b);
34360 let e = _mm_set_pd(0., 3.);
34361 assert_eq_m128d(r, e);
34362 }
34363
34364 #[simd_test(enable = "avx512f")]
34365 unsafe fn test_mm_mask_min_ss() {
34366 let a = _mm_set_ps(0., 1., 2., 3.);
34367 let b = _mm_set_ps(4., 5., 6., 7.);
34368 let r = _mm_mask_min_ss(a, 0, a, b);
34369 let e = _mm_set_ps(0., 1., 2., 3.);
34370 assert_eq_m128(r, e);
34371 let r = _mm_mask_min_ss(a, 0b11111111, a, b);
34372 let e = _mm_set_ps(0., 1., 2., 3.);
34373 assert_eq_m128(r, e);
34374 }
34375
34376 #[simd_test(enable = "avx512f")]
34377 unsafe fn test_mm_maskz_min_ss() {
34378 let a = _mm_set_ps(0., 1., 2., 3.);
34379 let b = _mm_set_ps(4., 5., 6., 7.);
34380 let r = _mm_maskz_min_ss(0, a, b);
34381 let e = _mm_set_ps(0., 1., 2., 0.);
34382 assert_eq_m128(r, e);
34383 let r = _mm_maskz_min_ss(0b11111111, a, b);
34384 let e = _mm_set_ps(0., 1., 2., 3.);
34385 assert_eq_m128(r, e);
34386 }
34387
34388 #[simd_test(enable = "avx512f")]
34389 unsafe fn test_mm_mask_min_sd() {
34390 let a = _mm_set_pd(0., 1.);
34391 let b = _mm_set_pd(2., 3.);
34392 let r = _mm_mask_min_sd(a, 0, a, b);
34393 let e = _mm_set_pd(0., 1.);
34394 assert_eq_m128d(r, e);
34395 let r = _mm_mask_min_sd(a, 0b11111111, a, b);
34396 let e = _mm_set_pd(0., 1.);
34397 assert_eq_m128d(r, e);
34398 }
34399
34400 #[simd_test(enable = "avx512f")]
34401 unsafe fn test_mm_maskz_min_sd() {
34402 let a = _mm_set_pd(0., 1.);
34403 let b = _mm_set_pd(2., 3.);
34404 let r = _mm_maskz_min_sd(0, a, b);
34405 let e = _mm_set_pd(0., 0.);
34406 assert_eq_m128d(r, e);
34407 let r = _mm_maskz_min_sd(0b11111111, a, b);
34408 let e = _mm_set_pd(0., 1.);
34409 assert_eq_m128d(r, e);
34410 }
34411
34412 #[simd_test(enable = "avx512f")]
34413 unsafe fn test_mm_mask_sqrt_ss() {
34414 let src = _mm_set_ps(10., 11., 100., 110.);
34415 let a = _mm_set_ps(1., 2., 10., 20.);
34416 let b = _mm_set_ps(3., 4., 30., 4.);
34417 let r = _mm_mask_sqrt_ss(src, 0, a, b);
34418 let e = _mm_set_ps(1., 2., 10., 110.);
34419 assert_eq_m128(r, e);
34420 let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
34421 let e = _mm_set_ps(1., 2., 10., 2.);
34422 assert_eq_m128(r, e);
34423 }
34424
34425 #[simd_test(enable = "avx512f")]
34426 unsafe fn test_mm_maskz_sqrt_ss() {
34427 let a = _mm_set_ps(1., 2., 10., 20.);
34428 let b = _mm_set_ps(3., 4., 30., 4.);
34429 let r = _mm_maskz_sqrt_ss(0, a, b);
34430 let e = _mm_set_ps(1., 2., 10., 0.);
34431 assert_eq_m128(r, e);
34432 let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
34433 let e = _mm_set_ps(1., 2., 10., 2.);
34434 assert_eq_m128(r, e);
34435 }
34436
34437 #[simd_test(enable = "avx512f")]
34438 unsafe fn test_mm_mask_sqrt_sd() {
34439 let src = _mm_set_pd(10., 11.);
34440 let a = _mm_set_pd(1., 2.);
34441 let b = _mm_set_pd(3., 4.);
34442 let r = _mm_mask_sqrt_sd(src, 0, a, b);
34443 let e = _mm_set_pd(1., 11.);
34444 assert_eq_m128d(r, e);
34445 let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
34446 let e = _mm_set_pd(1., 2.);
34447 assert_eq_m128d(r, e);
34448 }
34449
34450 #[simd_test(enable = "avx512f")]
34451 unsafe fn test_mm_maskz_sqrt_sd() {
34452 let a = _mm_set_pd(1., 2.);
34453 let b = _mm_set_pd(3., 4.);
34454 let r = _mm_maskz_sqrt_sd(0, a, b);
34455 let e = _mm_set_pd(1., 0.);
34456 assert_eq_m128d(r, e);
34457 let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
34458 let e = _mm_set_pd(1., 2.);
34459 assert_eq_m128d(r, e);
34460 }
34461
34462 #[simd_test(enable = "avx512f")]
34463 unsafe fn test_mm_rsqrt14_ss() {
34464 let a = _mm_set_ps(1., 2., 10., 20.);
34465 let b = _mm_set_ps(3., 4., 30., 4.);
34466 let r = _mm_rsqrt14_ss(a, b);
34467 let e = _mm_set_ps(1., 2., 10., 0.5);
34468 assert_eq_m128(r, e);
34469 }
34470
34471 #[simd_test(enable = "avx512f")]
34472 unsafe fn test_mm_mask_rsqrt14_ss() {
34473 let src = _mm_set_ps(10., 11., 100., 110.);
34474 let a = _mm_set_ps(1., 2., 10., 20.);
34475 let b = _mm_set_ps(3., 4., 30., 4.);
34476 let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
34477 let e = _mm_set_ps(1., 2., 10., 110.);
34478 assert_eq_m128(r, e);
34479 let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
34480 let e = _mm_set_ps(1., 2., 10., 0.5);
34481 assert_eq_m128(r, e);
34482 }
34483
34484 #[simd_test(enable = "avx512f")]
34485 unsafe fn test_mm_maskz_rsqrt14_ss() {
34486 let a = _mm_set_ps(1., 2., 10., 20.);
34487 let b = _mm_set_ps(3., 4., 30., 4.);
34488 let r = _mm_maskz_rsqrt14_ss(0, a, b);
34489 let e = _mm_set_ps(1., 2., 10., 0.);
34490 assert_eq_m128(r, e);
34491 let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
34492 let e = _mm_set_ps(1., 2., 10., 0.5);
34493 assert_eq_m128(r, e);
34494 }
34495
34496 #[simd_test(enable = "avx512f")]
34497 unsafe fn test_mm_rsqrt14_sd() {
34498 let a = _mm_set_pd(1., 2.);
34499 let b = _mm_set_pd(3., 4.);
34500 let r = _mm_rsqrt14_sd(a, b);
34501 let e = _mm_set_pd(1., 0.5);
34502 assert_eq_m128d(r, e);
34503 }
34504
34505 #[simd_test(enable = "avx512f")]
34506 unsafe fn test_mm_mask_rsqrt14_sd() {
34507 let src = _mm_set_pd(10., 11.);
34508 let a = _mm_set_pd(1., 2.);
34509 let b = _mm_set_pd(3., 4.);
34510 let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
34511 let e = _mm_set_pd(1., 11.);
34512 assert_eq_m128d(r, e);
34513 let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
34514 let e = _mm_set_pd(1., 0.5);
34515 assert_eq_m128d(r, e);
34516 }
34517
34518 #[simd_test(enable = "avx512f")]
34519 unsafe fn test_mm_maskz_rsqrt14_sd() {
34520 let a = _mm_set_pd(1., 2.);
34521 let b = _mm_set_pd(3., 4.);
34522 let r = _mm_maskz_rsqrt14_sd(0, a, b);
34523 let e = _mm_set_pd(1., 0.);
34524 assert_eq_m128d(r, e);
34525 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
34526 let e = _mm_set_pd(1., 0.5);
34527 assert_eq_m128d(r, e);
34528 }
34529
34530 #[simd_test(enable = "avx512f")]
34531 unsafe fn test_mm_rcp14_ss() {
34532 let a = _mm_set_ps(1., 2., 10., 20.);
34533 let b = _mm_set_ps(3., 4., 30., 4.);
34534 let r = _mm_rcp14_ss(a, b);
34535 let e = _mm_set_ps(1., 2., 10., 0.25);
34536 assert_eq_m128(r, e);
34537 }
34538
34539 #[simd_test(enable = "avx512f")]
34540 unsafe fn test_mm_mask_rcp14_ss() {
34541 let src = _mm_set_ps(10., 11., 100., 110.);
34542 let a = _mm_set_ps(1., 2., 10., 20.);
34543 let b = _mm_set_ps(3., 4., 30., 4.);
34544 let r = _mm_mask_rcp14_ss(src, 0, a, b);
34545 let e = _mm_set_ps(1., 2., 10., 110.);
34546 assert_eq_m128(r, e);
34547 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
34548 let e = _mm_set_ps(1., 2., 10., 0.25);
34549 assert_eq_m128(r, e);
34550 }
34551
34552 #[simd_test(enable = "avx512f")]
34553 unsafe fn test_mm_maskz_rcp14_ss() {
34554 let a = _mm_set_ps(1., 2., 10., 20.);
34555 let b = _mm_set_ps(3., 4., 30., 4.);
34556 let r = _mm_maskz_rcp14_ss(0, a, b);
34557 let e = _mm_set_ps(1., 2., 10., 0.);
34558 assert_eq_m128(r, e);
34559 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
34560 let e = _mm_set_ps(1., 2., 10., 0.25);
34561 assert_eq_m128(r, e);
34562 }
34563
34564 #[simd_test(enable = "avx512f")]
34565 unsafe fn test_mm_rcp14_sd() {
34566 let a = _mm_set_pd(1., 2.);
34567 let b = _mm_set_pd(3., 4.);
34568 let r = _mm_rcp14_sd(a, b);
34569 let e = _mm_set_pd(1., 0.25);
34570 assert_eq_m128d(r, e);
34571 }
34572
34573 #[simd_test(enable = "avx512f")]
34574 unsafe fn test_mm_mask_rcp14_sd() {
34575 let src = _mm_set_pd(10., 11.);
34576 let a = _mm_set_pd(1., 2.);
34577 let b = _mm_set_pd(3., 4.);
34578 let r = _mm_mask_rcp14_sd(src, 0, a, b);
34579 let e = _mm_set_pd(1., 11.);
34580 assert_eq_m128d(r, e);
34581 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
34582 let e = _mm_set_pd(1., 0.25);
34583 assert_eq_m128d(r, e);
34584 }
34585
34586 #[simd_test(enable = "avx512f")]
34587 unsafe fn test_mm_maskz_rcp14_sd() {
34588 let a = _mm_set_pd(1., 2.);
34589 let b = _mm_set_pd(3., 4.);
34590 let r = _mm_maskz_rcp14_sd(0, a, b);
34591 let e = _mm_set_pd(1., 0.);
34592 assert_eq_m128d(r, e);
34593 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
34594 let e = _mm_set_pd(1., 0.25);
34595 assert_eq_m128d(r, e);
34596 }
34597
34598 #[simd_test(enable = "avx512f")]
34599 unsafe fn test_mm_getexp_ss() {
34600 let a = _mm_set1_ps(2.);
34601 let b = _mm_set1_ps(3.);
34602 let r = _mm_getexp_ss(a, b);
34603 let e = _mm_set_ps(2., 2., 2., 1.);
34604 assert_eq_m128(r, e);
34605 }
34606
34607 #[simd_test(enable = "avx512f")]
34608 unsafe fn test_mm_mask_getexp_ss() {
34609 let a = _mm_set1_ps(2.);
34610 let b = _mm_set1_ps(3.);
34611 let r = _mm_mask_getexp_ss(a, 0, a, b);
34612 let e = _mm_set_ps(2., 2., 2., 2.);
34613 assert_eq_m128(r, e);
34614 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
34615 let e = _mm_set_ps(2., 2., 2., 1.);
34616 assert_eq_m128(r, e);
34617 }
34618
34619 #[simd_test(enable = "avx512f")]
34620 unsafe fn test_mm_maskz_getexp_ss() {
34621 let a = _mm_set1_ps(2.);
34622 let b = _mm_set1_ps(3.);
34623 let r = _mm_maskz_getexp_ss(0, a, b);
34624 let e = _mm_set_ps(2., 2., 2., 0.);
34625 assert_eq_m128(r, e);
34626 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
34627 let e = _mm_set_ps(2., 2., 2., 1.);
34628 assert_eq_m128(r, e);
34629 }
34630
34631 #[simd_test(enable = "avx512f")]
34632 unsafe fn test_mm_getexp_sd() {
34633 let a = _mm_set1_pd(2.);
34634 let b = _mm_set1_pd(3.);
34635 let r = _mm_getexp_sd(a, b);
34636 let e = _mm_set_pd(2., 1.);
34637 assert_eq_m128d(r, e);
34638 }
34639
34640 #[simd_test(enable = "avx512f")]
34641 unsafe fn test_mm_mask_getexp_sd() {
34642 let a = _mm_set1_pd(2.);
34643 let b = _mm_set1_pd(3.);
34644 let r = _mm_mask_getexp_sd(a, 0, a, b);
34645 let e = _mm_set_pd(2., 2.);
34646 assert_eq_m128d(r, e);
34647 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
34648 let e = _mm_set_pd(2., 1.);
34649 assert_eq_m128d(r, e);
34650 }
34651
34652 #[simd_test(enable = "avx512f")]
34653 unsafe fn test_mm_maskz_getexp_sd() {
34654 let a = _mm_set1_pd(2.);
34655 let b = _mm_set1_pd(3.);
34656 let r = _mm_maskz_getexp_sd(0, a, b);
34657 let e = _mm_set_pd(2., 0.);
34658 assert_eq_m128d(r, e);
34659 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
34660 let e = _mm_set_pd(2., 1.);
34661 assert_eq_m128d(r, e);
34662 }
34663
34664 #[simd_test(enable = "avx512f")]
34665 unsafe fn test_mm_getmant_ss() {
34666 let a = _mm_set1_ps(20.);
34667 let b = _mm_set1_ps(10.);
34668 let r = _mm_getmant_ss(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34669 let e = _mm_set_ps(20., 20., 20., 1.25);
34670 assert_eq_m128(r, e);
34671 }
34672
34673 #[simd_test(enable = "avx512f")]
34674 unsafe fn test_mm_mask_getmant_ss() {
34675 let a = _mm_set1_ps(20.);
34676 let b = _mm_set1_ps(10.);
34677 let r = _mm_mask_getmant_ss(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34678 let e = _mm_set_ps(20., 20., 20., 20.);
34679 assert_eq_m128(r, e);
34680 let r = _mm_mask_getmant_ss(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34681 let e = _mm_set_ps(20., 20., 20., 1.25);
34682 assert_eq_m128(r, e);
34683 }
34684
34685 #[simd_test(enable = "avx512f")]
34686 unsafe fn test_mm_maskz_getmant_ss() {
34687 let a = _mm_set1_ps(20.);
34688 let b = _mm_set1_ps(10.);
34689 let r = _mm_maskz_getmant_ss(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34690 let e = _mm_set_ps(20., 20., 20., 0.);
34691 assert_eq_m128(r, e);
34692 let r = _mm_maskz_getmant_ss(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34693 let e = _mm_set_ps(20., 20., 20., 1.25);
34694 assert_eq_m128(r, e);
34695 }
34696
34697 #[simd_test(enable = "avx512f")]
34698 unsafe fn test_mm_getmant_sd() {
34699 let a = _mm_set1_pd(20.);
34700 let b = _mm_set1_pd(10.);
34701 let r = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34702 let e = _mm_set_pd(20., 1.25);
34703 assert_eq_m128d(r, e);
34704 }
34705
34706 #[simd_test(enable = "avx512f")]
34707 unsafe fn test_mm_mask_getmant_sd() {
34708 let a = _mm_set1_pd(20.);
34709 let b = _mm_set1_pd(10.);
34710 let r = _mm_mask_getmant_sd(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34711 let e = _mm_set_pd(20., 20.);
34712 assert_eq_m128d(r, e);
34713 let r = _mm_mask_getmant_sd(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34714 let e = _mm_set_pd(20., 1.25);
34715 assert_eq_m128d(r, e);
34716 }
34717
34718 #[simd_test(enable = "avx512f")]
34719 unsafe fn test_mm_maskz_getmant_sd() {
34720 let a = _mm_set1_pd(20.);
34721 let b = _mm_set1_pd(10.);
34722 let r = _mm_maskz_getmant_sd(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34723 let e = _mm_set_pd(20., 0.);
34724 assert_eq_m128d(r, e);
34725 let r = _mm_maskz_getmant_sd(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
34726 let e = _mm_set_pd(20., 1.25);
34727 assert_eq_m128d(r, e);
34728 }
34729
34730 #[simd_test(enable = "avx512f")]
34731 unsafe fn test_mm_roundscale_ss() {
34732 let a = _mm_set1_ps(2.2);
34733 let b = _mm_set1_ps(1.1);
34734 let r = _mm_roundscale_ss(a, b, 0);
34735 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
34736 assert_eq_m128(r, e);
34737 }
34738
34739 #[simd_test(enable = "avx512f")]
34740 unsafe fn test_mm_mask_roundscale_ss() {
34741 let a = _mm_set1_ps(2.2);
34742 let b = _mm_set1_ps(1.1);
34743 let r = _mm_mask_roundscale_ss(a, 0, a, b, 0);
34744 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
34745 assert_eq_m128(r, e);
34746 let r = _mm_mask_roundscale_ss(a, 0b11111111, a, b, 0);
34747 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
34748 assert_eq_m128(r, e);
34749 }
34750
34751 #[simd_test(enable = "avx512f")]
34752 unsafe fn test_mm_maskz_roundscale_ss() {
34753 let a = _mm_set1_ps(2.2);
34754 let b = _mm_set1_ps(1.1);
34755 let r = _mm_maskz_roundscale_ss(0, a, b, 0);
34756 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
34757 assert_eq_m128(r, e);
34758 let r = _mm_maskz_roundscale_ss(0b11111111, a, b, 0);
34759 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
34760 assert_eq_m128(r, e);
34761 }
34762
34763 #[simd_test(enable = "avx512f")]
34764 unsafe fn test_mm_roundscale_sd() {
34765 let a = _mm_set1_pd(2.2);
34766 let b = _mm_set1_pd(1.1);
34767 let r = _mm_roundscale_sd(a, b, 0);
34768 let e = _mm_set_pd(2.2, 1.0);
34769 assert_eq_m128d(r, e);
34770 }
34771
34772 #[simd_test(enable = "avx512f")]
34773 unsafe fn test_mm_mask_roundscale_sd() {
34774 let a = _mm_set1_pd(2.2);
34775 let b = _mm_set1_pd(1.1);
34776 let r = _mm_mask_roundscale_sd(a, 0, a, b, 0);
34777 let e = _mm_set_pd(2.2, 2.2);
34778 assert_eq_m128d(r, e);
34779 let r = _mm_mask_roundscale_sd(a, 0b11111111, a, b, 0);
34780 let e = _mm_set_pd(2.2, 1.0);
34781 assert_eq_m128d(r, e);
34782 }
34783
34784 #[simd_test(enable = "avx512f")]
34785 unsafe fn test_mm_maskz_roundscale_sd() {
34786 let a = _mm_set1_pd(2.2);
34787 let b = _mm_set1_pd(1.1);
34788 let r = _mm_maskz_roundscale_sd(0, a, b, 0);
34789 let e = _mm_set_pd(2.2, 0.0);
34790 assert_eq_m128d(r, e);
34791 let r = _mm_maskz_roundscale_sd(0b11111111, a, b, 0);
34792 let e = _mm_set_pd(2.2, 1.0);
34793 assert_eq_m128d(r, e);
34794 }
34795
34796 #[simd_test(enable = "avx512f")]
34797 unsafe fn test_mm_scalef_ss() {
34798 let a = _mm_set1_ps(1.);
34799 let b = _mm_set1_ps(3.);
34800 let r = _mm_scalef_ss(a, b);
34801 let e = _mm_set_ps(1., 1., 1., 8.);
34802 assert_eq_m128(r, e);
34803 }
34804
34805 #[simd_test(enable = "avx512f")]
34806 unsafe fn test_mm_mask_scalef_ss() {
34807 let a = _mm_set1_ps(1.);
34808 let b = _mm_set1_ps(3.);
34809 let r = _mm_mask_scalef_ss(a, 0, a, b);
34810 let e = _mm_set_ps(1., 1., 1., 1.);
34811 assert_eq_m128(r, e);
34812 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
34813 let e = _mm_set_ps(1., 1., 1., 8.);
34814 assert_eq_m128(r, e);
34815 }
34816
34817 #[simd_test(enable = "avx512f")]
34818 unsafe fn test_mm_maskz_scalef_ss() {
34819 let a = _mm_set1_ps(1.);
34820 let b = _mm_set1_ps(3.);
34821 let r = _mm_maskz_scalef_ss(0, a, b);
34822 let e = _mm_set_ps(1., 1., 1., 0.);
34823 assert_eq_m128(r, e);
34824 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
34825 let e = _mm_set_ps(1., 1., 1., 8.);
34826 assert_eq_m128(r, e);
34827 }
34828
34829 #[simd_test(enable = "avx512f")]
34830 unsafe fn test_mm_scalef_sd() {
34831 let a = _mm_set1_pd(1.);
34832 let b = _mm_set1_pd(3.);
34833 let r = _mm_scalef_sd(a, b);
34834 let e = _mm_set_pd(1., 8.);
34835 assert_eq_m128d(r, e);
34836 }
34837
34838 #[simd_test(enable = "avx512f")]
34839 unsafe fn test_mm_mask_scalef_sd() {
34840 let a = _mm_set1_pd(1.);
34841 let b = _mm_set1_pd(3.);
34842 let r = _mm_mask_scalef_sd(a, 0, a, b);
34843 let e = _mm_set_pd(1., 1.);
34844 assert_eq_m128d(r, e);
34845 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
34846 let e = _mm_set_pd(1., 8.);
34847 assert_eq_m128d(r, e);
34848 }
34849
34850 #[simd_test(enable = "avx512f")]
34851 unsafe fn test_mm_maskz_scalef_sd() {
34852 let a = _mm_set1_pd(1.);
34853 let b = _mm_set1_pd(3.);
34854 let r = _mm_maskz_scalef_sd(0, a, b);
34855 let e = _mm_set_pd(1., 0.);
34856 assert_eq_m128d(r, e);
34857 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
34858 let e = _mm_set_pd(1., 8.);
34859 assert_eq_m128d(r, e);
34860 }
34861
34862 #[simd_test(enable = "avx512f")]
34863 unsafe fn test_mm_mask_fmadd_ss() {
34864 let a = _mm_set1_ps(1.);
34865 let b = _mm_set1_ps(2.);
34866 let c = _mm_set1_ps(3.);
34867 let r = _mm_mask_fmadd_ss(a, 0, b, c);
34868 assert_eq_m128(r, a);
34869 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
34870 let e = _mm_set_ps(1., 1., 1., 5.);
34871 assert_eq_m128(r, e);
34872 }
34873
34874 #[simd_test(enable = "avx512f")]
34875 unsafe fn test_mm_maskz_fmadd_ss() {
34876 let a = _mm_set1_ps(1.);
34877 let b = _mm_set1_ps(2.);
34878 let c = _mm_set1_ps(3.);
34879 let r = _mm_maskz_fmadd_ss(0, a, b, c);
34880 let e = _mm_set_ps(1., 1., 1., 0.);
34881 assert_eq_m128(r, e);
34882 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
34883 let e = _mm_set_ps(1., 1., 1., 5.);
34884 assert_eq_m128(r, e);
34885 }
34886
34887 #[simd_test(enable = "avx512f")]
34888 unsafe fn test_mm_mask3_fmadd_ss() {
34889 let a = _mm_set1_ps(1.);
34890 let b = _mm_set1_ps(2.);
34891 let c = _mm_set1_ps(3.);
34892 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
34893 assert_eq_m128(r, c);
34894 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
34895 let e = _mm_set_ps(3., 3., 3., 5.);
34896 assert_eq_m128(r, e);
34897 }
34898
34899 #[simd_test(enable = "avx512f")]
34900 unsafe fn test_mm_mask_fmadd_sd() {
34901 let a = _mm_set1_pd(1.);
34902 let b = _mm_set1_pd(2.);
34903 let c = _mm_set1_pd(3.);
34904 let r = _mm_mask_fmadd_sd(a, 0, b, c);
34905 assert_eq_m128d(r, a);
34906 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
34907 let e = _mm_set_pd(1., 5.);
34908 assert_eq_m128d(r, e);
34909 }
34910
34911 #[simd_test(enable = "avx512f")]
34912 unsafe fn test_mm_maskz_fmadd_sd() {
34913 let a = _mm_set1_pd(1.);
34914 let b = _mm_set1_pd(2.);
34915 let c = _mm_set1_pd(3.);
34916 let r = _mm_maskz_fmadd_sd(0, a, b, c);
34917 let e = _mm_set_pd(1., 0.);
34918 assert_eq_m128d(r, e);
34919 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
34920 let e = _mm_set_pd(1., 5.);
34921 assert_eq_m128d(r, e);
34922 }
34923
34924 #[simd_test(enable = "avx512f")]
34925 unsafe fn test_mm_mask3_fmadd_sd() {
34926 let a = _mm_set1_pd(1.);
34927 let b = _mm_set1_pd(2.);
34928 let c = _mm_set1_pd(3.);
34929 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
34930 assert_eq_m128d(r, c);
34931 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
34932 let e = _mm_set_pd(3., 5.);
34933 assert_eq_m128d(r, e);
34934 }
34935
34936 #[simd_test(enable = "avx512f")]
34937 unsafe fn test_mm_mask_fmsub_ss() {
34938 let a = _mm_set1_ps(1.);
34939 let b = _mm_set1_ps(2.);
34940 let c = _mm_set1_ps(3.);
34941 let r = _mm_mask_fmsub_ss(a, 0, b, c);
34942 assert_eq_m128(r, a);
34943 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
34944 let e = _mm_set_ps(1., 1., 1., -1.);
34945 assert_eq_m128(r, e);
34946 }
34947
34948 #[simd_test(enable = "avx512f")]
34949 unsafe fn test_mm_maskz_fmsub_ss() {
34950 let a = _mm_set1_ps(1.);
34951 let b = _mm_set1_ps(2.);
34952 let c = _mm_set1_ps(3.);
34953 let r = _mm_maskz_fmsub_ss(0, a, b, c);
34954 let e = _mm_set_ps(1., 1., 1., 0.);
34955 assert_eq_m128(r, e);
34956 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
34957 let e = _mm_set_ps(1., 1., 1., -1.);
34958 assert_eq_m128(r, e);
34959 }
34960
34961 #[simd_test(enable = "avx512f")]
34962 unsafe fn test_mm_mask3_fmsub_ss() {
34963 let a = _mm_set1_ps(1.);
34964 let b = _mm_set1_ps(2.);
34965 let c = _mm_set1_ps(3.);
34966 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
34967 assert_eq_m128(r, c);
34968 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
34969 let e = _mm_set_ps(3., 3., 3., -1.);
34970 assert_eq_m128(r, e);
34971 }
34972
34973 #[simd_test(enable = "avx512f")]
34974 unsafe fn test_mm_mask_fmsub_sd() {
34975 let a = _mm_set1_pd(1.);
34976 let b = _mm_set1_pd(2.);
34977 let c = _mm_set1_pd(3.);
34978 let r = _mm_mask_fmsub_sd(a, 0, b, c);
34979 assert_eq_m128d(r, a);
34980 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
34981 let e = _mm_set_pd(1., -1.);
34982 assert_eq_m128d(r, e);
34983 }
34984
34985 #[simd_test(enable = "avx512f")]
34986 unsafe fn test_mm_maskz_fmsub_sd() {
34987 let a = _mm_set1_pd(1.);
34988 let b = _mm_set1_pd(2.);
34989 let c = _mm_set1_pd(3.);
34990 let r = _mm_maskz_fmsub_sd(0, a, b, c);
34991 let e = _mm_set_pd(1., 0.);
34992 assert_eq_m128d(r, e);
34993 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
34994 let e = _mm_set_pd(1., -1.);
34995 assert_eq_m128d(r, e);
34996 }
34997
34998 #[simd_test(enable = "avx512f")]
34999 unsafe fn test_mm_mask3_fmsub_sd() {
35000 let a = _mm_set1_pd(1.);
35001 let b = _mm_set1_pd(2.);
35002 let c = _mm_set1_pd(3.);
35003 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
35004 assert_eq_m128d(r, c);
35005 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
35006 let e = _mm_set_pd(3., -1.);
35007 assert_eq_m128d(r, e);
35008 }
35009
35010 #[simd_test(enable = "avx512f")]
35011 unsafe fn test_mm_mask_fnmadd_ss() {
35012 let a = _mm_set1_ps(1.);
35013 let b = _mm_set1_ps(2.);
35014 let c = _mm_set1_ps(3.);
35015 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
35016 assert_eq_m128(r, a);
35017 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
35018 let e = _mm_set_ps(1., 1., 1., 1.);
35019 assert_eq_m128(r, e);
35020 }
35021
35022 #[simd_test(enable = "avx512f")]
35023 unsafe fn test_mm_maskz_fnmadd_ss() {
35024 let a = _mm_set1_ps(1.);
35025 let b = _mm_set1_ps(2.);
35026 let c = _mm_set1_ps(3.);
35027 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
35028 let e = _mm_set_ps(1., 1., 1., 0.);
35029 assert_eq_m128(r, e);
35030 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
35031 let e = _mm_set_ps(1., 1., 1., 1.);
35032 assert_eq_m128(r, e);
35033 }
35034
35035 #[simd_test(enable = "avx512f")]
35036 unsafe fn test_mm_mask3_fnmadd_ss() {
35037 let a = _mm_set1_ps(1.);
35038 let b = _mm_set1_ps(2.);
35039 let c = _mm_set1_ps(3.);
35040 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
35041 assert_eq_m128(r, c);
35042 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
35043 let e = _mm_set_ps(3., 3., 3., 1.);
35044 assert_eq_m128(r, e);
35045 }
35046
35047 #[simd_test(enable = "avx512f")]
35048 unsafe fn test_mm_mask_fnmadd_sd() {
35049 let a = _mm_set1_pd(1.);
35050 let b = _mm_set1_pd(2.);
35051 let c = _mm_set1_pd(3.);
35052 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
35053 assert_eq_m128d(r, a);
35054 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
35055 let e = _mm_set_pd(1., 1.);
35056 assert_eq_m128d(r, e);
35057 }
35058
35059 #[simd_test(enable = "avx512f")]
35060 unsafe fn test_mm_maskz_fnmadd_sd() {
35061 let a = _mm_set1_pd(1.);
35062 let b = _mm_set1_pd(2.);
35063 let c = _mm_set1_pd(3.);
35064 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
35065 let e = _mm_set_pd(1., 0.);
35066 assert_eq_m128d(r, e);
35067 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
35068 let e = _mm_set_pd(1., 1.);
35069 assert_eq_m128d(r, e);
35070 }
35071
35072 #[simd_test(enable = "avx512f")]
35073 unsafe fn test_mm_mask3_fnmadd_sd() {
35074 let a = _mm_set1_pd(1.);
35075 let b = _mm_set1_pd(2.);
35076 let c = _mm_set1_pd(3.);
35077 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
35078 assert_eq_m128d(r, c);
35079 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
35080 let e = _mm_set_pd(3., 1.);
35081 assert_eq_m128d(r, e);
35082 }
35083
35084 #[simd_test(enable = "avx512f")]
35085 unsafe fn test_mm_mask_fnmsub_ss() {
35086 let a = _mm_set1_ps(1.);
35087 let b = _mm_set1_ps(2.);
35088 let c = _mm_set1_ps(3.);
35089 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
35090 assert_eq_m128(r, a);
35091 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
35092 let e = _mm_set_ps(1., 1., 1., -5.);
35093 assert_eq_m128(r, e);
35094 }
35095
35096 #[simd_test(enable = "avx512f")]
35097 unsafe fn test_mm_maskz_fnmsub_ss() {
35098 let a = _mm_set1_ps(1.);
35099 let b = _mm_set1_ps(2.);
35100 let c = _mm_set1_ps(3.);
35101 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
35102 let e = _mm_set_ps(1., 1., 1., 0.);
35103 assert_eq_m128(r, e);
35104 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
35105 let e = _mm_set_ps(1., 1., 1., -5.);
35106 assert_eq_m128(r, e);
35107 }
35108
35109 #[simd_test(enable = "avx512f")]
35110 unsafe fn test_mm_mask3_fnmsub_ss() {
35111 let a = _mm_set1_ps(1.);
35112 let b = _mm_set1_ps(2.);
35113 let c = _mm_set1_ps(3.);
35114 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
35115 assert_eq_m128(r, c);
35116 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
35117 let e = _mm_set_ps(3., 3., 3., -5.);
35118 assert_eq_m128(r, e);
35119 }
35120
35121 #[simd_test(enable = "avx512f")]
35122 unsafe fn test_mm_mask_fnmsub_sd() {
35123 let a = _mm_set1_pd(1.);
35124 let b = _mm_set1_pd(2.);
35125 let c = _mm_set1_pd(3.);
35126 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
35127 assert_eq_m128d(r, a);
35128 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
35129 let e = _mm_set_pd(1., -5.);
35130 assert_eq_m128d(r, e);
35131 }
35132
35133 #[simd_test(enable = "avx512f")]
35134 unsafe fn test_mm_maskz_fnmsub_sd() {
35135 let a = _mm_set1_pd(1.);
35136 let b = _mm_set1_pd(2.);
35137 let c = _mm_set1_pd(3.);
35138 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
35139 let e = _mm_set_pd(1., 0.);
35140 assert_eq_m128d(r, e);
35141 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
35142 let e = _mm_set_pd(1., -5.);
35143 assert_eq_m128d(r, e);
35144 }
35145
35146 #[simd_test(enable = "avx512f")]
35147 unsafe fn test_mm_mask3_fnmsub_sd() {
35148 let a = _mm_set1_pd(1.);
35149 let b = _mm_set1_pd(2.);
35150 let c = _mm_set1_pd(3.);
35151 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
35152 assert_eq_m128d(r, c);
35153 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
35154 let e = _mm_set_pd(3., -5.);
35155 assert_eq_m128d(r, e);
35156 }
35157
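// Scalar add/sub/mul/div with explicit rounding control: only the low lane is computed,
// the upper lanes always come from `a`, and a clear mask bit makes the low lane fall back
// to `src` (mask) or to zero (maskz). _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC requests
// round-toward-zero and suppresses floating-point exceptions (SAE).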
35158 #[simd_test(enable = "avx512f")]
35159 unsafe fn test_mm_add_round_ss() {
35160 let a = _mm_set_ps(1., 2., 10., 20.);
35161 let b = _mm_set_ps(3., 4., 30., 40.);
35162 let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35163 let e = _mm_set_ps(1., 2., 10., 60.);
35164 assert_eq_m128(r, e);
35165 }
35166
35167 #[simd_test(enable = "avx512f")]
35168 unsafe fn test_mm_mask_add_round_ss() {
35169 let src = _mm_set_ps(10., 11., 100., 110.);
35170 let a = _mm_set_ps(1., 2., 10., 20.);
35171 let b = _mm_set_ps(3., 4., 30., 40.);
35172 let r = _mm_mask_add_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35173 let e = _mm_set_ps(1., 2., 10., 110.);
35174 assert_eq_m128(r, e);
35175 let r = _mm_mask_add_round_ss(
35176 src,
35177 0b11111111,
35178 a,
35179 b,
35180 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35181 );
35182 let e = _mm_set_ps(1., 2., 10., 60.);
35183 assert_eq_m128(r, e);
35184 }
35185
35186 #[simd_test(enable = "avx512f")]
35187 unsafe fn test_mm_maskz_add_round_ss() {
35188 let a = _mm_set_ps(1., 2., 10., 20.);
35189 let b = _mm_set_ps(3., 4., 30., 40.);
35190 let r = _mm_maskz_add_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35191 let e = _mm_set_ps(1., 2., 10., 0.);
35192 assert_eq_m128(r, e);
35193 let r = _mm_maskz_add_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35194 let e = _mm_set_ps(1., 2., 10., 60.);
35195 assert_eq_m128(r, e);
35196 }
35197
35198 #[simd_test(enable = "avx512f")]
35199 unsafe fn test_mm_add_round_sd() {
35200 let a = _mm_set_pd(1., 2.);
35201 let b = _mm_set_pd(3., 4.);
35202 let r = _mm_add_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35203 let e = _mm_set_pd(1., 6.);
35204 assert_eq_m128d(r, e);
35205 }
35206
35207 #[simd_test(enable = "avx512f")]
35208 unsafe fn test_mm_mask_add_round_sd() {
35209 let src = _mm_set_pd(10., 11.);
35210 let a = _mm_set_pd(1., 2.);
35211 let b = _mm_set_pd(3., 4.);
35212 let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35213 let e = _mm_set_pd(1., 11.);
35214 assert_eq_m128d(r, e);
35215 let r = _mm_mask_add_round_sd(
35216 src,
35217 0b11111111,
35218 a,
35219 b,
35220 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35221 );
35222 let e = _mm_set_pd(1., 6.);
35223 assert_eq_m128d(r, e);
35224 }
35225
35226 #[simd_test(enable = "avx512f")]
35227 unsafe fn test_mm_maskz_add_round_sd() {
35228 let a = _mm_set_pd(1., 2.);
35229 let b = _mm_set_pd(3., 4.);
35230 let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35231 let e = _mm_set_pd(1., 0.);
35232 assert_eq_m128d(r, e);
35233 let r = _mm_maskz_add_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35234 let e = _mm_set_pd(1., 6.);
35235 assert_eq_m128d(r, e);
35236 }
35237
35238 #[simd_test(enable = "avx512f")]
35239 unsafe fn test_mm_sub_round_ss() {
35240 let a = _mm_set_ps(1., 2., 10., 20.);
35241 let b = _mm_set_ps(3., 4., 30., 40.);
35242 let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35243 let e = _mm_set_ps(1., 2., 10., -20.);
35244 assert_eq_m128(r, e);
35245 }
35246
35247 #[simd_test(enable = "avx512f")]
35248 unsafe fn test_mm_mask_sub_round_ss() {
35249 let src = _mm_set_ps(10., 11., 100., 110.);
35250 let a = _mm_set_ps(1., 2., 10., 20.);
35251 let b = _mm_set_ps(3., 4., 30., 40.);
35252 let r = _mm_mask_sub_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35253 let e = _mm_set_ps(1., 2., 10., 110.);
35254 assert_eq_m128(r, e);
35255 let r = _mm_mask_sub_round_ss(
35256 src,
35257 0b11111111,
35258 a,
35259 b,
35260 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35261 );
35262 let e = _mm_set_ps(1., 2., 10., -20.);
35263 assert_eq_m128(r, e);
35264 }
35265
35266 #[simd_test(enable = "avx512f")]
35267 unsafe fn test_mm_maskz_sub_round_ss() {
35268 let a = _mm_set_ps(1., 2., 10., 20.);
35269 let b = _mm_set_ps(3., 4., 30., 40.);
35270 let r = _mm_maskz_sub_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35271 let e = _mm_set_ps(1., 2., 10., 0.);
35272 assert_eq_m128(r, e);
35273 let r = _mm_maskz_sub_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35274 let e = _mm_set_ps(1., 2., 10., -20.);
35275 assert_eq_m128(r, e);
35276 }
35277
35278 #[simd_test(enable = "avx512f")]
35279 unsafe fn test_mm_sub_round_sd() {
35280 let a = _mm_set_pd(1., 2.);
35281 let b = _mm_set_pd(3., 4.);
35282 let r = _mm_sub_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35283 let e = _mm_set_pd(1., -2.);
35284 assert_eq_m128d(r, e);
35285 }
35286
35287 #[simd_test(enable = "avx512f")]
35288 unsafe fn test_mm_mask_sub_round_sd() {
35289 let src = _mm_set_pd(10., 11.);
35290 let a = _mm_set_pd(1., 2.);
35291 let b = _mm_set_pd(3., 4.);
35292 let r = _mm_mask_sub_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35293 let e = _mm_set_pd(1., 11.);
35294 assert_eq_m128d(r, e);
35295 let r = _mm_mask_sub_round_sd(
35296 src,
35297 0b11111111,
35298 a,
35299 b,
35300 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35301 );
35302 let e = _mm_set_pd(1., -2.);
35303 assert_eq_m128d(r, e);
35304 }
35305
35306 #[simd_test(enable = "avx512f")]
35307 unsafe fn test_mm_maskz_sub_round_sd() {
35308 let a = _mm_set_pd(1., 2.);
35309 let b = _mm_set_pd(3., 4.);
35310 let r = _mm_maskz_sub_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35311 let e = _mm_set_pd(1., 0.);
35312 assert_eq_m128d(r, e);
35313 let r = _mm_maskz_sub_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35314 let e = _mm_set_pd(1., -2.);
35315 assert_eq_m128d(r, e);
35316 }
35317
35318 #[simd_test(enable = "avx512f")]
35319 unsafe fn test_mm_mul_round_ss() {
35320 let a = _mm_set_ps(1., 2., 10., 20.);
35321 let b = _mm_set_ps(3., 4., 30., 40.);
35322 let r = _mm_mul_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35323 let e = _mm_set_ps(1., 2., 10., 800.);
35324 assert_eq_m128(r, e);
35325 }
35326
35327 #[simd_test(enable = "avx512f")]
35328 unsafe fn test_mm_mask_mul_round_ss() {
35329 let src = _mm_set_ps(10., 11., 100., 110.);
35330 let a = _mm_set_ps(1., 2., 10., 20.);
35331 let b = _mm_set_ps(3., 4., 30., 40.);
35332 let r = _mm_mask_mul_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35333 let e = _mm_set_ps(1., 2., 10., 110.);
35334 assert_eq_m128(r, e);
35335 let r = _mm_mask_mul_round_ss(
35336 src,
35337 0b11111111,
35338 a,
35339 b,
35340 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35341 );
35342 let e = _mm_set_ps(1., 2., 10., 800.);
35343 assert_eq_m128(r, e);
35344 }
35345
35346 #[simd_test(enable = "avx512f")]
35347 unsafe fn test_mm_maskz_mul_round_ss() {
35348 let a = _mm_set_ps(1., 2., 10., 20.);
35349 let b = _mm_set_ps(3., 4., 30., 40.);
35350 let r = _mm_maskz_mul_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35351 let e = _mm_set_ps(1., 2., 10., 0.);
35352 assert_eq_m128(r, e);
35353 let r = _mm_maskz_mul_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35354 let e = _mm_set_ps(1., 2., 10., 800.);
35355 assert_eq_m128(r, e);
35356 }
35357
35358 #[simd_test(enable = "avx512f")]
35359 unsafe fn test_mm_mul_round_sd() {
35360 let a = _mm_set_pd(1., 2.);
35361 let b = _mm_set_pd(3., 4.);
35362 let r = _mm_mul_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35363 let e = _mm_set_pd(1., 8.);
35364 assert_eq_m128d(r, e);
35365 }
35366
35367 #[simd_test(enable = "avx512f")]
35368 unsafe fn test_mm_mask_mul_round_sd() {
35369 let src = _mm_set_pd(10., 11.);
35370 let a = _mm_set_pd(1., 2.);
35371 let b = _mm_set_pd(3., 4.);
35372 let r = _mm_mask_mul_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35373 let e = _mm_set_pd(1., 11.);
35374 assert_eq_m128d(r, e);
35375 let r = _mm_mask_mul_round_sd(
35376 src,
35377 0b11111111,
35378 a,
35379 b,
35380 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35381 );
35382 let e = _mm_set_pd(1., 8.);
35383 assert_eq_m128d(r, e);
35384 }
35385
35386 #[simd_test(enable = "avx512f")]
35387 unsafe fn test_mm_maskz_mul_round_sd() {
35388 let a = _mm_set_pd(1., 2.);
35389 let b = _mm_set_pd(3., 4.);
35390 let r = _mm_maskz_mul_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35391 let e = _mm_set_pd(1., 0.);
35392 assert_eq_m128d(r, e);
35393 let r = _mm_maskz_mul_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35394 let e = _mm_set_pd(1., 8.);
35395 assert_eq_m128d(r, e);
35396 }
35397
35398 #[simd_test(enable = "avx512f")]
35399 unsafe fn test_mm_div_round_ss() {
35400 let a = _mm_set_ps(1., 2., 10., 20.);
35401 let b = _mm_set_ps(3., 4., 30., 40.);
35402 let r = _mm_div_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35403 let e = _mm_set_ps(1., 2., 10., 0.5);
35404 assert_eq_m128(r, e);
35405 }
35406
35407 #[simd_test(enable = "avx512f")]
35408 unsafe fn test_mm_mask_div_round_ss() {
35409 let src = _mm_set_ps(10., 11., 100., 110.);
35410 let a = _mm_set_ps(1., 2., 10., 20.);
35411 let b = _mm_set_ps(3., 4., 30., 40.);
35412 let r = _mm_mask_div_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35413 let e = _mm_set_ps(1., 2., 10., 110.);
35414 assert_eq_m128(r, e);
35415 let r = _mm_mask_div_round_ss(
35416 src,
35417 0b11111111,
35418 a,
35419 b,
35420 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35421 );
35422 let e = _mm_set_ps(1., 2., 10., 0.5);
35423 assert_eq_m128(r, e);
35424 }
35425
35426 #[simd_test(enable = "avx512f")]
35427 unsafe fn test_mm_maskz_div_round_ss() {
35428 let a = _mm_set_ps(1., 2., 10., 20.);
35429 let b = _mm_set_ps(3., 4., 30., 40.);
35430 let r = _mm_maskz_div_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35431 let e = _mm_set_ps(1., 2., 10., 0.);
35432 assert_eq_m128(r, e);
35433 let r = _mm_maskz_div_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35434 let e = _mm_set_ps(1., 2., 10., 0.5);
35435 assert_eq_m128(r, e);
35436 }
35437
35438 #[simd_test(enable = "avx512f")]
35439 unsafe fn test_mm_div_round_sd() {
35440 let a = _mm_set_pd(1., 2.);
35441 let b = _mm_set_pd(3., 4.);
35442 let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35443 let e = _mm_set_pd(1., 0.5);
35444 assert_eq_m128d(r, e);
35445 }
35446
35447 #[simd_test(enable = "avx512f")]
35448 unsafe fn test_mm_mask_div_round_sd() {
35449 let src = _mm_set_pd(10., 11.);
35450 let a = _mm_set_pd(1., 2.);
35451 let b = _mm_set_pd(3., 4.);
35452 let r = _mm_mask_div_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35453 let e = _mm_set_pd(1., 11.);
35454 assert_eq_m128d(r, e);
35455 let r = _mm_mask_div_round_sd(
35456 src,
35457 0b11111111,
35458 a,
35459 b,
35460 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35461 );
35462 let e = _mm_set_pd(1., 0.5);
35463 assert_eq_m128d(r, e);
35464 }
35465
35466 #[simd_test(enable = "avx512f")]
35467 unsafe fn test_mm_maskz_div_round_sd() {
35468 let a = _mm_set_pd(1., 2.);
35469 let b = _mm_set_pd(3., 4.);
35470 let r = _mm_maskz_div_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35471 let e = _mm_set_pd(1., 0.);
35472 assert_eq_m128d(r, e);
35473 let r = _mm_maskz_div_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35474 let e = _mm_set_pd(1., 0.5);
35475 assert_eq_m128d(r, e);
35476 }
35477
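// The scalar max/min tests below compare only the low lanes and copy the upper lanes
// from `a`; _MM_FROUND_CUR_DIRECTION means no SAE, i.e. the current MXCSR settings apply.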
35478 #[simd_test(enable = "avx512f")]
35479 unsafe fn test_mm_max_round_ss() {
35480 let a = _mm_set_ps(0., 1., 2., 3.);
35481 let b = _mm_set_ps(4., 5., 6., 7.);
35482 let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
35483 let e = _mm_set_ps(0., 1., 2., 7.);
35484 assert_eq_m128(r, e);
35485 }
35486
35487 #[simd_test(enable = "avx512f")]
35488 unsafe fn test_mm_mask_max_round_ss() {
35489 let a = _mm_set_ps(0., 1., 2., 3.);
35490 let b = _mm_set_ps(4., 5., 6., 7.);
35491 let r = _mm_mask_max_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
35492 let e = _mm_set_ps(0., 1., 2., 3.);
35493 assert_eq_m128(r, e);
35494 let r = _mm_mask_max_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35495 let e = _mm_set_ps(0., 1., 2., 7.);
35496 assert_eq_m128(r, e);
35497 }
35498
35499 #[simd_test(enable = "avx512f")]
35500 unsafe fn test_mm_maskz_max_round_ss() {
35501 let a = _mm_set_ps(0., 1., 2., 3.);
35502 let b = _mm_set_ps(4., 5., 6., 7.);
35503 let r = _mm_maskz_max_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
35504 let e = _mm_set_ps(0., 1., 2., 0.);
35505 assert_eq_m128(r, e);
35506 let r = _mm_maskz_max_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35507 let e = _mm_set_ps(0., 1., 2., 7.);
35508 assert_eq_m128(r, e);
35509 }
35510
35511 #[simd_test(enable = "avx512f")]
35512 unsafe fn test_mm_max_round_sd() {
35513 let a = _mm_set_pd(0., 1.);
35514 let b = _mm_set_pd(2., 3.);
35515 let r = _mm_max_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
35516 let e = _mm_set_pd(0., 3.);
35517 assert_eq_m128d(r, e);
35518 }
35519
35520 #[simd_test(enable = "avx512f")]
35521 unsafe fn test_mm_mask_max_round_sd() {
35522 let a = _mm_set_pd(0., 1.);
35523 let b = _mm_set_pd(2., 3.);
35524 let r = _mm_mask_max_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
35525 let e = _mm_set_pd(0., 1.);
35526 assert_eq_m128d(r, e);
35527 let r = _mm_mask_max_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35528 let e = _mm_set_pd(0., 3.);
35529 assert_eq_m128d(r, e);
35530 }
35531
35532 #[simd_test(enable = "avx512f")]
35533 unsafe fn test_mm_maskz_max_round_sd() {
35534 let a = _mm_set_pd(0., 1.);
35535 let b = _mm_set_pd(2., 3.);
35536 let r = _mm_maskz_max_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
35537 let e = _mm_set_pd(0., 0.);
35538 assert_eq_m128d(r, e);
35539 let r = _mm_maskz_max_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35540 let e = _mm_set_pd(0., 3.);
35541 assert_eq_m128d(r, e);
35542 }
35543
35544 #[simd_test(enable = "avx512f")]
35545 unsafe fn test_mm_min_round_ss() {
35546 let a = _mm_set_ps(0., 1., 2., 3.);
35547 let b = _mm_set_ps(4., 5., 6., 7.);
35548 let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
35549 let e = _mm_set_ps(0., 1., 2., 3.);
35550 assert_eq_m128(r, e);
35551 }
35552
35553 #[simd_test(enable = "avx512f")]
35554 unsafe fn test_mm_mask_min_round_ss() {
35555 let a = _mm_set_ps(0., 1., 2., 3.);
35556 let b = _mm_set_ps(4., 5., 6., 7.);
35557 let r = _mm_mask_min_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
35558 let e = _mm_set_ps(0., 1., 2., 3.);
35559 assert_eq_m128(r, e);
35560 let r = _mm_mask_min_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35561 let e = _mm_set_ps(0., 1., 2., 3.);
35562 assert_eq_m128(r, e);
35563 }
35564
35565 #[simd_test(enable = "avx512f")]
35566 unsafe fn test_mm_maskz_min_round_ss() {
35567 let a = _mm_set_ps(0., 1., 2., 3.);
35568 let b = _mm_set_ps(4., 5., 6., 7.);
35569 let r = _mm_maskz_min_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
35570 let e = _mm_set_ps(0., 1., 2., 0.);
35571 assert_eq_m128(r, e);
35572 let r = _mm_maskz_min_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35573 let e = _mm_set_ps(0., 1., 2., 3.);
35574 assert_eq_m128(r, e);
35575 }
35576
35577 #[simd_test(enable = "avx512f")]
35578 unsafe fn test_mm_min_round_sd() {
35579 let a = _mm_set_pd(0., 1.);
35580 let b = _mm_set_pd(2., 3.);
35581 let r = _mm_min_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
35582 let e = _mm_set_pd(0., 1.);
35583 assert_eq_m128d(r, e);
35584 }
35585
35586 #[simd_test(enable = "avx512f")]
35587 unsafe fn test_mm_mask_min_round_sd() {
35588 let a = _mm_set_pd(0., 1.);
35589 let b = _mm_set_pd(2., 3.);
35590 let r = _mm_mask_min_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
35591 let e = _mm_set_pd(0., 1.);
35592 assert_eq_m128d(r, e);
35593 let r = _mm_mask_min_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35594 let e = _mm_set_pd(0., 1.);
35595 assert_eq_m128d(r, e);
35596 }
35597
35598 #[simd_test(enable = "avx512f")]
35599 unsafe fn test_mm_maskz_min_round_sd() {
35600 let a = _mm_set_pd(0., 1.);
35601 let b = _mm_set_pd(2., 3.);
35602 let r = _mm_maskz_min_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
35603 let e = _mm_set_pd(0., 0.);
35604 assert_eq_m128d(r, e);
35605 let r = _mm_maskz_min_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35606 let e = _mm_set_pd(0., 1.);
35607 assert_eq_m128d(r, e);
35608 }
35609
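// _mm_sqrt_round_ss/sd take the square root of the low lane of `b` (here 4. -> 2.) and
// copy the upper lanes from `a`; the mask/maskz variants blend the low lane with
// `src`/zero as above.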
35610 #[simd_test(enable = "avx512f")]
35611 unsafe fn test_mm_sqrt_round_ss() {
35612 let a = _mm_set_ps(1., 2., 10., 20.);
35613 let b = _mm_set_ps(3., 4., 30., 4.);
35614 let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35615 let e = _mm_set_ps(1., 2., 10., 2.);
35616 assert_eq_m128(r, e);
35617 }
35618
35619 #[simd_test(enable = "avx512f")]
35620 unsafe fn test_mm_mask_sqrt_round_ss() {
35621 let src = _mm_set_ps(10., 11., 100., 110.);
35622 let a = _mm_set_ps(1., 2., 10., 20.);
35623 let b = _mm_set_ps(3., 4., 30., 4.);
35624 let r = _mm_mask_sqrt_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35625 let e = _mm_set_ps(1., 2., 10., 110.);
35626 assert_eq_m128(r, e);
35627 let r = _mm_mask_sqrt_round_ss(
35628 src,
35629 0b11111111,
35630 a,
35631 b,
35632 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35633 );
35634 let e = _mm_set_ps(1., 2., 10., 2.);
35635 assert_eq_m128(r, e);
35636 }
35637
35638 #[simd_test(enable = "avx512f")]
35639 unsafe fn test_mm_maskz_sqrt_round_ss() {
35640 let a = _mm_set_ps(1., 2., 10., 20.);
35641 let b = _mm_set_ps(3., 4., 30., 4.);
35642 let r = _mm_maskz_sqrt_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35643 let e = _mm_set_ps(1., 2., 10., 0.);
35644 assert_eq_m128(r, e);
35645 let r = _mm_maskz_sqrt_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35646 let e = _mm_set_ps(1., 2., 10., 2.);
35647 assert_eq_m128(r, e);
35648 }
35649
35650 #[simd_test(enable = "avx512f")]
35651 unsafe fn test_mm_sqrt_round_sd() {
35652 let a = _mm_set_pd(1., 2.);
35653 let b = _mm_set_pd(3., 4.);
35654 let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35655 let e = _mm_set_pd(1., 2.);
35656 assert_eq_m128d(r, e);
35657 }
35658
35659 #[simd_test(enable = "avx512f")]
35660 unsafe fn test_mm_mask_sqrt_round_sd() {
35661 let src = _mm_set_pd(10., 11.);
35662 let a = _mm_set_pd(1., 2.);
35663 let b = _mm_set_pd(3., 4.);
35664 let r = _mm_mask_sqrt_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35665 let e = _mm_set_pd(1., 11.);
35666 assert_eq_m128d(r, e);
35667 let r = _mm_mask_sqrt_round_sd(
35668 src,
35669 0b11111111,
35670 a,
35671 b,
35672 _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
35673 );
35674 let e = _mm_set_pd(1., 2.);
35675 assert_eq_m128d(r, e);
35676 }
35677
35678 #[simd_test(enable = "avx512f")]
35679 unsafe fn test_mm_maskz_sqrt_round_sd() {
35680 let a = _mm_set_pd(1., 2.);
35681 let b = _mm_set_pd(3., 4.);
35682 let r = _mm_maskz_sqrt_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35683 let e = _mm_set_pd(1., 0.);
35684 assert_eq_m128d(r, e);
35685 let r = _mm_maskz_sqrt_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
35686 let e = _mm_set_pd(1., 2.);
35687 assert_eq_m128d(r, e);
35688 }
35689
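// getexp returns floor(log2(|x|)) of the low lane of `b`: getexp(3.) == 1.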
35690 #[simd_test(enable = "avx512f")]
35691 unsafe fn test_mm_getexp_round_ss() {
35692 let a = _mm_set1_ps(2.);
35693 let b = _mm_set1_ps(3.);
35694 let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
35695 let e = _mm_set_ps(2., 2., 2., 1.);
35696 assert_eq_m128(r, e);
35697 }
35698
35699 #[simd_test(enable = "avx512f")]
35700 unsafe fn test_mm_mask_getexp_round_ss() {
35701 let a = _mm_set1_ps(2.);
35702 let b = _mm_set1_ps(3.);
35703 let r = _mm_mask_getexp_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
35704 let e = _mm_set_ps(2., 2., 2., 2.);
35705 assert_eq_m128(r, e);
35706 let r = _mm_mask_getexp_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35707 let e = _mm_set_ps(2., 2., 2., 1.);
35708 assert_eq_m128(r, e);
35709 }
35710
35711 #[simd_test(enable = "avx512f")]
35712 unsafe fn test_mm_maskz_getexp_round_ss() {
35713 let a = _mm_set1_ps(2.);
35714 let b = _mm_set1_ps(3.);
35715 let r = _mm_maskz_getexp_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
35716 let e = _mm_set_ps(2., 2., 2., 0.);
35717 assert_eq_m128(r, e);
35718 let r = _mm_maskz_getexp_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35719 let e = _mm_set_ps(2., 2., 2., 1.);
35720 assert_eq_m128(r, e);
35721 }
35722
35723 #[simd_test(enable = "avx512f")]
35724 unsafe fn test_mm_getexp_round_sd() {
35725 let a = _mm_set1_pd(2.);
35726 let b = _mm_set1_pd(3.);
35727 let r = _mm_getexp_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
35728 let e = _mm_set_pd(2., 1.);
35729 assert_eq_m128d(r, e);
35730 }
35731
35732 #[simd_test(enable = "avx512f")]
35733 unsafe fn test_mm_mask_getexp_round_sd() {
35734 let a = _mm_set1_pd(2.);
35735 let b = _mm_set1_pd(3.);
35736 let r = _mm_mask_getexp_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
35737 let e = _mm_set_pd(2., 2.);
35738 assert_eq_m128d(r, e);
35739 let r = _mm_mask_getexp_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35740 let e = _mm_set_pd(2., 1.);
35741 assert_eq_m128d(r, e);
35742 }
35743
35744 #[simd_test(enable = "avx512f")]
35745 unsafe fn test_mm_maskz_getexp_round_sd() {
35746 let a = _mm_set1_pd(2.);
35747 let b = _mm_set1_pd(3.);
35748 let r = _mm_maskz_getexp_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
35749 let e = _mm_set_pd(2., 0.);
35750 assert_eq_m128d(r, e);
35751 let r = _mm_maskz_getexp_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
35752 let e = _mm_set_pd(2., 1.);
35753 assert_eq_m128d(r, e);
35754 }
35755
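// getmant extracts the mantissa of the low lane of `b`, normalized to [1, 2)
// (_MM_MANT_NORM_1_2) with the sign taken from the source (_MM_MANT_SIGN_SRC):
// 10. == 1.25 * 2^3, so the expected low lane is 1.25.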
35756 #[simd_test(enable = "avx512f")]
35757 unsafe fn test_mm_getmant_round_ss() {
35758 let a = _mm_set1_ps(20.);
35759 let b = _mm_set1_ps(10.);
35760 let r = _mm_getmant_round_ss(
35761 a,
35762 b,
35763 _MM_MANT_NORM_1_2,
35764 _MM_MANT_SIGN_SRC,
35765 _MM_FROUND_CUR_DIRECTION,
35766 );
35767 let e = _mm_set_ps(20., 20., 20., 1.25);
35768 assert_eq_m128(r, e);
35769 }
35770
35771 #[simd_test(enable = "avx512f")]
35772 unsafe fn test_mm_mask_getmant_round_ss() {
35773 let a = _mm_set1_ps(20.);
35774 let b = _mm_set1_ps(10.);
35775 let r = _mm_mask_getmant_round_ss(
35776 a,
35777 0,
35778 a,
35779 b,
35780 _MM_MANT_NORM_1_2,
35781 _MM_MANT_SIGN_SRC,
35782 _MM_FROUND_CUR_DIRECTION,
35783 );
35784 let e = _mm_set_ps(20., 20., 20., 20.);
35785 assert_eq_m128(r, e);
35786 let r = _mm_mask_getmant_round_ss(
35787 a,
35788 0b11111111,
35789 a,
35790 b,
35791 _MM_MANT_NORM_1_2,
35792 _MM_MANT_SIGN_SRC,
35793 _MM_FROUND_CUR_DIRECTION,
35794 );
35795 let e = _mm_set_ps(20., 20., 20., 1.25);
35796 assert_eq_m128(r, e);
35797 }
35798
35799 #[simd_test(enable = "avx512f")]
35800 unsafe fn test_mm_maskz_getmant_round_ss() {
35801 let a = _mm_set1_ps(20.);
35802 let b = _mm_set1_ps(10.);
35803 let r = _mm_maskz_getmant_round_ss(
35804 0,
35805 a,
35806 b,
35807 _MM_MANT_NORM_1_2,
35808 _MM_MANT_SIGN_SRC,
35809 _MM_FROUND_CUR_DIRECTION,
35810 );
35811 let e = _mm_set_ps(20., 20., 20., 0.);
35812 assert_eq_m128(r, e);
35813 let r = _mm_maskz_getmant_round_ss(
35814 0b11111111,
35815 a,
35816 b,
35817 _MM_MANT_NORM_1_2,
35818 _MM_MANT_SIGN_SRC,
35819 _MM_FROUND_CUR_DIRECTION,
35820 );
35821 let e = _mm_set_ps(20., 20., 20., 1.25);
35822 assert_eq_m128(r, e);
35823 }
35824
35825 #[simd_test(enable = "avx512f")]
35826 unsafe fn test_mm_getmant_round_sd() {
35827 let a = _mm_set1_pd(20.);
35828 let b = _mm_set1_pd(10.);
35829 let r = _mm_getmant_round_sd(
35830 a,
35831 b,
35832 _MM_MANT_NORM_1_2,
35833 _MM_MANT_SIGN_SRC,
35834 _MM_FROUND_CUR_DIRECTION,
35835 );
35836 let e = _mm_set_pd(20., 1.25);
35837 assert_eq_m128d(r, e);
35838 }
35839
35840 #[simd_test(enable = "avx512f")]
35841 unsafe fn test_mm_mask_getmant_round_sd() {
35842 let a = _mm_set1_pd(20.);
35843 let b = _mm_set1_pd(10.);
35844 let r = _mm_mask_getmant_round_sd(
35845 a,
35846 0,
35847 a,
35848 b,
35849 _MM_MANT_NORM_1_2,
35850 _MM_MANT_SIGN_SRC,
35851 _MM_FROUND_CUR_DIRECTION,
35852 );
35853 let e = _mm_set_pd(20., 20.);
35854 assert_eq_m128d(r, e);
35855 let r = _mm_mask_getmant_round_sd(
35856 a,
35857 0b11111111,
35858 a,
35859 b,
35860 _MM_MANT_NORM_1_2,
35861 _MM_MANT_SIGN_SRC,
35862 _MM_FROUND_CUR_DIRECTION,
35863 );
35864 let e = _mm_set_pd(20., 1.25);
35865 assert_eq_m128d(r, e);
35866 }
35867
35868 #[simd_test(enable = "avx512f")]
35869 unsafe fn test_mm_maskz_getmant_round_sd() {
35870 let a = _mm_set1_pd(20.);
35871 let b = _mm_set1_pd(10.);
35872 let r = _mm_maskz_getmant_round_sd(
35873 0,
35874 a,
35875 b,
35876 _MM_MANT_NORM_1_2,
35877 _MM_MANT_SIGN_SRC,
35878 _MM_FROUND_CUR_DIRECTION,
35879 );
35880 let e = _mm_set_pd(20., 0.);
35881 assert_eq_m128d(r, e);
35882 let r = _mm_maskz_getmant_round_sd(
35883 0b11111111,
35884 a,
35885 b,
35886 _MM_MANT_NORM_1_2,
35887 _MM_MANT_SIGN_SRC,
35888 _MM_FROUND_CUR_DIRECTION,
35889 );
35890 let e = _mm_set_pd(20., 1.25);
35891 assert_eq_m128d(r, e);
35892 }
35893
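// roundscale rounds the low lane of `b`, keeping imm8[7:4] fraction bits (imm8[3:0]
// selects the rounding mode); with imm8 == 0 this rounds to the nearest integer,
// so 1.1 becomes 1.0.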
35894 #[simd_test(enable = "avx512f")]
35895 unsafe fn test_mm_roundscale_round_ss() {
35896 let a = _mm_set1_ps(2.2);
35897 let b = _mm_set1_ps(1.1);
35898 let r = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
35899 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
35900 assert_eq_m128(r, e);
35901 }
35902
35903 #[simd_test(enable = "avx512f")]
35904 unsafe fn test_mm_mask_roundscale_round_ss() {
35905 let a = _mm_set1_ps(2.2);
35906 let b = _mm_set1_ps(1.1);
35907 let r = _mm_mask_roundscale_round_ss(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35908 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
35909 assert_eq_m128(r, e);
35910 let r = _mm_mask_roundscale_round_ss(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35911 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
35912 assert_eq_m128(r, e);
35913 }
35914
35915 #[simd_test(enable = "avx512f")]
35916 unsafe fn test_mm_maskz_roundscale_round_ss() {
35917 let a = _mm_set1_ps(2.2);
35918 let b = _mm_set1_ps(1.1);
35919 let r = _mm_maskz_roundscale_round_ss(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35920 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
35921 assert_eq_m128(r, e);
35922 let r = _mm_maskz_roundscale_round_ss(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35923 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
35924 assert_eq_m128(r, e);
35925 }
35926
35927 #[simd_test(enable = "avx512f")]
35928 unsafe fn test_mm_roundscale_round_sd() {
35929 let a = _mm_set1_pd(2.2);
35930 let b = _mm_set1_pd(1.1);
35931 let r = _mm_roundscale_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
35932 let e = _mm_set_pd(2.2, 1.0);
35933 assert_eq_m128d(r, e);
35934 }
35935
35936 #[simd_test(enable = "avx512f")]
35937 unsafe fn test_mm_mask_roundscale_round_sd() {
35938 let a = _mm_set1_pd(2.2);
35939 let b = _mm_set1_pd(1.1);
35940 let r = _mm_mask_roundscale_round_sd(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35941 let e = _mm_set_pd(2.2, 2.2);
35942 assert_eq_m128d(r, e);
35943 let r = _mm_mask_roundscale_round_sd(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35944 let e = _mm_set_pd(2.2, 1.0);
35945 assert_eq_m128d(r, e);
35946 }
35947
35948 #[simd_test(enable = "avx512f")]
35949 unsafe fn test_mm_maskz_roundscale_round_sd() {
35950 let a = _mm_set1_pd(2.2);
35951 let b = _mm_set1_pd(1.1);
35952 let r = _mm_maskz_roundscale_round_sd(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35953 let e = _mm_set_pd(2.2, 0.0);
35954 assert_eq_m128d(r, e);
35955 let r = _mm_maskz_roundscale_round_sd(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
35956 let e = _mm_set_pd(2.2, 1.0);
35957 assert_eq_m128d(r, e);
35958 }
35959
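// scalef computes a * 2^floor(b) on the low lanes: 1. * 2^3 == 8.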
35960 #[simd_test(enable = "avx512f")]
35961 unsafe fn test_mm_scalef_round_ss() {
35962 let a = _mm_set1_ps(1.);
35963 let b = _mm_set1_ps(3.);
35964 let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
35965 let e = _mm_set_ps(1., 1., 1., 8.);
35966 assert_eq_m128(r, e);
35967 }
35968
35969 #[simd_test(enable = "avx512f")]
35970 unsafe fn test_mm_mask_scalef_round_ss() {
35971 let a = _mm_set1_ps(1.);
35972 let b = _mm_set1_ps(3.);
35973 let r = _mm_mask_scalef_round_ss(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
35974 let e = _mm_set_ps(1., 1., 1., 1.);
35975 assert_eq_m128(r, e);
35976 let r = _mm_mask_scalef_round_ss(
35977 a,
35978 0b11111111,
35979 a,
35980 b,
35981 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
35982 );
35983 let e = _mm_set_ps(1., 1., 1., 8.);
35984 assert_eq_m128(r, e);
35985 }
35986
35987 #[simd_test(enable = "avx512f")]
35988 unsafe fn test_mm_maskz_scalef_round_ss() {
35989 let a = _mm_set1_ps(1.);
35990 let b = _mm_set1_ps(3.);
35991 let r = _mm_maskz_scalef_round_ss(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
35992 let e = _mm_set_ps(1., 1., 1., 0.);
35993 assert_eq_m128(r, e);
35994 let r = _mm_maskz_scalef_round_ss(
35995 0b11111111,
35996 a,
35997 b,
35998 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
35999 );
36000 let e = _mm_set_ps(1., 1., 1., 8.);
36001 assert_eq_m128(r, e);
36002 }
36003
36004 #[simd_test(enable = "avx512f")]
36005 unsafe fn test_mm_scalef_round_sd() {
36006 let a = _mm_set1_pd(1.);
36007 let b = _mm_set1_pd(3.);
36008 let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36009 let e = _mm_set_pd(1., 8.);
36010 assert_eq_m128d(r, e);
36011 }
36012
36013 #[simd_test(enable = "avx512f")]
36014 unsafe fn test_mm_mask_scalef_round_sd() {
36015 let a = _mm_set1_pd(1.);
36016 let b = _mm_set1_pd(3.);
36017 let r = _mm_mask_scalef_round_sd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36018 let e = _mm_set_pd(1., 1.);
36019 assert_eq_m128d(r, e);
36020 let r = _mm_mask_scalef_round_sd(
36021 a,
36022 0b11111111,
36023 a,
36024 b,
36025 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36026 );
36027 let e = _mm_set_pd(1., 8.);
36028 assert_eq_m128d(r, e);
36029 }
36030
36031 #[simd_test(enable = "avx512f")]
36032 unsafe fn test_mm_maskz_scalef_round_sd() {
36033 let a = _mm_set1_pd(1.);
36034 let b = _mm_set1_pd(3.);
36035 let r = _mm_maskz_scalef_round_sd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36036 let e = _mm_set_pd(1., 0.);
36037 assert_eq_m128d(r, e);
36038 let r = _mm_maskz_scalef_round_sd(
36039 0b11111111,
36040 a,
36041 b,
36042 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36043 );
36044 let e = _mm_set_pd(1., 8.);
36045 assert_eq_m128d(r, e);
36046 }
36047
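// The *_round FMA variants repeat the masked scalar FMA tests above, but with an explicit
// rounding mode: _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects round-to-nearest-even
// and suppresses exceptions, so the expected values match the non-rounding versions.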
36048 #[simd_test(enable = "avx512f")]
36049 unsafe fn test_mm_fmadd_round_ss() {
36050 let a = _mm_set1_ps(1.);
36051 let b = _mm_set1_ps(2.);
36052 let c = _mm_set1_ps(3.);
36053 let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36054 let e = _mm_set_ps(1., 1., 1., 5.);
36055 assert_eq_m128(r, e);
36056 }
36057
36058 #[simd_test(enable = "avx512f")]
36059 unsafe fn test_mm_mask_fmadd_round_ss() {
36060 let a = _mm_set1_ps(1.);
36061 let b = _mm_set1_ps(2.);
36062 let c = _mm_set1_ps(3.);
36063 let r = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36064 assert_eq_m128(r, a);
36065 let r = _mm_mask_fmadd_round_ss(
36066 a,
36067 0b11111111,
36068 b,
36069 c,
36070 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36071 );
36072 let e = _mm_set_ps(1., 1., 1., 5.);
36073 assert_eq_m128(r, e);
36074 }
36075
36076 #[simd_test(enable = "avx512f")]
36077 unsafe fn test_mm_maskz_fmadd_round_ss() {
36078 let a = _mm_set1_ps(1.);
36079 let b = _mm_set1_ps(2.);
36080 let c = _mm_set1_ps(3.);
36081 let r = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36082 let e = _mm_set_ps(1., 1., 1., 0.);
36083 assert_eq_m128(r, e);
36084 let r = _mm_maskz_fmadd_round_ss(
36085 0b11111111,
36086 a,
36087 b,
36088 c,
36089 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36090 );
36091 let e = _mm_set_ps(1., 1., 1., 5.);
36092 assert_eq_m128(r, e);
36093 }
36094
36095 #[simd_test(enable = "avx512f")]
36096 unsafe fn test_mm_mask3_fmadd_round_ss() {
36097 let a = _mm_set1_ps(1.);
36098 let b = _mm_set1_ps(2.);
36099 let c = _mm_set1_ps(3.);
36100 let r = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36101 assert_eq_m128(r, c);
36102 let r = _mm_mask3_fmadd_round_ss(
36103 a,
36104 b,
36105 c,
36106 0b11111111,
36107 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36108 );
36109 let e = _mm_set_ps(3., 3., 3., 5.);
36110 assert_eq_m128(r, e);
36111 }
36112
36113 #[simd_test(enable = "avx512f")]
36114 unsafe fn test_mm_fmadd_round_sd() {
36115 let a = _mm_set1_pd(1.);
36116 let b = _mm_set1_pd(2.);
36117 let c = _mm_set1_pd(3.);
36118 let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36119 let e = _mm_set_pd(1., 5.);
36120 assert_eq_m128d(r, e);
36121 }
36122
36123 #[simd_test(enable = "avx512f")]
36124 unsafe fn test_mm_mask_fmadd_round_sd() {
36125 let a = _mm_set1_pd(1.);
36126 let b = _mm_set1_pd(2.);
36127 let c = _mm_set1_pd(3.);
36128 let r = _mm_mask_fmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36129 assert_eq_m128d(r, a);
36130 let r = _mm_mask_fmadd_round_sd(
36131 a,
36132 0b11111111,
36133 b,
36134 c,
36135 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36136 );
36137 let e = _mm_set_pd(1., 5.);
36138 assert_eq_m128d(r, e);
36139 }
36140
36141 #[simd_test(enable = "avx512f")]
36142 unsafe fn test_mm_maskz_fmadd_round_sd() {
36143 let a = _mm_set1_pd(1.);
36144 let b = _mm_set1_pd(2.);
36145 let c = _mm_set1_pd(3.);
36146 let r = _mm_maskz_fmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36147 let e = _mm_set_pd(1., 0.);
36148 assert_eq_m128d(r, e);
36149 let r = _mm_maskz_fmadd_round_sd(
36150 0b11111111,
36151 a,
36152 b,
36153 c,
36154 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36155 );
36156 let e = _mm_set_pd(1., 5.);
36157 assert_eq_m128d(r, e);
36158 }
36159
36160 #[simd_test(enable = "avx512f")]
36161 unsafe fn test_mm_mask3_fmadd_round_sd() {
36162 let a = _mm_set1_pd(1.);
36163 let b = _mm_set1_pd(2.);
36164 let c = _mm_set1_pd(3.);
36165 let r = _mm_mask3_fmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36166 assert_eq_m128d(r, c);
36167 let r = _mm_mask3_fmadd_round_sd(
36168 a,
36169 b,
36170 c,
36171 0b11111111,
36172 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36173 );
36174 let e = _mm_set_pd(3., 5.);
36175 assert_eq_m128d(r, e);
36176 }
36177
36178 #[simd_test(enable = "avx512f")]
36179 unsafe fn test_mm_fmsub_round_ss() {
36180 let a = _mm_set1_ps(1.);
36181 let b = _mm_set1_ps(2.);
36182 let c = _mm_set1_ps(3.);
36183 let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36184 let e = _mm_set_ps(1., 1., 1., -1.);
36185 assert_eq_m128(r, e);
36186 }
36187
36188 #[simd_test(enable = "avx512f")]
36189 unsafe fn test_mm_mask_fmsub_round_ss() {
36190 let a = _mm_set1_ps(1.);
36191 let b = _mm_set1_ps(2.);
36192 let c = _mm_set1_ps(3.);
36193 let r = _mm_mask_fmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36194 assert_eq_m128(r, a);
36195 let r = _mm_mask_fmsub_round_ss(
36196 a,
36197 0b11111111,
36198 b,
36199 c,
36200 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36201 );
36202 let e = _mm_set_ps(1., 1., 1., -1.);
36203 assert_eq_m128(r, e);
36204 }
36205
36206 #[simd_test(enable = "avx512f")]
36207 unsafe fn test_mm_maskz_fmsub_round_ss() {
36208 let a = _mm_set1_ps(1.);
36209 let b = _mm_set1_ps(2.);
36210 let c = _mm_set1_ps(3.);
36211 let r = _mm_maskz_fmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36212 let e = _mm_set_ps(1., 1., 1., 0.);
36213 assert_eq_m128(r, e);
36214 let r = _mm_maskz_fmsub_round_ss(
36215 0b11111111,
36216 a,
36217 b,
36218 c,
36219 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36220 );
36221 let e = _mm_set_ps(1., 1., 1., -1.);
36222 assert_eq_m128(r, e);
36223 }
36224
36225 #[simd_test(enable = "avx512f")]
36226 unsafe fn test_mm_mask3_fmsub_round_ss() {
36227 let a = _mm_set1_ps(1.);
36228 let b = _mm_set1_ps(2.);
36229 let c = _mm_set1_ps(3.);
36230 let r = _mm_mask3_fmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36231 assert_eq_m128(r, c);
36232 let r = _mm_mask3_fmsub_round_ss(
36233 a,
36234 b,
36235 c,
36236 0b11111111,
36237 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36238 );
36239 let e = _mm_set_ps(3., 3., 3., -1.);
36240 assert_eq_m128(r, e);
36241 }
36242
36243 #[simd_test(enable = "avx512f")]
36244 unsafe fn test_mm_fmsub_round_sd() {
36245 let a = _mm_set1_pd(1.);
36246 let b = _mm_set1_pd(2.);
36247 let c = _mm_set1_pd(3.);
36248 let r = _mm_fmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36249 let e = _mm_set_pd(1., -1.);
36250 assert_eq_m128d(r, e);
36251 }
36252
36253 #[simd_test(enable = "avx512f")]
36254 unsafe fn test_mm_mask_fmsub_round_sd() {
36255 let a = _mm_set1_pd(1.);
36256 let b = _mm_set1_pd(2.);
36257 let c = _mm_set1_pd(3.);
36258 let r = _mm_mask_fmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36259 assert_eq_m128d(r, a);
36260 let r = _mm_mask_fmsub_round_sd(
36261 a,
36262 0b11111111,
36263 b,
36264 c,
36265 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36266 );
36267 let e = _mm_set_pd(1., -1.);
36268 assert_eq_m128d(r, e);
36269 }
36270
36271 #[simd_test(enable = "avx512f")]
36272 unsafe fn test_mm_maskz_fmsub_round_sd() {
36273 let a = _mm_set1_pd(1.);
36274 let b = _mm_set1_pd(2.);
36275 let c = _mm_set1_pd(3.);
36276 let r = _mm_maskz_fmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36277 let e = _mm_set_pd(1., 0.);
36278 assert_eq_m128d(r, e);
36279 let r = _mm_maskz_fmsub_round_sd(
36280 0b11111111,
36281 a,
36282 b,
36283 c,
36284 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36285 );
36286 let e = _mm_set_pd(1., -1.);
36287 assert_eq_m128d(r, e);
36288 }
36289
36290 #[simd_test(enable = "avx512f")]
36291 unsafe fn test_mm_mask3_fmsub_round_sd() {
36292 let a = _mm_set1_pd(1.);
36293 let b = _mm_set1_pd(2.);
36294 let c = _mm_set1_pd(3.);
36295 let r = _mm_mask3_fmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36296 assert_eq_m128d(r, c);
36297 let r = _mm_mask3_fmsub_round_sd(
36298 a,
36299 b,
36300 c,
36301 0b11111111,
36302 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36303 );
36304 let e = _mm_set_pd(3., -1.);
36305 assert_eq_m128d(r, e);
36306 }
36307
36308 #[simd_test(enable = "avx512f")]
36309 unsafe fn test_mm_fnmadd_round_ss() {
36310 let a = _mm_set1_ps(1.);
36311 let b = _mm_set1_ps(2.);
36312 let c = _mm_set1_ps(3.);
36313 let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36314 let e = _mm_set_ps(1., 1., 1., 1.);
36315 assert_eq_m128(r, e);
36316 }
36317
36318 #[simd_test(enable = "avx512f")]
36319 unsafe fn test_mm_mask_fnmadd_round_ss() {
36320 let a = _mm_set1_ps(1.);
36321 let b = _mm_set1_ps(2.);
36322 let c = _mm_set1_ps(3.);
36323 let r = _mm_mask_fnmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36324 assert_eq_m128(r, a);
36325 let r = _mm_mask_fnmadd_round_ss(
36326 a,
36327 0b11111111,
36328 b,
36329 c,
36330 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36331 );
36332 let e = _mm_set_ps(1., 1., 1., 1.);
36333 assert_eq_m128(r, e);
36334 }
36335
36336 #[simd_test(enable = "avx512f")]
36337 unsafe fn test_mm_maskz_fnmadd_round_ss() {
36338 let a = _mm_set1_ps(1.);
36339 let b = _mm_set1_ps(2.);
36340 let c = _mm_set1_ps(3.);
36341 let r =
36342 _mm_maskz_fnmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36343 let e = _mm_set_ps(1., 1., 1., 0.);
36344 assert_eq_m128(r, e);
36345 let r = _mm_maskz_fnmadd_round_ss(
36346 0b11111111,
36347 a,
36348 b,
36349 c,
36350 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36351 );
36352 let e = _mm_set_ps(1., 1., 1., 1.);
36353 assert_eq_m128(r, e);
36354 }
36355
36356 #[simd_test(enable = "avx512f")]
36357 unsafe fn test_mm_mask3_fnmadd_round_ss() {
36358 let a = _mm_set1_ps(1.);
36359 let b = _mm_set1_ps(2.);
36360 let c = _mm_set1_ps(3.);
36361 let r =
36362 _mm_mask3_fnmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36363 assert_eq_m128(r, c);
36364 let r = _mm_mask3_fnmadd_round_ss(
36365 a,
36366 b,
36367 c,
36368 0b11111111,
36369 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36370 );
36371 let e = _mm_set_ps(3., 3., 3., 1.);
36372 assert_eq_m128(r, e);
36373 }
36374
36375 #[simd_test(enable = "avx512f")]
36376 unsafe fn test_mm_fnmadd_round_sd() {
36377 let a = _mm_set1_pd(1.);
36378 let b = _mm_set1_pd(2.);
36379 let c = _mm_set1_pd(3.);
36380 let r = _mm_fnmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36381 let e = _mm_set_pd(1., 1.);
36382 assert_eq_m128d(r, e);
36383 }
36384
36385 #[simd_test(enable = "avx512f")]
36386 unsafe fn test_mm_mask_fnmadd_round_sd() {
36387 let a = _mm_set1_pd(1.);
36388 let b = _mm_set1_pd(2.);
36389 let c = _mm_set1_pd(3.);
36390 let r = _mm_mask_fnmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36391 assert_eq_m128d(r, a);
36392 let r = _mm_mask_fnmadd_round_sd(
36393 a,
36394 0b11111111,
36395 b,
36396 c,
36397 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36398 );
36399 let e = _mm_set_pd(1., 1.);
36400 assert_eq_m128d(r, e);
36401 }
36402
36403 #[simd_test(enable = "avx512f")]
36404 unsafe fn test_mm_maskz_fnmadd_round_sd() {
36405 let a = _mm_set1_pd(1.);
36406 let b = _mm_set1_pd(2.);
36407 let c = _mm_set1_pd(3.);
36408 let r =
36409 _mm_maskz_fnmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36410 let e = _mm_set_pd(1., 0.);
36411 assert_eq_m128d(r, e);
36412 let r = _mm_maskz_fnmadd_round_sd(
36413 0b11111111,
36414 a,
36415 b,
36416 c,
36417 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36418 );
36419 let e = _mm_set_pd(1., 1.);
36420 assert_eq_m128d(r, e);
36421 }
36422
36423 #[simd_test(enable = "avx512f")]
36424 unsafe fn test_mm_mask3_fnmadd_round_sd() {
36425 let a = _mm_set1_pd(1.);
36426 let b = _mm_set1_pd(2.);
36427 let c = _mm_set1_pd(3.);
36428 let r =
36429 _mm_mask3_fnmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36430 assert_eq_m128d(r, c);
36431 let r = _mm_mask3_fnmadd_round_sd(
36432 a,
36433 b,
36434 c,
36435 0b11111111,
36436 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36437 );
36438 let e = _mm_set_pd(3., 1.);
36439 assert_eq_m128d(r, e);
36440 }
36441
36442 #[simd_test(enable = "avx512f")]
36443 unsafe fn test_mm_fnmsub_round_ss() {
36444 let a = _mm_set1_ps(1.);
36445 let b = _mm_set1_ps(2.);
36446 let c = _mm_set1_ps(3.);
36447 let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36448 let e = _mm_set_ps(1., 1., 1., -5.);
36449 assert_eq_m128(r, e);
36450 }
36451
36452 #[simd_test(enable = "avx512f")]
36453 unsafe fn test_mm_mask_fnmsub_round_ss() {
36454 let a = _mm_set1_ps(1.);
36455 let b = _mm_set1_ps(2.);
36456 let c = _mm_set1_ps(3.);
36457 let r = _mm_mask_fnmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36458 assert_eq_m128(r, a);
36459 let r = _mm_mask_fnmsub_round_ss(
36460 a,
36461 0b11111111,
36462 b,
36463 c,
36464 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36465 );
36466 let e = _mm_set_ps(1., 1., 1., -5.);
36467 assert_eq_m128(r, e);
36468 }
36469
36470 #[simd_test(enable = "avx512f")]
36471 unsafe fn test_mm_maskz_fnmsub_round_ss() {
36472 let a = _mm_set1_ps(1.);
36473 let b = _mm_set1_ps(2.);
36474 let c = _mm_set1_ps(3.);
36475 let r =
36476 _mm_maskz_fnmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36477 let e = _mm_set_ps(1., 1., 1., 0.);
36478 assert_eq_m128(r, e);
36479 let r = _mm_maskz_fnmsub_round_ss(
36480 0b11111111,
36481 a,
36482 b,
36483 c,
36484 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36485 );
36486 let e = _mm_set_ps(1., 1., 1., -5.);
36487 assert_eq_m128(r, e);
36488 }
36489
36490 #[simd_test(enable = "avx512f")]
36491 unsafe fn test_mm_mask3_fnmsub_round_ss() {
36492 let a = _mm_set1_ps(1.);
36493 let b = _mm_set1_ps(2.);
36494 let c = _mm_set1_ps(3.);
36495 let r =
36496 _mm_mask3_fnmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36497 assert_eq_m128(r, c);
36498 let r = _mm_mask3_fnmsub_round_ss(
36499 a,
36500 b,
36501 c,
36502 0b11111111,
36503 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36504 );
36505 let e = _mm_set_ps(3., 3., 3., -5.);
36506 assert_eq_m128(r, e);
36507 }
36508
36509 #[simd_test(enable = "avx512f")]
36510 unsafe fn test_mm_fnmsub_round_sd() {
36511 let a = _mm_set1_pd(1.);
36512 let b = _mm_set1_pd(2.);
36513 let c = _mm_set1_pd(3.);
36514 let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36515 let e = _mm_set_pd(1., -5.);
36516 assert_eq_m128d(r, e);
36517 }
36518
36519 #[simd_test(enable = "avx512f")]
36520 unsafe fn test_mm_mask_fnmsub_round_sd() {
36521 let a = _mm_set1_pd(1.);
36522 let b = _mm_set1_pd(2.);
36523 let c = _mm_set1_pd(3.);
36524 let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36525 assert_eq_m128d(r, a);
36526 let r = _mm_mask_fnmsub_round_sd(
36527 a,
36528 0b11111111,
36529 b,
36530 c,
36531 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36532 );
36533 let e = _mm_set_pd(1., -5.);
36534 assert_eq_m128d(r, e);
36535 }
36536
36537 #[simd_test(enable = "avx512f")]
36538 unsafe fn test_mm_maskz_fnmsub_round_sd() {
36539 let a = _mm_set1_pd(1.);
36540 let b = _mm_set1_pd(2.);
36541 let c = _mm_set1_pd(3.);
36542 let r =
36543 _mm_maskz_fnmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36544 let e = _mm_set_pd(1., 0.);
36545 assert_eq_m128d(r, e);
36546 let r = _mm_maskz_fnmsub_round_sd(
36547 0b11111111,
36548 a,
36549 b,
36550 c,
36551 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36552 );
36553 let e = _mm_set_pd(1., -5.);
36554 assert_eq_m128d(r, e);
36555 }
36556
36557 #[simd_test(enable = "avx512f")]
36558 unsafe fn test_mm_mask3_fnmsub_round_sd() {
36559 let a = _mm_set1_pd(1.);
36560 let b = _mm_set1_pd(2.);
36561 let c = _mm_set1_pd(3.);
36562 let r =
36563 _mm_mask3_fnmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
36564 assert_eq_m128d(r, c);
36565 let r = _mm_mask3_fnmsub_round_sd(
36566 a,
36567 b,
36568 c,
36569 0b11111111,
36570 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
36571 );
36572 let e = _mm_set_pd(3., -5.);
36573 assert_eq_m128d(r, e);
36574 }
36575
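// fixupimm fixes up special values in the low lane according to the lookup table encoded
// in `c` and imm8; with a NaN input and imm8 == 5 these tests expect the low lane to become
// -0.0, while the maskz variant with an all-zero mask zeroes the low lane instead.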
36576 #[simd_test(enable = "avx512f")]
36577 unsafe fn test_mm_fixupimm_ss() {
36578 let a = _mm_set_ps(0., 0., 0., f32::NAN);
36579 let b = _mm_set1_ps(f32::MAX);
36580 let c = _mm_set1_epi32(i32::MAX);
36581 let r = _mm_fixupimm_ss(a, b, c, 5);
36582 let e = _mm_set_ps(0., 0., 0., -0.0);
36583 assert_eq_m128(r, e);
36584 }
36585
36586 #[simd_test(enable = "avx512f")]
36587 unsafe fn test_mm_mask_fixupimm_ss() {
36588 let a = _mm_set_ps(0., 0., 0., f32::NAN);
36589 let b = _mm_set1_ps(f32::MAX);
36590 let c = _mm_set1_epi32(i32::MAX);
36591 let r = _mm_mask_fixupimm_ss(a, 0b11111111, b, c, 5);
36592 let e = _mm_set_ps(0., 0., 0., -0.0);
36593 assert_eq_m128(r, e);
36594 }
36595
36596 #[simd_test(enable = "avx512f")]
36597 unsafe fn test_mm_maskz_fixupimm_ss() {
36598 let a = _mm_set_ps(0., 0., 0., f32::NAN);
36599 let b = _mm_set1_ps(f32::MAX);
36600 let c = _mm_set1_epi32(i32::MAX);
36601 let r = _mm_maskz_fixupimm_ss(0b00000000, a, b, c, 5);
36602 let e = _mm_set_ps(0., 0., 0., 0.0);
36603 assert_eq_m128(r, e);
36604 let r = _mm_maskz_fixupimm_ss(0b11111111, a, b, c, 5);
36605 let e = _mm_set_ps(0., 0., 0., -0.0);
36606 assert_eq_m128(r, e);
36607 }
36608
36609 #[simd_test(enable = "avx512f")]
36610 unsafe fn test_mm_fixupimm_sd() {
36611 let a = _mm_set_pd(0., f64::NAN);
36612 let b = _mm_set1_pd(f64::MAX);
36613 let c = _mm_set1_epi64x(i32::MAX as i64);
36614 let r = _mm_fixupimm_sd(a, b, c, 5);
36615 let e = _mm_set_pd(0., -0.0);
36616 assert_eq_m128d(r, e);
36617 }
36618
36619 #[simd_test(enable = "avx512f")]
36620 unsafe fn test_mm_mask_fixupimm_sd() {
36621 let a = _mm_set_pd(0., f64::NAN);
36622 let b = _mm_set1_pd(f64::MAX);
36623 let c = _mm_set1_epi64x(i32::MAX as i64);
36624 let r = _mm_mask_fixupimm_sd(a, 0b11111111, b, c, 5);
36625 let e = _mm_set_pd(0., -0.0);
36626 assert_eq_m128d(r, e);
36627 }
36628
36629 #[simd_test(enable = "avx512f")]
36630 unsafe fn test_mm_maskz_fixupimm_sd() {
36631 let a = _mm_set_pd(0., f64::NAN);
36632 let b = _mm_set1_pd(f64::MAX);
36633 let c = _mm_set1_epi64x(i32::MAX as i64);
36634 let r = _mm_maskz_fixupimm_sd(0b00000000, a, b, c, 5);
36635 let e = _mm_set_pd(0., 0.0);
36636 assert_eq_m128d(r, e);
36637 let r = _mm_maskz_fixupimm_sd(0b11111111, a, b, c, 5);
36638 let e = _mm_set_pd(0., -0.0);
36639 assert_eq_m128d(r, e);
36640 }
36641
36642 #[simd_test(enable = "avx512f")]
36643 unsafe fn test_mm_fixupimm_round_ss() {
36644 let a = _mm_set_ps(0., 0., 0., f32::NAN);
36645 let b = _mm_set1_ps(f32::MAX);
36646 let c = _mm_set1_epi32(i32::MAX);
36647 let r = _mm_fixupimm_round_ss(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36648 let e = _mm_set_ps(0., 0., 0., -0.0);
36649 assert_eq_m128(r, e);
36650 }
36651
36652 #[simd_test(enable = "avx512f")]
36653 unsafe fn test_mm_mask_fixupimm_round_ss() {
36654 let a = _mm_set_ps(0., 0., 0., f32::NAN);
36655 let b = _mm_set1_ps(f32::MAX);
36656 let c = _mm_set1_epi32(i32::MAX);
36657 let r = _mm_mask_fixupimm_round_ss(a, 0b11111111, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36658 let e = _mm_set_ps(0., 0., 0., -0.0);
36659 assert_eq_m128(r, e);
36660 }
36661
36662 #[simd_test(enable = "avx512f")]
36663 unsafe fn test_mm_maskz_fixupimm_round_ss() {
36664 let a = _mm_set_ps(0., 0., 0., f32::NAN);
36665 let b = _mm_set1_ps(f32::MAX);
36666 let c = _mm_set1_epi32(i32::MAX);
36667 let r = _mm_maskz_fixupimm_round_ss(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36668 let e = _mm_set_ps(0., 0., 0., 0.0);
36669 assert_eq_m128(r, e);
36670 let r = _mm_maskz_fixupimm_round_ss(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36671 let e = _mm_set_ps(0., 0., 0., -0.0);
36672 assert_eq_m128(r, e);
36673 }
36674
36675 #[simd_test(enable = "avx512f")]
36676 unsafe fn test_mm_fixupimm_round_sd() {
36677 let a = _mm_set_pd(0., f64::NAN);
36678 let b = _mm_set1_pd(f64::MAX);
36679 let c = _mm_set1_epi64x(i32::MAX as i64);
36680 let r = _mm_fixupimm_round_sd(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36681 let e = _mm_set_pd(0., -0.0);
36682 assert_eq_m128d(r, e);
36683 }
36684
36685 #[simd_test(enable = "avx512f")]
36686 unsafe fn test_mm_mask_fixupimm_round_sd() {
36687 let a = _mm_set_pd(0., f64::NAN);
36688 let b = _mm_set1_pd(f64::MAX);
36689 let c = _mm_set1_epi64x(i32::MAX as i64);
36690 let r = _mm_mask_fixupimm_round_sd(a, 0b11111111, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36691 let e = _mm_set_pd(0., -0.0);
36692 assert_eq_m128d(r, e);
36693 }
36694
36695 #[simd_test(enable = "avx512f")]
36696 unsafe fn test_mm_maskz_fixupimm_round_sd() {
36697 let a = _mm_set_pd(0., f64::NAN);
36698 let b = _mm_set1_pd(f64::MAX);
36699 let c = _mm_set1_epi64x(i32::MAX as i64);
36700 let r = _mm_maskz_fixupimm_round_sd(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36701 let e = _mm_set_pd(0., 0.0);
36702 assert_eq_m128d(r, e);
36703 let r = _mm_maskz_fixupimm_round_sd(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
36704 let e = _mm_set_pd(0., -0.0);
36705 assert_eq_m128d(r, e);
36706 }
36707
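// Masked scalar conversions: cvtss_sd converts the low f32 of `b` to f64 (and cvtsd_ss the
// other way), the upper lane(s) coming from `a`; a clear mask bit keeps `src` (mask) or
// zero (maskz) in the low lane.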
36708 #[simd_test(enable = "avx512f")]
36709 unsafe fn test_mm_mask_cvtss_sd() {
36710 let a = _mm_set_pd(6., -7.5);
36711 let b = _mm_set_ps(0., -0.5, 1., -1.5);
36712 let r = _mm_mask_cvtss_sd(a, 0, a, b);
36713 assert_eq_m128d(r, a);
36714 let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
36715 let e = _mm_set_pd(6., -1.5);
36716 assert_eq_m128d(r, e);
36717 }
36718
36719 #[simd_test(enable = "avx512f")]
36720 unsafe fn test_mm_maskz_cvtss_sd() {
36721 let a = _mm_set_pd(6., -7.5);
36722 let b = _mm_set_ps(0., -0.5, 1., -1.5);
36723 let r = _mm_maskz_cvtss_sd(0, a, b);
36724 let e = _mm_set_pd(6., 0.);
36725 assert_eq_m128d(r, e);
36726 let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
36727 let e = _mm_set_pd(6., -1.5);
36728 assert_eq_m128d(r, e);
36729 }
36730
36731 #[simd_test(enable = "avx512f")]
36732 unsafe fn test_mm_mask_cvtsd_ss() {
36733 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36734 let b = _mm_set_pd(6., -7.5);
36735 let r = _mm_mask_cvtsd_ss(a, 0, a, b);
36736 assert_eq_m128(r, a);
36737 let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
36738 let e = _mm_set_ps(0., -0.5, 1., -7.5);
36739 assert_eq_m128(r, e);
36740 }
36741
36742 #[simd_test(enable = "avx512f")]
36743 unsafe fn test_mm_maskz_cvtsd_ss() {
36744 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36745 let b = _mm_set_pd(6., -7.5);
36746 let r = _mm_maskz_cvtsd_ss(0, a, b);
36747 let e = _mm_set_ps(0., -0.5, 1., 0.);
36748 assert_eq_m128(r, e);
36749 let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
36750 let e = _mm_set_ps(0., -0.5, 1., -7.5);
36751 assert_eq_m128(r, e);
36752 }
36753
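// The same conversions with an explicit rounding argument. _MM_FROUND_CUR_DIRECTION
// defers to the rounding mode currently in MXCSR; widening f32 -> f64 is exact
// anyway, so the expected values do not depend on the rounding mode.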
36754 #[simd_test(enable = "avx512f")]
36755 unsafe fn test_mm_cvt_roundss_sd() {
36756 let a = _mm_set_pd(6., -7.5);
36757 let b = _mm_set_ps(0., -0.5, 1., -1.5);
36758 let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION);
36759 let e = _mm_set_pd(6., -1.5);
36760 assert_eq_m128d(r, e);
36761 }
36762
36763 #[simd_test(enable = "avx512f")]
36764 unsafe fn test_mm_mask_cvt_roundss_sd() {
36765 let a = _mm_set_pd(6., -7.5);
36766 let b = _mm_set_ps(0., -0.5, 1., -1.5);
36767 let r = _mm_mask_cvt_roundss_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
36768 assert_eq_m128d(r, a);
36769 let r = _mm_mask_cvt_roundss_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
36770 let e = _mm_set_pd(6., -1.5);
36771 assert_eq_m128d(r, e);
36772 }
36773
36774 #[simd_test(enable = "avx512f")]
36775 unsafe fn test_mm_maskz_cvt_roundss_sd() {
36776 let a = _mm_set_pd(6., -7.5);
36777 let b = _mm_set_ps(0., -0.5, 1., -1.5);
36778 let r = _mm_maskz_cvt_roundss_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
36779 let e = _mm_set_pd(6., 0.);
36780 assert_eq_m128d(r, e);
36781 let r = _mm_maskz_cvt_roundss_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
36782 let e = _mm_set_pd(6., -1.5);
36783 assert_eq_m128d(r, e);
36784 }
36785
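// Narrowing f64 -> f32 with _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC. -7.5 is
// exactly representable as f32, so the rounding control does not change the
// expected value either.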
36786 #[simd_test(enable = "avx512f")]
36787 unsafe fn test_mm_cvt_roundsd_ss() {
36788 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36789 let b = _mm_set_pd(6., -7.5);
36790 let r = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36791 let e = _mm_set_ps(0., -0.5, 1., -7.5);
36792 assert_eq_m128(r, e);
36793 }
36794
36795 #[simd_test(enable = "avx512f")]
36796 unsafe fn test_mm_mask_cvt_roundsd_ss() {
36797 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36798 let b = _mm_set_pd(6., -7.5);
36799 let r = _mm_mask_cvt_roundsd_ss(a, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36800 assert_eq_m128(r, a);
36801 let r =
36802 _mm_mask_cvt_roundsd_ss(a, 0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36803 let e = _mm_set_ps(0., -0.5, 1., -7.5);
36804 assert_eq_m128(r, e);
36805 }
36806
36807 #[simd_test(enable = "avx512f")]
36808 unsafe fn test_mm_maskz_cvt_roundsd_ss() {
36809 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36810 let b = _mm_set_pd(6., -7.5);
36811 let r = _mm_maskz_cvt_roundsd_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36812 let e = _mm_set_ps(0., -0.5, 1., 0.);
36813 assert_eq_m128(r, e);
36814 let r = _mm_maskz_cvt_roundsd_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36815 let e = _mm_set_ps(0., -0.5, 1., -7.5);
36816 assert_eq_m128(r, e);
36817 }
36818
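// Scalar float -> integer conversions. Expected values used below:
//   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC : round toward zero, so -1.5 -> -1
//   _MM_FROUND_CUR_DIRECTION               : use MXCSR (nearest-even by default), so -1.5 -> -2
//   negative input converted to u32        : out of range; the instruction returns
//                                            the all-ones pattern, i.e. u32::MAX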
36819 #[simd_test(enable = "avx512f")]
36820 unsafe fn test_mm_cvt_roundss_si32() {
36821 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36822 let r = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36823 let e: i32 = -1;
36824 assert_eq!(r, e);
36825 }
36826
36827 #[simd_test(enable = "avx512f")]
36828 unsafe fn test_mm_cvt_roundss_i32() {
36829 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36830 let r = _mm_cvt_roundss_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36831 let e: i32 = -1;
36832 assert_eq!(r, e);
36833 }
36834
36835 #[simd_test(enable = "avx512f")]
36836 unsafe fn test_mm_cvt_roundss_u32() {
36837 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36838 let r = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36839 let e: u32 = u32::MAX;
36840 assert_eq!(r, e);
36841 }
36842
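// No rounding argument here, so the MXCSR default (round to nearest even)
// applies and -1.5 converts to -2.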
36843 #[simd_test(enable = "avx512f")]
36844 unsafe fn test_mm_cvtss_i32() {
36845 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36846 let r = _mm_cvtss_i32(a);
36847 let e: i32 = -2;
36848 assert_eq!(r, e);
36849 }
36850
36851 #[simd_test(enable = "avx512f")]
36852 unsafe fn test_mm_cvtss_u32() {
36853 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36854 let r = _mm_cvtss_u32(a);
36855 let e: u32 = u32::MAX;
36856 assert_eq!(r, e);
36857 }
36858
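// The f64 -> i32/u32 cases below mirror the f32 cases above: same rounding
// behaviour and the same out-of-range handling for the unsigned conversions.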
36859 #[simd_test(enable = "avx512f")]
36860 unsafe fn test_mm_cvt_roundsd_si32() {
36861 let a = _mm_set_pd(1., -1.5);
36862 let r = _mm_cvt_roundsd_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36863 let e: i32 = -1;
36864 assert_eq!(r, e);
36865 }
36866
36867 #[simd_test(enable = "avx512f")]
36868 unsafe fn test_mm_cvt_roundsd_i32() {
36869 let a = _mm_set_pd(1., -1.5);
36870 let r = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36871 let e: i32 = -1;
36872 assert_eq!(r, e);
36873 }
36874
36875 #[simd_test(enable = "avx512f")]
36876 unsafe fn test_mm_cvt_roundsd_u32() {
36877 let a = _mm_set_pd(1., -1.5);
36878 let r = _mm_cvt_roundsd_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36879 let e: u32 = u32::MAX;
36880 assert_eq!(r, e);
36881 }
36882
36883 #[simd_test(enable = "avx512f")]
36884 unsafe fn test_mm_cvtsd_i32() {
36885 let a = _mm_set_pd(1., -1.5);
36886 let r = _mm_cvtsd_i32(a);
36887 let e: i32 = -2;
36888 assert_eq!(r, e);
36889 }
36890
36891 #[simd_test(enable = "avx512f")]
36892 unsafe fn test_mm_cvtsd_u32() {
36893 let a = _mm_set_pd(1., -1.5);
36894 let r = _mm_cvtsd_u32(a);
36895 let e: u32 = u32::MAX;
36896 assert_eq!(r, e);
36897 }
36898
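// Integer -> float conversions into the low lane. 9 is exactly representable in
// both f32 and f64, so the rounding argument is irrelevant for these inputs;
// _mm_cvt_roundi32_ss and _mm_cvt_roundsi32_ss are the same operation under two
// names.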
36899 #[simd_test(enable = "avx512f")]
36900 unsafe fn test_mm_cvt_roundi32_ss() {
36901 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36902 let b: i32 = 9;
36903 let r = _mm_cvt_roundi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36904 let e = _mm_set_ps(0., -0.5, 1., 9.);
36905 assert_eq_m128(r, e);
36906 }
36907
36908 #[simd_test(enable = "avx512f")]
36909 unsafe fn test_mm_cvt_roundsi32_ss() {
36910 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36911 let b: i32 = 9;
36912 let r = _mm_cvt_roundsi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36913 let e = _mm_set_ps(0., -0.5, 1., 9.);
36914 assert_eq_m128(r, e);
36915 }
36916
36917 #[simd_test(enable = "avx512f")]
36918 unsafe fn test_mm_cvt_roundu32_ss() {
36919 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36920 let b: u32 = 9;
36921 let r = _mm_cvt_roundu32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
36922 let e = _mm_set_ps(0., -0.5, 1., 9.);
36923 assert_eq_m128(r, e);
36924 }
36925
36926 #[simd_test(enable = "avx512f")]
36927 unsafe fn test_mm_cvti32_ss() {
36928 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36929 let b: i32 = 9;
36930 let r = _mm_cvti32_ss(a, b);
36931 let e = _mm_set_ps(0., -0.5, 1., 9.);
36932 assert_eq_m128(r, e);
36933 }
36934
36935 #[simd_test(enable = "avx512f")]
36936 unsafe fn test_mm_cvti32_sd() {
36937 let a = _mm_set_pd(1., -1.5);
36938 let b: i32 = 9;
36939 let r = _mm_cvti32_sd(a, b);
36940 let e = _mm_set_pd(1., 9.);
36941 assert_eq_m128d(r, e);
36942 }
36943
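// The cvtt* intrinsics convert with truncation (round toward zero) regardless of
// MXCSR, so -1.5 is expected to yield -1, and a negative input converted to u32
// is still out of range and yields u32::MAX. (Compare: `(-1.5f32) as i32` in
// Rust is also -1, since `as` truncates toward zero.)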
36944 #[simd_test(enable = "avx512f")]
36945 unsafe fn test_mm_cvtt_roundss_si32() {
36946 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36947 let r = _mm_cvtt_roundss_si32(a, _MM_FROUND_CUR_DIRECTION);
36948 let e: i32 = -1; // truncation toward zero of -1.5
36949 assert_eq!(r, e);
36950 }
36951
36952 #[simd_test(enable = "avx512f")]
36953 unsafe fn test_mm_cvtt_roundss_i32() {
36954 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36955 let r = _mm_cvtt_roundss_i32(a, _MM_FROUND_CUR_DIRECTION);
36956 let e: i32 = -1;
36957 assert_eq!(r, e);
36958 }
36959
36960 #[simd_test(enable = "avx512f")]
36961 unsafe fn test_mm_cvtt_roundss_u32() {
36962 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36963 let r = _mm_cvtt_roundss_u32(a, _MM_FROUND_CUR_DIRECTION);
36964 let e: u32 = u32::MAX;
36965 assert_eq!(r, e);
36966 }
36967
36968 #[simd_test(enable = "avx512f")]
36969 unsafe fn test_mm_cvttss_i32() {
36970 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36971 let r = _mm_cvttss_i32(a);
36972 let e: i32 = -1; // truncation: -1.5 -> -1
36973 assert_eq!(r, e);
36974 }
36975
36976 #[simd_test(enable = "avx512f")]
36977 unsafe fn test_mm_cvttss_u32() {
36978 let a = _mm_set_ps(0., -0.5, 1., -1.5);
36979 let r = _mm_cvttss_u32(a);
36980 let e: u32 = u32::MAX;
36981 assert_eq!(r, e);
36982 }
36983
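// Same truncation semantics for the f64 source.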
36984 #[simd_test(enable = "avx512f")]
36985 unsafe fn test_mm_cvtt_roundsd_si32() {
36986 let a = _mm_set_pd(1., -1.5);
36987 let r = _mm_cvtt_roundsd_si32(a, _MM_FROUND_CUR_DIRECTION);
36988 let e: i32 = -1; // truncation toward zero of -1.5
36989 assert_eq!(r, e);
36990 }
36991
36992 #[simd_test(enable = "avx512f")]
36993 unsafe fn test_mm_cvtt_roundsd_i32() {
36994 let a = _mm_set_pd(1., -1.5);
36995 let r = _mm_cvtt_roundsd_i32(a, _MM_FROUND_CUR_DIRECTION);
36996 let e: i32 = -1;
36997 assert_eq!(r, e);
36998 }
36999
37000 #[simd_test(enable = "avx512f")]
37001 unsafe fn test_mm_cvtt_roundsd_u32() {
37002 let a = _mm_set_pd(1., -1.5);
37003 let r = _mm_cvtt_roundsd_u32(a, _MM_FROUND_CUR_DIRECTION);
37004 let e: u32 = u32::MAX;
37005 assert_eq!(r, e);
37006 }
37007
37008 #[simd_test(enable = "avx512f")]
37009 unsafe fn test_mm_cvttsd_i32() {
37010 let a = _mm_set_pd(1., -1.5);
37011 let r = _mm_cvttsd_i32(a);
37012 let e: i32 = -1; // truncation: -1.5 -> -1
37013 assert_eq!(r, e);
37014 }
37015
37016 #[simd_test(enable = "avx512f")]
37017 unsafe fn test_mm_cvttsd_u32() {
37018 let a = _mm_set_pd(1., -1.5);
37019 let r = _mm_cvttsd_u32(a);
37020 let e: u32 = u32::MAX;
37021 assert_eq!(r, e);
37022 }
37023
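// Unsigned 32-bit integer -> float conversions into the low lane; 9 is exact in
// both f32 and f64, and the upper lanes come from `a`.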
37024 #[simd_test(enable = "avx512f")]
37025 unsafe fn test_mm_cvtu32_ss() {
37026 let a = _mm_set_ps(0., -0.5, 1., -1.5);
37027 let b: u32 = 9;
37028 let r = _mm_cvtu32_ss(a, b);
37029 let e = _mm_set_ps(0., -0.5, 1., 9.);
37030 assert_eq_m128(r, e);
37031 }
37032
37033 #[simd_test(enable = "avx512f")]
37034 unsafe fn test_mm_cvtu32_sd() {
37035 let a = _mm_set_pd(1., -1.5);
37036 let b: u32 = 9;
37037 let r = _mm_cvtu32_sd(a, b);
37038 let e = _mm_set_pd(1., 9.);
37039 assert_eq_m128d(r, e);
37040 }
37041
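// Unsigned 64-bit sources converted into the low lane; 9 is again exactly
// representable, so the expected vectors match the 32-bit cases above.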
37042 #[simd_test(enable = "avx512f")]
37043 unsafe fn test_mm_cvtu64_ss() {
37044 let a = _mm_set_ps(0., -0.5, 1., -1.5);
37045 let b: u64 = 9;
37046 let r = _mm_cvtu64_ss(a, b);
37047 let e = _mm_set_ps(0., -0.5, 1., 9.);
37048 assert_eq_m128(r, e);
37049 }
37050
37051 #[simd_test(enable = "avx512f")]
37052 unsafe fn test_mm_cvtu64_sd() {
37053 let a = _mm_set_pd(1., -1.5);
37054 let b: u64 = 9;
37055 let r = _mm_cvtu64_sd(a, b);
37056 let e = _mm_set_pd(1., 9.);
37057 assert_eq_m128d(r, e);
37058 }
37059
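// _mm_comi_round_ss/_sd compare the low lanes under the predicate given by the
// third argument and return 0 or 1. Predicate 0 is _CMP_EQ_OQ, so 2.2 == 1.1 is
// false and 0 is expected. A hedged usage sketch (not part of this suite): with
// the ordered greater-equal predicate the same inputs would compare 2.2 >= 1.1
// and return 1, e.g.
//     let ge = _mm_comi_round_ss(a, b, _CMP_GE_OQ, _MM_FROUND_CUR_DIRECTION);
//     assert_eq!(ge, 1);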
37060 #[simd_test(enable = "avx512f")]
37061 unsafe fn test_mm_comi_round_ss() {
37062 let a = _mm_set1_ps(2.2);
37063 let b = _mm_set1_ps(1.1);
37064 let r = _mm_comi_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
37065 let e: i32 = 0;
37066 assert_eq!(r, e);
37067 }
37068
37069 #[simd_test(enable = "avx512f")]
37070 unsafe fn test_mm_comi_round_sd() {
37071 let a = _mm_set1_pd(2.2);
37072 let b = _mm_set1_pd(1.1);
37073 let r = _mm_comi_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
37074 let e: i32 = 0;
37075 assert_eq!(r, e);
37076 }
37077 }