use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::{self, transmute},
    ptr,
};

#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    let a = a.as_i32x16();
    // all-0 is a properly initialized i32x16
    let zero: i32x16 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i32x16 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}
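
// Illustrative sketch (not part of the upstream source): inside an `unsafe`
// block on a CPU with `avx512f`, every lane takes its magnitude, e.g.
//
//     let a = _mm512_set1_epi32(-7);
//     let r = _mm512_abs_epi32(a); // all 16 lanes are now 7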

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, abs, zero))
}
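
// Illustrative sketch (not part of the upstream source): the writemask variant
// keeps `src` lanes where the mask bit is clear, the zeromask variant zeroes them:
//
//     let a = _mm512_set1_epi32(-1);
//     let src = _mm512_set1_epi32(42);
//     let m = _mm512_mask_abs_epi32(src, 0b00000000_11111111, a); // lanes 0-7 = 1, lanes 8-15 = 42
//     let z = _mm512_maskz_abs_epi32(0b00000000_11111111, a);     // lanes 0-7 = 1, lanes 8-15 = 0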

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    let a = a.as_i64x8();
    // all-0 is a properly initialized i64x8
    let zero: i64x8 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i64x8 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, abs, zero))
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
    let a = _mm512_set1_epi32(0x7FFFFFFF); // from LLVM code
    let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
    let abs = _mm512_and_epi32(a, b);
    transmute(abs)
}
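
// Illustrative note (not part of the upstream source): the absolute value is
// taken by clearing the IEEE-754 sign bit, i.e. ANDing each lane with
// 0x7FFF_FFFF, so for example:
//
//     let v = _mm512_set1_ps(-3.5);
//     let r = _mm512_abs_ps(v); // every lane is 3.5 (and -0.0 would become +0.0)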

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    let abs = _mm512_abs_ps(v2).as_f32x16();
    transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF); // from LLVM code
    let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
    let abs = _mm512_and_epi64(a, b);
    transmute(abs)
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    let abs = _mm512_abs_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, add, zero))
}
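
// Illustrative sketch (not part of the upstream source): masked addition under
// `avx512f`; `_mm512_add_epi32` itself wraps on overflow like `vpaddd`:
//
//     let a = _mm512_set1_epi32(10);
//     let b = _mm512_set1_epi32(5);
//     let r = _mm512_mask_add_epi32(a, 0b11111111_00000000, a, b); // high 8 lanes 15, low 8 lanes 10
//     let z = _mm512_maskz_add_epi32(0b11111111_00000000, a, b);   // high 8 lanes 15, low 8 lanes 0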

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}
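
// Illustrative sketch (not part of the upstream source): element-wise
// subtraction, wrapping on overflow like `vpsubd`:
//
//     let a = _mm512_set1_epi32(3);
//     let b = _mm512_set1_epi32(5);
//     let r = _mm512_sub_epi32(a, b); // every lane is -2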

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_ps&expand=5733)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_ps&expand=5731)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let sub = _mm512_sub_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_ps&expand=5732)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let sub = _mm512_sub_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_pd&expand=5721)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_pd&expand=5719)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let sub = _mm512_sub_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_pd&expand=5720)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let sub = _mm512_sub_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
}
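
// Illustrative sketch (not part of the upstream source): only the low (even)
// 32-bit lane of each 64-bit element is used, and the full signed 64-bit
// product is kept, so the result has 8 lanes:
//
//     let a = _mm512_set1_epi32(-100_000);
//     let b = _mm512_set1_epi32(100_000);
//     let r = _mm512_mul_epi32(a, b); // 8 lanes of -10_000_000_000i64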

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epi32(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epi32(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullo_epi&expand=4005)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
}
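
// Illustrative sketch (not part of the upstream source): in contrast to
// `_mm512_mul_epi32`, only the low 32 bits of each 32x32 product survive:
//
//     let a = _mm512_set1_epi32(1 << 20);
//     let r = _mm512_mullo_epi32(a, a); // 2^40 truncated to 32 bits: every lane is 0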

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mask_mullo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
}
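
// Illustrative sketch (not part of the upstream source): a full 64 x 64 -> low
// 64-bit multiply; AVX512F has no single instruction for this, so the compiler
// emits a short instruction sequence:
//
//     let a = _mm512_set1_epi64(1_i64 << 40);
//     let b = _mm512_set1_epi64(3);
//     let r = _mm512_mullox_epi64(a, b); // every lane is 3 << 40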

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullox&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_mullox_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullox_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epu32&expand=3916)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epu32(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epu32(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let mul = _mm512_mul_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let mul = _mm512_mul_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let mul = _mm512_mul_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let mul = _mm512_mul_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_ps&expand=2162)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
}
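
// Illustrative sketch (not part of the upstream source): lane-wise IEEE-754
// division; with the default MXCSR settings a finite lane divided by 0.0
// produces an infinity rather than trapping:
//
//     let a = _mm512_set1_ps(1.0);
//     let b = _mm512_set1_ps(4.0);
//     let r = _mm512_div_ps(a, b); // every lane is 0.25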

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let div = _mm512_div_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, div, src.as_f32x16()))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let div = _mm512_div_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, div, zero))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let div = _mm512_div_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, div, src.as_f64x8()))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let div = _mm512_div_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, div, zero))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi32&expand=3582)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
}
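
// Illustrative sketch (not part of the upstream source): lane-wise signed maximum:
//
//     let a = _mm512_set1_epi32(-3);
//     let b = _mm512_set1_epi32(2);
//     let r = _mm512_max_epi32(a, b); // every lane is 2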

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, max, src.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, max, src.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_ps&expand=3655)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vmaxps(
        a.as_f32x16(),
        b.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max = _mm512_max_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max = _mm512_max_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max = _mm512_max_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max = _mm512_max_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epu32&expand=3618)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
}
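
// Illustrative sketch (not part of the upstream source): the unsigned compare
// treats the same bit pattern differently from `_mm512_max_epi32`:
//
//     let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF
//     let b = _mm512_set1_epi32(0);
//     let s = _mm512_max_epi32(a, b); // signed:   every lane is 0
//     let u = _mm512_max_epu32(a, b); // unsigned: every lane is 0xFFFF_FFFF (-1)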

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, max, src.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu32(a, b).as_u32x16();
    let zero = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=max_epu64&expand=3627)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu64(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, max, src.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epu&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu64(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epi32&expand=3696)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
}
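
// Illustrative sketch (not part of the upstream source): lane-wise signed minimum,
// the counterpart of `_mm512_max_epi32`:
//
//     let a = _mm512_set1_epi32(-3);
//     let b = _mm512_set1_epi32(2);
//     let r = _mm512_min_epi32(a, b); // every lane is -3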
877
1b1a35ee 878/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 879///
1b1a35ee 880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epi32&expand=3694)
3dfed10e
XL
881#[inline]
882#[target_feature(enable = "avx512f")]
1b1a35ee
XL
883#[cfg_attr(test, assert_instr(vpminsd))]
884pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
885 let max = _mm512_min_epi32(a, b).as_i32x16();
886 transmute(simd_select_bitmask(k, max, src.as_i32x16()))
3dfed10e
XL
887}
888
1b1a35ee 889/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 890///
1b1a35ee 891/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epi32&expand=3695)
3dfed10e
XL
892#[inline]
893#[target_feature(enable = "avx512f")]
1b1a35ee
XL
894#[cfg_attr(test, assert_instr(vpminsd))]
895pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
896 let max = _mm512_min_epi32(a, b).as_i32x16();
897 let zero = _mm512_setzero_si512().as_i32x16();
898 transmute(simd_select_bitmask(k, max, zero))
3dfed10e
XL
899}
900
1b1a35ee 901/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3dfed10e 902///
1b1a35ee 903/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epi64&expand=3705)
3dfed10e
XL
904#[inline]
905#[target_feature(enable = "avx512f")]
1b1a35ee
XL
906#[cfg_attr(test, assert_instr(vpminsq))]
907pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
908 transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
3dfed10e
XL
909}
910
1b1a35ee 911/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 912///
1b1a35ee 913/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epi64&expand=3703)
3dfed10e
XL
914#[inline]
915#[target_feature(enable = "avx512f")]
1b1a35ee
XL
916#[cfg_attr(test, assert_instr(vpminsq))]
917pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
918 let max = _mm512_min_epi64(a, b).as_i64x8();
919 transmute(simd_select_bitmask(k, max, src.as_i64x8()))
3dfed10e
XL
920}
921
1b1a35ee 922/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 923///
1b1a35ee 924/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epi64&expand=3704)
3dfed10e
XL
925#[inline]
926#[target_feature(enable = "avx512f")]
1b1a35ee
XL
927#[cfg_attr(test, assert_instr(vpminsq))]
928pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
929 let max = _mm512_min_epi64(a, b).as_i64x8();
930 let zero = _mm512_setzero_si512().as_i64x8();
931 transmute(simd_select_bitmask(k, max, zero))
3dfed10e
XL
932}
933
1b1a35ee 934/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
3dfed10e 935///
1b1a35ee 936/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_ps&expand=3769)
3dfed10e
XL
937#[inline]
938#[target_feature(enable = "avx512f")]
1b1a35ee
XL
939#[cfg_attr(test, assert_instr(vminps))]
940pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
941 transmute(vminps(
942 a.as_f32x16(),
943 b.as_f32x16(),
944 _MM_FROUND_CUR_DIRECTION,
945 ))
3dfed10e
XL
946}
947
1b1a35ee 948/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 949///
1b1a35ee 950/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_ps&expand=3767)
3dfed10e
XL
951#[inline]
952#[target_feature(enable = "avx512f")]
1b1a35ee
XL
953#[cfg_attr(test, assert_instr(vminps))]
954pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
955 let max = _mm512_min_ps(a, b).as_f32x16();
956 transmute(simd_select_bitmask(k, max, src.as_f32x16()))
3dfed10e
XL
957}
958
1b1a35ee 959/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 960///
1b1a35ee 961/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_ps&expand=3768)
3dfed10e
XL
962#[inline]
963#[target_feature(enable = "avx512f")]
1b1a35ee
XL
964#[cfg_attr(test, assert_instr(vminps))]
965pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
966 let min = _mm512_min_ps(a, b).as_f32x16();
967 let zero = _mm512_setzero_ps().as_f32x16();
968 transmute(simd_select_bitmask(k, min, zero))
3dfed10e
XL
969}
970
1b1a35ee 971/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3dfed10e 972///
1b1a35ee 973/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_pd&expand=3759)
3dfed10e
XL
974#[inline]
975#[target_feature(enable = "avx512f")]
1b1a35ee
XL
976#[cfg_attr(test, assert_instr(vminpd))]
977pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
978 transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
3dfed10e
XL
979}
980
1b1a35ee 981/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 982///
1b1a35ee 983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_pd&expand=3757)
3dfed10e
XL
984#[inline]
985#[target_feature(enable = "avx512f")]
1b1a35ee
XL
986#[cfg_attr(test, assert_instr(vminpd))]
987pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
988 let min = _mm512_min_pd(a, b).as_f64x8();
989 transmute(simd_select_bitmask(k, min, src.as_f64x8()))
3dfed10e
XL
990}
991
1b1a35ee 992/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 993///
1b1a35ee 994/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_pd&expand=3758)
3dfed10e
XL
995#[inline]
996#[target_feature(enable = "avx512f")]
1b1a35ee
XL
997#[cfg_attr(test, assert_instr(vminpd))]
998pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
999 let min = _mm512_min_pd(a, b).as_f64x8();
1000 let zero = _mm512_setzero_pd().as_f64x8();
1001 transmute(simd_select_bitmask(k, min, zero))
3dfed10e
XL
1002}
1003
1b1a35ee 1004/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3dfed10e 1005///
1b1a35ee 1006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epu32&expand=3732)
3dfed10e
XL
1007#[inline]
1008#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1009#[cfg_attr(test, assert_instr(vpminud))]
1010pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
1011 transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
3dfed10e
XL
1012}
1013
1b1a35ee 1014/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 1015///
1b1a35ee 1016/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu32&expand=3730)
3dfed10e
XL
1017#[inline]
1018#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1019#[cfg_attr(test, assert_instr(vpminud))]
1020pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1021 let min = _mm512_min_epu32(a, b).as_u32x16();
1022 transmute(simd_select_bitmask(k, min, src.as_u32x16()))
3dfed10e
XL
1023}
1024
1b1a35ee 1025/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1026///
1b1a35ee 1027/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu32&expand=3731)
3dfed10e
XL
1028#[inline]
1029#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1030#[cfg_attr(test, assert_instr(vpminud))]
1031pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1032 let min = _mm512_min_epu32(a, b).as_u32x16();
1033 let zero = _mm512_setzero_si512().as_u32x16();
1034 transmute(simd_select_bitmask(k, min, zero))
3dfed10e
XL
1035}
1036
1b1a35ee 1037/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3dfed10e 1038///
1b1a35ee 1039/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epu64&expand=3741)
3dfed10e
XL
1040#[inline]
1041#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1042#[cfg_attr(test, assert_instr(vpminuq))]
1043pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
1044 transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
3dfed10e
XL
1045}
1046
1b1a35ee 1047/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 1048///
1b1a35ee 1049/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu64&expand=3739)
3dfed10e
XL
1050#[inline]
1051#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1052#[cfg_attr(test, assert_instr(vpminuq))]
1053pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1054 let min = _mm512_min_epu64(a, b).as_u64x8();
1055 transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3dfed10e
XL
1056}
1057
1b1a35ee 1058/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1059///
1b1a35ee 1060/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu64&expand=3740)
3dfed10e
XL
1061#[inline]
1062#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1063#[cfg_attr(test, assert_instr(vpminuq))]
1064pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1065 let min = _mm512_min_epu64(a, b).as_u64x8();
1066 let zero = _mm512_setzero_si512().as_u64x8();
1067 transmute(simd_select_bitmask(k, min, zero))
3dfed10e
XL
1068}
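// A minimal sketch (not upstream code) contrasting the signed and unsigned 64-bit minimum on
// the same bit patterns: a lane holding 0xFFFF_FFFF_FFFF_FFFF compares as -1 for
// `_mm512_min_epi64` but as u64::MAX for `_mm512_min_epu64`, so the two results can differ.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn min_signedness_sketch(a: __m512i, b: __m512i) -> (__m512i, __m512i) {
    let signed_min = _mm512_min_epi64(a, b); // lane-wise minimum, lanes treated as i64
    let unsigned_min = _mm512_min_epu64(a, b); // lane-wise minimum, lanes treated as u64
    (signed_min, unsigned_min)
}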
1069
1b1a35ee 1070/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3dfed10e 1071///
1b1a35ee 1072/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_ps&expand=5371)
3dfed10e
XL
1073#[inline]
1074#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1075#[cfg_attr(test, assert_instr(vsqrtps))]
1076pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
1077 transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
3dfed10e
XL
1078}
1079
1b1a35ee 1080/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 1081///
1b1a35ee 1082/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_ps&expand=5369)
3dfed10e
XL
1083#[inline]
1084#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1085#[cfg_attr(test, assert_instr(vsqrtps))]
1086pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1087 let sqrt = _mm512_sqrt_ps(a).as_f32x16();
1088 transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
3dfed10e
XL
1089}
1090
1b1a35ee 1091/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1092///
1b1a35ee 1093/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_ps&expand=5370)
3dfed10e
XL
1094#[inline]
1095#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1096#[cfg_attr(test, assert_instr(vsqrtps))]
1097pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
1098 let sqrt = _mm512_sqrt_ps(a).as_f32x16();
1099 let zero = _mm512_setzero_ps().as_f32x16();
1100 transmute(simd_select_bitmask(k, sqrt, zero))
3dfed10e
XL
1101}
1102
1b1a35ee 1103/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3dfed10e 1104///
1b1a35ee 1105/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_pd&expand=5362)
3dfed10e
XL
1106#[inline]
1107#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1108#[cfg_attr(test, assert_instr(vsqrtpd))]
1109pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
1110 transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
3dfed10e
XL
1111}
1112
1b1a35ee 1113/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3dfed10e 1114///
1b1a35ee 1115/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_pd&expand=5360)
3dfed10e
XL
1116#[inline]
1117#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1118#[cfg_attr(test, assert_instr(vsqrtpd))]
1119pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1120 let sqrt = _mm512_sqrt_pd(a).as_f64x8();
1121 transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
3dfed10e
XL
1122}
1123
1b1a35ee 1124/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1125///
1b1a35ee 1126/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_pd&expand=5361)
3dfed10e
XL
1127#[inline]
1128#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1129#[cfg_attr(test, assert_instr(vsqrtpd))]
1130pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
1131 let sqrt = _mm512_sqrt_pd(a).as_f64x8();
1132 let zero = _mm512_setzero_pd().as_f64x8();
1133 transmute(simd_select_bitmask(k, sqrt, zero))
3dfed10e
XL
1134}
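// A minimal sketch (not upstream code) of the masked square-root variants: with a writemask
// the unselected lanes keep `src`, with a zeromask they become 0.0. The mask is caller-chosen.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sqrt_mask_sketch(src: __m512d, k: __mmask8, a: __m512d) -> (__m512d, __m512d) {
    let merged = _mm512_mask_sqrt_pd(src, k, a); // sqrt where the k bit is set, src elsewhere
    let zeroed = _mm512_maskz_sqrt_pd(k, a); // sqrt where the k bit is set, 0.0 elsewhere
    (merged, zeroed)
}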
1135
1b1a35ee 1136/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3dfed10e 1137///
1b1a35ee 1138/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_ps&expand=2557)
3dfed10e
XL
1139#[inline]
1140#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1141#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1142pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1143 transmute(vfmadd132ps(
1144 a.as_f32x16(),
1145 b.as_f32x16(),
1146 c.as_f32x16(),
1147 _MM_FROUND_CUR_DIRECTION,
1148 ))
3dfed10e
XL
1149}
1150
1b1a35ee 1151/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1152///
1b1a35ee 1153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_ps&expand=2558)
3dfed10e
XL
1154#[inline]
1155#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1156#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1157pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1158 let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
1159 transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
3dfed10e
XL
1160}
1161
1b1a35ee 1162/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1163///
1b1a35ee 1164/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_ps&expand=2560)
3dfed10e
XL
1165#[inline]
1166#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1167#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1168pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1169 let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
1170 let zero = _mm512_setzero_ps().as_f32x16();
1171 transmute(simd_select_bitmask(k, fmadd, zero))
3dfed10e
XL
1172}
1173
1b1a35ee 1174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1175///
1b1a35ee 1176/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_ps&expand=2559)
3dfed10e
XL
1177#[inline]
1178#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1179#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
1180pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1181 let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
1182 transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
3dfed10e
XL
1183}
1184
1b1a35ee 1185/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3dfed10e 1186///
1b1a35ee 1187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_pd&expand=2545)
3dfed10e
XL
1188#[inline]
1189#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1190#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1191pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1192 transmute(vfmadd132pd(
1193 a.as_f64x8(),
1194 b.as_f64x8(),
1195 c.as_f64x8(),
1196 _MM_FROUND_CUR_DIRECTION,
1197 ))
3dfed10e
XL
1198}
1199
1b1a35ee 1200/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1201///
1b1a35ee 1202/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_pd&expand=2546)
3dfed10e
XL
1203#[inline]
1204#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1205#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1206pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1207 let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
1208 transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
3dfed10e
XL
1209}
1210
1b1a35ee 1211/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1212///
1b1a35ee 1213/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_pd&expand=2548)
3dfed10e
XL
1214#[inline]
1215#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1216#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1217pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1218 let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
1219 let zero = _mm512_setzero_pd().as_f64x8();
1220 transmute(simd_select_bitmask(k, fmadd, zero))
3dfed10e
XL
1221}
1222
1b1a35ee 1223/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1224///
1b1a35ee 1225/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_pd&expand=2547)
3dfed10e
XL
1226#[inline]
1227#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1228#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
1229pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1230 let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
1231 transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
3dfed10e
XL
1232}
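// A minimal sketch (not upstream code) of a common fused multiply-add pattern: evaluating a
// cubic polynomial with Horner's rule, one `_mm512_fmadd_ps` (and one rounding) per step. The
// coefficient vectors are supplied by the caller; the helper name is illustrative only.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn horner_cubic_sketch(x: __m512, c0: __m512, c1: __m512, c2: __m512, c3: __m512) -> __m512 {
    // p(x) = ((c3*x + c2)*x + c1)*x + c0
    let y = _mm512_fmadd_ps(c3, x, c2);
    let y = _mm512_fmadd_ps(y, x, c1);
    _mm512_fmadd_ps(y, x, c0)
}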
1233
1b1a35ee 1234/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3dfed10e 1235///
1b1a35ee 1236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_ps&expand=2643)
3dfed10e
XL
1237#[inline]
1238#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1239#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1240pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1241 let zero: f32x16 = mem::zeroed();
1242 let sub = simd_sub(zero, c.as_f32x16());
1243 transmute(vfmadd132ps(
1244 a.as_f32x16(),
1245 b.as_f32x16(),
1246 sub,
1247 _MM_FROUND_CUR_DIRECTION,
1248 ))
3dfed10e
XL
1249}
1250
1b1a35ee 1251/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1252///
1b1a35ee 1253/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_ps&expand=2644)
3dfed10e
XL
1254#[inline]
1255#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1256#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1257pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1258 let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
1259 transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
3dfed10e
XL
1260}
1261
1b1a35ee 1262/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1263///
1b1a35ee 1264/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_ps&expand=2646)
3dfed10e
XL
1265#[inline]
1266#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1267#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1268pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1269 let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
1270 let zero = _mm512_setzero_ps().as_f32x16();
1271 transmute(simd_select_bitmask(k, fmsub, zero))
3dfed10e
XL
1272}
1273
1b1a35ee 1274/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1275///
1b1a35ee 1276/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_ps&expand=2645)
3dfed10e
XL
1277#[inline]
1278#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1279#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
1280pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1281 let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
1282 transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
3dfed10e
XL
1283}
1284
1b1a35ee 1285/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3dfed10e 1286///
1b1a35ee 1287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_pd&expand=2631)
3dfed10e
XL
1288#[inline]
1289#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1290#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1291pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1292 let zero: f64x8 = mem::zeroed();
1293 let sub = simd_sub(zero, c.as_f64x8());
1294 transmute(vfmadd132pd(
1295 a.as_f64x8(),
1296 b.as_f64x8(),
1297 sub,
1298 _MM_FROUND_CUR_DIRECTION,
1299 ))
3dfed10e
XL
1300}
1301
1b1a35ee 1302/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1303///
1b1a35ee 1304/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_pd&expand=2632)
3dfed10e
XL
1305#[inline]
1306#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1307#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1308pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1309 let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
1310 transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
3dfed10e
XL
1311}
1312
1b1a35ee 1313/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1314///
1b1a35ee 1315/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_pd&expand=2634)
3dfed10e
XL
1316#[inline]
1317#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1318#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1319pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1320 let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
1321 let zero = _mm512_setzero_pd().as_f64x8();
1322 transmute(simd_select_bitmask(k, fmsub, zero))
3dfed10e
XL
1323}
1324
1b1a35ee 1325/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1326///
1b1a35ee 1327/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_pd&expand=2633)
3dfed10e
XL
1328#[inline]
1329#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1330#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
1331pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1332 let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
1333 transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
3dfed10e
XL
1334}
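// A minimal sketch (not upstream code) of how the three masked fmsub variants differ only in
// what fills the unselected lanes: `_mm512_mask_*` copies from `a`, `_mm512_mask3_*` copies
// from `c`, and `_mm512_maskz_*` writes zeros. Selected lanes all hold a*b - c.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmsub_mask_variants_sketch(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> (__m512d, __m512d, __m512d) {
    let keep_a = _mm512_mask_fmsub_pd(a, k, b, c); // unselected lanes copied from a
    let keep_c = _mm512_mask3_fmsub_pd(a, b, c, k); // unselected lanes copied from c
    let zeroed = _mm512_maskz_fmsub_pd(k, a, b, c); // unselected lanes zeroed
    (keep_a, keep_c, zeroed)
}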
1335
1b1a35ee 1336/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3dfed10e 1337///
1b1a35ee 1338/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_ps&expand=2611)
3dfed10e
XL
1339#[inline]
1340#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1341#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1342pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1343 transmute(vfmaddsub213ps(
1344 a.as_f32x16(),
1345 b.as_f32x16(),
1346 c.as_f32x16(),
1347 _MM_FROUND_CUR_DIRECTION,
1348 ))
3dfed10e
XL
1349}
1350
1b1a35ee 1351/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1352///
1b1a35ee 1353/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_ps&expand=2612)
3dfed10e
XL
1354#[inline]
1355#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1356#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1357pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1358 let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
1359 transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
3dfed10e
XL
1360}
1361
1b1a35ee 1362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1363///
1b1a35ee 1364/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_ps&expand=2614)
3dfed10e
XL
1365#[inline]
1366#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1367#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1368pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1369 let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
1370 let zero = _mm512_setzero_ps().as_f32x16();
1371 transmute(simd_select_bitmask(k, fmaddsub, zero))
3dfed10e
XL
1372}
1373
1b1a35ee 1374/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1375///
1b1a35ee 1376/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_ps&expand=2613)
3dfed10e
XL
1377#[inline]
1378#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1379#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
1380pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1381 let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
1382 transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
3dfed10e
XL
1383}
1384
1b1a35ee 1385/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3dfed10e 1386///
1b1a35ee 1387/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_pd&expand=2599)
3dfed10e
XL
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1390#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1391pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1392 transmute(vfmaddsub213pd(
1393 a.as_f64x8(),
1394 b.as_f64x8(),
1395 c.as_f64x8(),
1396 _MM_FROUND_CUR_DIRECTION,
1397 ))
3dfed10e
XL
1398}
1399
1b1a35ee 1400/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1401///
1b1a35ee 1402/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_pd&expand=2600)
3dfed10e
XL
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1405#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1406pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1407 let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
1408 transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
3dfed10e
XL
1409}
1410
1b1a35ee 1411/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1412///
1b1a35ee 1413/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_pd&expand=2602)
3dfed10e
XL
1414#[inline]
1415#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1416#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1417pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1418 let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
1419 let zero = _mm512_setzero_pd().as_f64x8();
1420 transmute(simd_select_bitmask(k, fmaddsub, zero))
3dfed10e
XL
1421}
1422
1b1a35ee 1423/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1424///
1b1a35ee 1425/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_pd&expand=2601)
3dfed10e
XL
1426#[inline]
1427#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1428#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
1429pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1430 let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
1431 transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
3dfed10e
XL
1432}
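// A minimal sketch (not upstream code) of the lane parity used by fmaddsub: even-indexed
// lanes of the result match `_mm512_fmsub_ps` (a*b - c) and odd-indexed lanes match
// `_mm512_fmadd_ps` (a*b + c).
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmaddsub_parity_sketch(a: __m512, b: __m512, c: __m512) -> (__m512, __m512, __m512) {
    let alternating = _mm512_fmaddsub_ps(a, b, c); // even lanes: a*b - c, odd lanes: a*b + c
    let all_sub = _mm512_fmsub_ps(a, b, c);
    let all_add = _mm512_fmadd_ps(a, b, c);
    (alternating, all_sub, all_add)
}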
1433
1b1a35ee 1434/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
3dfed10e 1435///
1b1a35ee 1436/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_ps&expand=2691)
3dfed10e
XL
1437#[inline]
1438#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1439#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1440pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1441 let zero: f32x16 = mem::zeroed();
1442 let sub = simd_sub(zero, c.as_f32x16());
1443 transmute(vfmaddsub213ps(
1444 a.as_f32x16(),
1445 b.as_f32x16(),
1446 sub,
1447 _MM_FROUND_CUR_DIRECTION,
1448 ))
3dfed10e
XL
1449}
1450
1b1a35ee 1451/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1452///
1b1a35ee 1453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_ps&expand=2692)
3dfed10e
XL
1454#[inline]
1455#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1456#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1457pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1458 let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
1459 transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
3dfed10e
XL
1460}
1461
1b1a35ee 1462/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1463///
1b1a35ee 1464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_ps&expand=2694)
3dfed10e
XL
1465#[inline]
1466#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1467#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1468pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1469 let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
1470 let zero = _mm512_setzero_ps().as_f32x16();
1471 transmute(simd_select_bitmask(k, fmsubadd, zero))
3dfed10e
XL
1472}
1473
1b1a35ee 1474/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1475///
1b1a35ee 1476/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_ps&expand=2693)
3dfed10e
XL
1477#[inline]
1478#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1479#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
1480pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1481 let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
1482 transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
3dfed10e
XL
1483}
1484
1b1a35ee 1485/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
3dfed10e 1486///
1b1a35ee 1487/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_pd&expand=2679)
3dfed10e
XL
1488#[inline]
1489#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1490#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1491pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1492 let zero: f64x8 = mem::zeroed();
1493 let sub = simd_sub(zero, c.as_f64x8());
1494 transmute(vfmaddsub213pd(
1495 a.as_f64x8(),
1496 b.as_f64x8(),
1497 sub,
1498 _MM_FROUND_CUR_DIRECTION,
1499 ))
3dfed10e
XL
1500}
1501
1b1a35ee 1502/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1503///
1b1a35ee 1504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_pd&expand=2680)
3dfed10e
XL
1505#[inline]
1506#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1507#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1508pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1509 let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
1510 transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
3dfed10e
XL
1511}
1512
1b1a35ee 1513/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1514///
1b1a35ee 1515/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_pd&expand=2682)
3dfed10e
XL
1516#[inline]
1517#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1518#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1519pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1520 let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
1521 let zero = _mm512_setzero_pd().as_f64x8();
1522 transmute(simd_select_bitmask(k, fmsubadd, zero))
3dfed10e
XL
1523}
1524
1b1a35ee 1525/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1526///
1b1a35ee 1527/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_pd&expand=2681)
3dfed10e
XL
1528#[inline]
1529#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1530#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
1531pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1532 let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
1533 transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
3dfed10e
XL
1534}
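// A minimal sketch (not upstream code) of the opposite lane parity used by fmsubadd:
// even-indexed lanes hold a*b + c and odd-indexed lanes hold a*b - c, mirroring the
// fmaddsub sketch above.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmsubadd_parity_sketch(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    _mm512_fmsubadd_pd(a, b, c) // even lanes: a*b + c, odd lanes: a*b - c
}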
1535
1b1a35ee 1536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
3dfed10e 1537///
1b1a35ee 1538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_ps&expand=2723)
3dfed10e
XL
1539#[inline]
1540#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1541#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1542pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1543 let zero: f32x16 = mem::zeroed();
1544 let sub = simd_sub(zero, a.as_f32x16());
1545 transmute(vfmadd132ps(
1546 sub,
1547 b.as_f32x16(),
1548 c.as_f32x16(),
1549 _MM_FROUND_CUR_DIRECTION,
1550 ))
3dfed10e
XL
1551}
1552
1b1a35ee 1553/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1554///
1b1a35ee 1555/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_ps&expand=2724)
3dfed10e
XL
1556#[inline]
1557#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1558#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1559pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1560 let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
1561 transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
3dfed10e
XL
1562}
1563
1b1a35ee 1564/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1565///
1b1a35ee 1566/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_ps&expand=2726)
3dfed10e
XL
1567#[inline]
1568#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1569#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1570pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1571 let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
1572 let zero = _mm512_setzero_ps().as_f32x16();
1573 transmute(simd_select_bitmask(k, fnmadd, zero))
3dfed10e
XL
1574}
1575
1b1a35ee 1576/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1577///
1b1a35ee 1578/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_ps&expand=2725)
3dfed10e
XL
1579#[inline]
1580#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1581#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
1582pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1583 let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
1584 transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
3dfed10e
XL
1585}
1586
1b1a35ee 1587/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
3dfed10e 1588///
1b1a35ee 1589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_pd&expand=2711)
3dfed10e
XL
1590#[inline]
1591#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1592#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1593pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1594 let zero: f64x8 = mem::zeroed();
1595 let sub = simd_sub(zero, a.as_f64x8());
1596 transmute(vfmadd132pd(
1597 sub,
1598 b.as_f64x8(),
1599 c.as_f64x8(),
1600 _MM_FROUND_CUR_DIRECTION,
1601 ))
3dfed10e
XL
1602}
1603
1b1a35ee 1604/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1605///
1b1a35ee 1606/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_pd&expand=2712)
3dfed10e
XL
1607#[inline]
1608#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1609#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1610pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1611 let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
1612 transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
3dfed10e
XL
1613}
1614
1b1a35ee 1615/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1616///
1b1a35ee 1617/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_pd&expand=2714)
3dfed10e
XL
1618#[inline]
1619#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1620#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1621pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1622 let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
1623 let zero = _mm512_setzero_pd().as_f64x8();
1624 transmute(simd_select_bitmask(k, fnmadd, zero))
3dfed10e
XL
1625}
1626
1b1a35ee 1627/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1628///
1b1a35ee 1629/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_pd&expand=2713)
3dfed10e
XL
1630#[inline]
1631#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1632#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
1633pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1634 let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
1635 transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
3dfed10e
XL
1636}
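// A minimal sketch (not upstream code) of a typical fnmadd use: computing a residual
// r = b - a*x with a single rounding, e.g. to check how well x approximates a solution of
// a*x = b per lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn residual_sketch(a: __m512d, x: __m512d, b: __m512d) -> __m512d {
    _mm512_fnmadd_pd(a, x, b) // -(a*x) + b == b - a*x
}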
1637
1b1a35ee 1638/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
3dfed10e 1639///
1b1a35ee 1640/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_ps&expand=2771)
3dfed10e
XL
1641#[inline]
1642#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1643#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1644pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
1645 let zero: f32x16 = mem::zeroed();
1646 let suba = simd_sub(zero, a.as_f32x16());
1647 let subc = simd_sub(zero, c.as_f32x16());
1648 transmute(vfmadd132ps(
1649 suba,
1650 b.as_f32x16(),
1651 subc,
1652 _MM_FROUND_CUR_DIRECTION,
1653 ))
3dfed10e
XL
1654}
1655
1b1a35ee 1656/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1657///
1b1a35ee 1658/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_ps&expand=2772)
3dfed10e
XL
1659#[inline]
1660#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1661#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1662pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
1663 let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
1664 transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
3dfed10e
XL
1665}
1666
1b1a35ee 1667/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1668///
1b1a35ee 1669/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_ps&expand=2774)
3dfed10e
XL
1670#[inline]
1671#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1672#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1673pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
1674 let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
1675 let zero = _mm512_setzero_ps().as_f32x16();
1676 transmute(simd_select_bitmask(k, fnmsub, zero))
3dfed10e
XL
1677}
1678
1b1a35ee 1679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1680///
1b1a35ee 1681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_ps&expand=2773)
3dfed10e
XL
1682#[inline]
1683#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1684#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
1685pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
1686 let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
1687 transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
3dfed10e
XL
1688}
1689
1b1a35ee 1690/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
3dfed10e 1691///
1b1a35ee 1692/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_pd&expand=2759)
3dfed10e
XL
1693#[inline]
1694#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1695#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1696pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1697 let zero: f64x8 = mem::zeroed();
1698 let suba = simd_sub(zero, a.as_f64x8());
1699 let subc = simd_sub(zero, c.as_f64x8());
1700 transmute(vfmadd132pd(
1701 suba,
1702 b.as_f64x8(),
1703 subc,
1704 _MM_FROUND_CUR_DIRECTION,
1705 ))
3dfed10e
XL
1706}
1707
1b1a35ee 1708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3dfed10e 1709///
1b1a35ee 1710/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_pd&expand=2760)
3dfed10e
XL
1711#[inline]
1712#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1713#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1714pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
1715 let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
1716 transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
3dfed10e
XL
1717}
1718
1b1a35ee 1719/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3dfed10e 1720///
1b1a35ee 1721/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_pd&expand=2762)
3dfed10e
XL
1722#[inline]
1723#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1724#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1725pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
1726 let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
1727 let zero = _mm512_setzero_pd().as_f64x8();
1728 transmute(simd_select_bitmask(k, fnmsub, zero))
3dfed10e
XL
1729}
1730
1b1a35ee 1731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3dfed10e 1732///
1b1a35ee 1733/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_pd&expand=2761)
3dfed10e
XL
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1736#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
1737pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
1738 let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
1739 transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
3dfed10e
XL
1740}
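// A minimal sketch (not upstream code) relating fnmsub to fmadd: -(a*b) - c is the lane-wise
// negation of a*b + c, so the two results below differ only in sign.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmsub_negation_sketch(a: __m512, b: __m512, c: __m512) -> (__m512, __m512) {
    let negated = _mm512_fnmsub_ps(a, b, c); // -(a*b) - c
    let plain = _mm512_fmadd_ps(a, b, c); // a*b + c
    (negated, plain)
}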
1741
1b1a35ee 1742/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1743///
1b1a35ee 1744/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rcp14_ps&expand=4502)
3dfed10e
XL
1745#[inline]
1746#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1747#[cfg_attr(test, assert_instr(vrcp14ps))]
1748pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
1749 transmute(vrcp14ps(
1750 a.as_f32x16(),
1751 _mm512_setzero_ps().as_f32x16(),
1752 0b11111111_11111111,
1753 ))
3dfed10e
XL
1754}
1755
1b1a35ee 1756/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1757///
1b1a35ee 1758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rcp14_ps&expand=4500)
3dfed10e
XL
1759#[inline]
1760#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1761#[cfg_attr(test, assert_instr(vrcp14ps))]
1762pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1763 transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
3dfed10e
XL
1764}
1765
1b1a35ee 1766/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1767///
1b1a35ee 1768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rcp14_ps&expand=4501)
3dfed10e
XL
1769#[inline]
1770#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1771#[cfg_attr(test, assert_instr(vrcp14ps))]
1772pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
1773 transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
3dfed10e
XL
1774}
1775
1b1a35ee 1776/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1777///
1b1a35ee 1778/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rcp14_pd&expand=4493)
3dfed10e
XL
1779#[inline]
1780#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1781#[cfg_attr(test, assert_instr(vrcp14pd))]
1782pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
1783 transmute(vrcp14pd(
1784 a.as_f64x8(),
1785 _mm512_setzero_pd().as_f64x8(),
1786 0b11111111,
1787 ))
3dfed10e
XL
1788}
1789
1b1a35ee 1790/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1791///
1b1a35ee 1792/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rcp14_pd&expand=4491)
3dfed10e
XL
1793#[inline]
1794#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1795#[cfg_attr(test, assert_instr(vrcp14pd))]
1796pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1797 transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
3dfed10e
XL
1798}
1799
1b1a35ee 1800/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1801///
1b1a35ee 1802/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rcp14_pd&expand=4492)
3dfed10e
XL
1803#[inline]
1804#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1805#[cfg_attr(test, assert_instr(vrcp14pd))]
1806pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
1807 transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
3dfed10e
XL
1808}
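// A minimal sketch (not upstream code) of refining the 2^-14 reciprocal approximation with one
// Newton-Raphson step built from the fused multiply-adds above: x1 = x0 + x0*(1 - a*x0) roughly
// squares the relative error. `one` is assumed to hold 1.0 in every lane and is passed in by the
// caller so the sketch relies only on intrinsics defined in this file.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rcp14_refined_sketch(a: __m512, one: __m512) -> __m512 {
    let x0 = _mm512_rcp14_ps(a); // initial guess, |relative error| < 2^-14
    let e = _mm512_fnmadd_ps(a, x0, one); // e = 1 - a*x0
    _mm512_fmadd_ps(x0, e, x0) // x1 = x0 + x0*e
}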
1809
1b1a35ee 1810/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1811///
1b1a35ee 1812/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rsqrt14_ps&expand=4819)
3dfed10e
XL
1813#[inline]
1814#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1815#[cfg_attr(test, assert_instr(vrsqrt14ps))]
1816pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
1817 transmute(vrsqrt14ps(
1818 a.as_f32x16(),
1819 _mm512_setzero_ps().as_f32x16(),
1820 0b11111111_11111111,
1821 ))
3dfed10e
XL
1822}
1823
1b1a35ee 1824/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1825///
1b1a35ee 1826/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rsqrt14_ps&expand=4817)
3dfed10e
XL
1827#[inline]
1828#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1829#[cfg_attr(test, assert_instr(vrsqrt14ps))]
1830pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1831 transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
3dfed10e
XL
1832}
1833
1b1a35ee 1834/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1835///
1b1a35ee 1836/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rsqrt14_ps&expand=4818)
3dfed10e
XL
1837#[inline]
1838#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1839#[cfg_attr(test, assert_instr(vrsqrt14ps))]
1840pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
1841 transmute(vrsqrt14ps(
1842 a.as_f32x16(),
1843 _mm512_setzero_ps().as_f32x16(),
1844 k,
1845 ))
3dfed10e
XL
1846}
1847
1b1a35ee 1848/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1849///
1b1a35ee 1850/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rsqrt14_pd&expand=4812)
3dfed10e
XL
1851#[inline]
1852#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1853#[cfg_attr(test, assert_instr(vrsqrt14pd))]
1854pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
1855 transmute(vrsqrt14pd(
1856 a.as_f64x8(),
1857 _mm512_setzero_pd().as_f64x8(),
1858 0b11111111,
1859 ))
3dfed10e
XL
1860}
1861
1b1a35ee 1862/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
3dfed10e 1863///
1b1a35ee 1864/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rsqrt14_pd&expand=4810)
3dfed10e
XL
1865#[inline]
1866#[target_feature(enable = "avx512f")]
1b1a35ee
XL
1867#[cfg_attr(test, assert_instr(vrsqrt14pd))]
1868pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1869 transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
3dfed10e
XL
1870}
1871
1b1a35ee
XL
1872/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
1873///
1874/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rsqrt14_pd&expand=4811)
1875#[inline]
1876#[target_feature(enable = "avx512f")]
1877#[cfg_attr(test, assert_instr(vrsqrt14pd))]
1878pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
1879 transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
1880}
3dfed10e 1881
1b1a35ee
XL
1882/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
1883///
1884/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_ps&expand=2844)
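/// # Examples
///
/// A minimal sketch (not from the upstream source), assuming AVX-512F has been
/// verified at runtime; the input values are arbitrary.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let a = _mm512_set1_ps(8.0);
///     // floor(log2(8.0)) = 3, so every lane of `r` holds 3.0.
///     let r = _mm512_getexp_ps(a);
/// }
/// ```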
1885#[inline]
1886#[target_feature(enable = "avx512f")]
1887#[cfg_attr(test, assert_instr(vgetexpps))]
1888pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
1889 transmute(vgetexpps(
1890 a.as_f32x16(),
1891 _mm512_setzero_ps().as_f32x16(),
1892 0b11111111_11111111,
1893 _MM_FROUND_CUR_DIRECTION,
1894 ))
1895}
3dfed10e 1896
1b1a35ee
XL
1897/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
1898///
1899/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_ps&expand=2845)
1900#[inline]
1901#[target_feature(enable = "avx512f")]
1902#[cfg_attr(test, assert_instr(vgetexpps))]
1903pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
1904 transmute(vgetexpps(
1905 a.as_f32x16(),
1906 src.as_f32x16(),
1907 k,
1908 _MM_FROUND_CUR_DIRECTION,
1909 ))
1910}
3dfed10e 1911
1b1a35ee
XL
1912/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
1913///
1914/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_ps&expand=2846)
1915#[inline]
1916#[target_feature(enable = "avx512f")]
1917#[cfg_attr(test, assert_instr(vgetexpps))]
1918pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
1919 transmute(vgetexpps(
1920 a.as_f32x16(),
1921 _mm512_setzero_ps().as_f32x16(),
1922 k,
1923 _MM_FROUND_CUR_DIRECTION,
1924 ))
3dfed10e
XL
1925}
1926
1b1a35ee
XL
1927/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
1928///
1929/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_pd&expand=2835)
1930#[inline]
1931#[target_feature(enable = "avx512f")]
1932#[cfg_attr(test, assert_instr(vgetexppd))]
1933pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
1934 transmute(vgetexppd(
1935 a.as_f64x8(),
1936 _mm512_setzero_pd().as_f64x8(),
1937 0b11111111,
1938 _MM_FROUND_CUR_DIRECTION,
1939 ))
1940}
3dfed10e 1941
1b1a35ee
XL
1942/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
1943///
1944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_pd&expand=2836)
1945#[inline]
1946#[target_feature(enable = "avx512f")]
1947#[cfg_attr(test, assert_instr(vgetexppd))]
1948pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
1949 transmute(vgetexppd(
1950 a.as_f64x8(),
1951 src.as_f64x8(),
1952 k,
1953 _MM_FROUND_CUR_DIRECTION,
1954 ))
1955}
3dfed10e 1956
1b1a35ee
XL
1957/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
1958///
1959/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_pd&expand=2837)
1960#[inline]
1961#[target_feature(enable = "avx512f")]
1962#[cfg_attr(test, assert_instr(vgetexppd))]
1963pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
1964 transmute(vgetexppd(
1965 a.as_f64x8(),
1966 _mm512_setzero_pd().as_f64x8(),
1967 k,
1968 _MM_FROUND_CUR_DIRECTION,
1969 ))
1970}
1971
1972/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
1973/// The mantissa is normalized to the interval specified by interv, which can take the following values:
1974/// _MM_MANT_NORM_1_2 // interval [1, 2)
1975/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
1976/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
1977/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
1978/// The sign is determined by sc which can take the following values:
1979/// _MM_MANT_SIGN_src // sign = sign(src)
1980/// _MM_MANT_SIGN_zero // sign = 0
1981/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
1982///
1983/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_ps&expand=2880)
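/// # Examples
///
/// A minimal sketch, not part of the upstream source. Here `norm` plays the role of
/// `interv` and `sign` the role of `sc` in the description above; the constant names
/// are the ones listed above, and AVX-512F support is assumed to have been checked
/// at runtime.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let a = _mm512_set1_ps(6.0);
///     // 6.0 = 1.5 * 2^2, so with the [1, 2) interval every lane of `r` is 1.5.
///     let r = _mm512_getmant_ps(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
/// }
/// ```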
1984#[inline]
1985#[target_feature(enable = "avx512f")]
1986#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
1987#[rustc_args_required_const(1, 2)]
1988pub unsafe fn _mm512_getmant_ps(
1989 a: __m512,
1990 norm: _MM_MANTISSA_NORM_ENUM,
1991 sign: _MM_MANTISSA_SIGN_ENUM,
1992) -> __m512 {
1993 macro_rules! call {
1994 ($imm4:expr, $imm2:expr) => {
1995 vgetmantps(
1996 a.as_f32x16(),
1997 $imm2 << 2 | $imm4,
1998 _mm512_setzero_ps().as_f32x16(),
1999 0b11111111_11111111,
2000 _MM_FROUND_CUR_DIRECTION,
2001 )
2002 };
3dfed10e 2003 }
1b1a35ee
XL
2004 let r = constify_imm4_mantissas!(norm, sign, call);
2005 transmute(r)
2006}
3dfed10e 2007
1b1a35ee
XL
2008/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
2009/// The mantissa is normalized to the interval specified by interv, which can take the following values:
2010/// _MM_MANT_NORM_1_2 // interval [1, 2)
2011/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
2012/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
2013/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
2014/// The sign is determined by sc which can take the following values:
2015/// _MM_MANT_SIGN_src // sign = sign(src)
2016/// _MM_MANT_SIGN_zero // sign = 0
2017/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2018///
2019/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_ps&expand=2881)
2020#[inline]
2021#[target_feature(enable = "avx512f")]
2022#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
2023#[rustc_args_required_const(3, 4)]
2024pub unsafe fn _mm512_mask_getmant_ps(
2025 src: __m512,
2026 k: __mmask16,
2027 a: __m512,
2028 norm: _MM_MANTISSA_NORM_ENUM,
2029 sign: _MM_MANTISSA_SIGN_ENUM,
2030) -> __m512 {
2031 macro_rules! call {
2032 ($imm4:expr, $imm2:expr) => {
2033 vgetmantps(
2034 a.as_f32x16(),
2035 $imm2 << 2 | $imm4,
2036 src.as_f32x16(),
2037 k,
2038 _MM_FROUND_CUR_DIRECTION,
2039 )
2040 };
3dfed10e 2041 }
1b1a35ee
XL
2042 let r = constify_imm4_mantissas!(norm, sign, call);
2043 transmute(r)
2044}
3dfed10e 2045
1b1a35ee
XL
2046/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
2047/// The mantissa is normalized to the interval specified by interv, which can take the following values:
2048/// _MM_MANT_NORM_1_2 // interval [1, 2)
2049/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
2050/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
2051/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
2052/// The sign is determined by sc which can take the following values:
2053/// _MM_MANT_SIGN_src // sign = sign(src)
2054/// _MM_MANT_SIGN_zero // sign = 0
2055/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2056///
2057/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_ps&expand=2882)
2058#[inline]
2059#[target_feature(enable = "avx512f")]
2060#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
2061#[rustc_args_required_const(2, 3)]
2062pub unsafe fn _mm512_maskz_getmant_ps(
2063 k: __mmask16,
2064 a: __m512,
2065 norm: _MM_MANTISSA_NORM_ENUM,
2066 sign: _MM_MANTISSA_SIGN_ENUM,
2067) -> __m512 {
2068 macro_rules! call {
2069 ($imm4:expr, $imm2:expr) => {
2070 vgetmantps(
2071 a.as_f32x16(),
2072 $imm2 << 2 | $imm4,
2073 _mm512_setzero_ps().as_f32x16(),
2074 k,
2075 _MM_FROUND_CUR_DIRECTION,
2076 )
2077 };
3dfed10e 2078 }
1b1a35ee
XL
2079 let r = constify_imm4_mantissas!(norm, sign, call);
2080 transmute(r)
2081}
3dfed10e 2082
1b1a35ee
XL
2083/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
2084/// The mantissa is normalized to the interval specified by interv, which can take the following values:
2085/// _MM_MANT_NORM_1_2 // interval [1, 2)
2086/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
2087/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
2088/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
2089/// The sign is determined by sc which can take the following values:
2090/// _MM_MANT_SIGN_src // sign = sign(src)
2091/// _MM_MANT_SIGN_zero // sign = 0
2092/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2093///
2094/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_pd&expand=2871)
2095#[inline]
2096#[target_feature(enable = "avx512f")]
2097#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
2098#[rustc_args_required_const(1, 2)]
2099pub unsafe fn _mm512_getmant_pd(
2100 a: __m512d,
2101 norm: _MM_MANTISSA_NORM_ENUM,
2102 sign: _MM_MANTISSA_SIGN_ENUM,
2103) -> __m512d {
2104 macro_rules! call {
2105 ($imm4:expr, $imm2:expr) => {
2106 vgetmantpd(
2107 a.as_f64x8(),
2108 $imm2 << 2 | $imm4,
2109 _mm512_setzero_pd().as_f64x8(),
2110 0b11111111,
2111 _MM_FROUND_CUR_DIRECTION,
2112 )
2113 };
3dfed10e 2114 }
1b1a35ee
XL
2115 let r = constify_imm4_mantissas!(norm, sign, call);
2116 transmute(r)
2117}
3dfed10e 2118
1b1a35ee
XL
2119/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
2120/// The mantissa is normalized to the interval specified by interv, which can take the following values:
2121/// _MM_MANT_NORM_1_2 // interval [1, 2)
2122/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
2123/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
2124/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
2125/// The sign is determined by sc which can take the following values:
2126/// _MM_MANT_SIGN_src // sign = sign(src)
2127/// _MM_MANT_SIGN_zero // sign = 0
2128/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2129///
2130/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_pd&expand=2872)
2131#[inline]
2132#[target_feature(enable = "avx512f")]
2133#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
2134#[rustc_args_required_const(3, 4)]
2135pub unsafe fn _mm512_mask_getmant_pd(
2136 src: __m512d,
2137 k: __mmask8,
2138 a: __m512d,
2139 norm: _MM_MANTISSA_NORM_ENUM,
2140 sign: _MM_MANTISSA_SIGN_ENUM,
2141) -> __m512d {
2142 macro_rules! call {
2143 ($imm4:expr, $imm2:expr) => {
2144 vgetmantpd(
2145 a.as_f64x8(),
2146 $imm2 << 2 | $imm4,
2147 src.as_f64x8(),
2148 k,
2149 _MM_FROUND_CUR_DIRECTION,
2150 )
2151 };
3dfed10e 2152 }
1b1a35ee
XL
2153 let r = constify_imm4_mantissas!(norm, sign, call);
2154 transmute(r)
2155}
3dfed10e 2156
1b1a35ee
XL
2157/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
2158/// The mantissa is normalized to the interval specified by interv, which can take the following values:
2159/// _MM_MANT_NORM_1_2 // interval [1, 2)
2160/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
2161/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
2162/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
2163/// The sign is determined by sc which can take the following values:
2164/// _MM_MANT_SIGN_src // sign = sign(src)
2165/// _MM_MANT_SIGN_zero // sign = 0
2166/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
2167///
2168/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_pd&expand=2873)
2169#[inline]
2170#[target_feature(enable = "avx512f")]
2171#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
2172#[rustc_args_required_const(2, 3)]
2173pub unsafe fn _mm512_maskz_getmant_pd(
2174 k: __mmask8,
2175 a: __m512d,
2176 norm: _MM_MANTISSA_NORM_ENUM,
2177 sign: _MM_MANTISSA_SIGN_ENUM,
2178) -> __m512d {
2179 macro_rules! call {
2180 ($imm4:expr, $imm2:expr) => {
2181 vgetmantpd(
2182 a.as_f64x8(),
2183 $imm2 << 2 | $imm4,
2184 _mm512_setzero_pd().as_f64x8(),
2185 k,
2186 _MM_FROUND_CUR_DIRECTION,
2187 )
2188 };
3dfed10e 2189 }
1b1a35ee
XL
2190 let r = constify_imm4_mantissas!(norm, sign, call);
2191 transmute(r)
2192}
3dfed10e 2193
1b1a35ee
XL
2194/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
2195///
2196/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2197/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2198/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2199/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2200/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2201/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2202///
2203/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_round_ps&expand=145)
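/// # Examples
///
/// A minimal sketch (not from the upstream source), assuming AVX-512F has been
/// verified at runtime; the rounding mode and values are arbitrary choices.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(2.25);
///     // Round to nearest and suppress exceptions; every lane of `r` is 3.75.
///     let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// }
/// ```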
2204#[inline]
2205#[target_feature(enable = "avx512f")]
2206#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
2207#[rustc_args_required_const(2)]
2208pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
2209 macro_rules! call {
2210 ($imm4:expr) => {
2211 vaddps(a.as_f32x16(), b.as_f32x16(), $imm4)
2212 };
3dfed10e 2213 }
1b1a35ee
XL
2214 let r = constify_imm4_round!(rounding, call);
2215 transmute(r)
2216}
3dfed10e 2217
1b1a35ee
XL
2218/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2219///
2220/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2221/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2222/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2223/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2224/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2225/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2226///
2227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_round_ps&expand=146)
2228#[inline]
2229#[target_feature(enable = "avx512f")]
2230#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
2231#[rustc_args_required_const(4)]
2232pub unsafe fn _mm512_mask_add_round_ps(
2233 src: __m512,
2234 k: __mmask16,
2235 a: __m512,
2236 b: __m512,
2237 rounding: i32,
2238) -> __m512 {
2239 macro_rules! call {
2240 ($imm4:expr) => {
2241 vaddps(a.as_f32x16(), b.as_f32x16(), $imm4)
2242 };
3dfed10e 2243 }
1b1a35ee
XL
2244 let addround = constify_imm4_round!(rounding, call);
2245 transmute(simd_select_bitmask(k, addround, src.as_f32x16()))
2246}
3dfed10e 2247
1b1a35ee
XL
2248/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2249///
2250/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2251/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2252/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2253/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2254/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2255/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2256///
2257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_round_ps&expand=147)
2258#[inline]
2259#[target_feature(enable = "avx512f")]
2260#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
2261#[rustc_args_required_const(3)]
2262pub unsafe fn _mm512_maskz_add_round_ps(
2263 k: __mmask16,
2264 a: __m512,
2265 b: __m512,
2266 rounding: i32,
2267) -> __m512 {
2268 macro_rules! call {
2269 ($imm4:expr) => {
2270 vaddps(a.as_f32x16(), b.as_f32x16(), $imm4)
2271 };
3dfed10e 2272 }
1b1a35ee
XL
2273 let addround = constify_imm4_round!(rounding, call);
2274 let zero = _mm512_setzero_ps().as_f32x16();
2275 transmute(simd_select_bitmask(k, addround, zero))
2276}
3dfed10e 2277
1b1a35ee
XL
2278/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
2279///
2280/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2281/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2282/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2283/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2284/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2285/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2286///
2287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_round_pd&expand=142)
2288#[inline]
2289#[target_feature(enable = "avx512f")]
2290#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
2291#[rustc_args_required_const(2)]
2292pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
2293 macro_rules! call {
2294 ($imm4:expr) => {
2295 vaddpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2296 };
3dfed10e 2297 }
1b1a35ee
XL
2298 let r = constify_imm4_round!(rounding, call);
2299 transmute(r)
2300}
3dfed10e 2301
1b1a35ee
XL
2302/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2303///
2304/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2305/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2306/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2307/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2308/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2309/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2310///
2311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_round_pd&expand=143)
2312#[inline]
2313#[target_feature(enable = "avx512f")]
2314#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
2315#[rustc_args_required_const(4)]
2316pub unsafe fn _mm512_mask_add_round_pd(
2317 src: __m512d,
2318 k: __mmask8,
2319 a: __m512d,
2320 b: __m512d,
2321 rounding: i32,
2322) -> __m512d {
2323 macro_rules! call {
2324 ($imm4:expr) => {
2325 vaddpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2326 };
3dfed10e 2327 }
1b1a35ee
XL
2328 let addround = constify_imm4_round!(rounding, call);
2329 transmute(simd_select_bitmask(k, addround, src.as_f64x8()))
2330}
3dfed10e 2331
1b1a35ee
XL
2332/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2333///
2334/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2335/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2336/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2337/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2338/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2339/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2340///
2341/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_round_pd&expand=144)
2342#[inline]
2343#[target_feature(enable = "avx512f")]
2344#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
2345#[rustc_args_required_const(3)]
2346pub unsafe fn _mm512_maskz_add_round_pd(
2347 k: __mmask8,
2348 a: __m512d,
2349 b: __m512d,
2350 rounding: i32,
2351) -> __m512d {
2352 macro_rules! call {
2353 ($imm4:expr) => {
2354 vaddpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2355 };
3dfed10e 2356 }
1b1a35ee
XL
2357 let addround = constify_imm4_round!(rounding, call);
2358 let zero = _mm512_setzero_pd().as_f64x8();
2359 transmute(simd_select_bitmask(k, addround, zero))
2360}
3dfed10e 2361
1b1a35ee
XL
2362/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
2363///
2364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2365/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2366/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2367/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2368/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2369/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2370///
2371/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_round_ps&expand=5739)
2372#[inline]
2373#[target_feature(enable = "avx512f")]
2374#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
2375#[rustc_args_required_const(2)]
2376pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
2377 macro_rules! call {
2378 ($imm4:expr) => {
2379 vsubps(a.as_f32x16(), b.as_f32x16(), $imm4)
2380 };
3dfed10e 2381 }
1b1a35ee
XL
2382 let r = constify_imm4_round!(rounding, call);
2383 transmute(r)
2384}
3dfed10e 2385
1b1a35ee
XL
2386/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2387///
2388/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2389/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2390/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2391/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2392/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2393/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2394///
2395/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_round_ps&expand=5737)
2396#[inline]
2397#[target_feature(enable = "avx512f")]
2398#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
2399#[rustc_args_required_const(4)]
2400pub unsafe fn _mm512_mask_sub_round_ps(
2401 src: __m512,
2402 k: __mmask16,
2403 a: __m512,
2404 b: __m512,
2405 rounding: i32,
2406) -> __m512 {
2407 macro_rules! call {
2408 ($imm4:expr) => {
2409 vsubps(a.as_f32x16(), b.as_f32x16(), $imm4)
2410 };
3dfed10e 2411 }
1b1a35ee
XL
2412 let subround = constify_imm4_round!(rounding, call);
2413 transmute(simd_select_bitmask(k, subround, src.as_f32x16()))
2414}
3dfed10e 2415
1b1a35ee
XL
2416/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2417///
2418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2419/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2420/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2421/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2422/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2423/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2424///
2425/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_round_ps&expand=5738)
2426#[inline]
2427#[target_feature(enable = "avx512f")]
2428#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
2429#[rustc_args_required_const(3)]
2430pub unsafe fn _mm512_maskz_sub_round_ps(
2431 k: __mmask16,
2432 a: __m512,
2433 b: __m512,
2434 rounding: i32,
2435) -> __m512 {
2436 macro_rules! call {
2437 ($imm4:expr) => {
2438 vsubps(a.as_f32x16(), b.as_f32x16(), $imm4)
2439 };
3dfed10e 2440 }
1b1a35ee
XL
2441 let subround = constify_imm4_round!(rounding, call);
2442 let zero = _mm512_setzero_ps().as_f32x16();
2443 transmute(simd_select_bitmask(k, subround, zero))
2444}
3dfed10e 2445
1b1a35ee
XL
2446/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
2447///
2448/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2449/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2450/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2451/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2452/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2453/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2454///
2455/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_round_pd&expand=5736)
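/// # Examples
///
/// A minimal sketch (not from the upstream source), assuming AVX-512F support has
/// been checked at runtime; values and rounding mode are arbitrary.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let a = _mm512_set1_pd(5.0);
///     let b = _mm512_set1_pd(1.5);
///     // Truncating rounding mode; every lane of `r` is 3.5.
///     let r = _mm512_sub_round_pd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
/// }
/// ```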
2456#[inline]
2457#[target_feature(enable = "avx512f")]
2458#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
2459#[rustc_args_required_const(2)]
2460pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
2461 macro_rules! call {
2462 ($imm4:expr) => {
2463 vsubpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2464 };
3dfed10e 2465 }
1b1a35ee
XL
2466 let r = constify_imm4_round!(rounding, call);
2467 transmute(r)
2468}
3dfed10e 2469
1b1a35ee
XL
2470/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2471///
2472/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2473/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2474/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2475/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2476/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2477/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2478///
2479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_round_pd&expand=5734)
2480#[inline]
2481#[target_feature(enable = "avx512f")]
2482#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
2483#[rustc_args_required_const(4)]
2484pub unsafe fn _mm512_mask_sub_round_pd(
2485 src: __m512d,
2486 k: __mmask8,
2487 a: __m512d,
2488 b: __m512d,
2489 rounding: i32,
2490) -> __m512d {
2491 macro_rules! call {
2492 ($imm4:expr) => {
2493 vsubpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2494 };
3dfed10e 2495 }
1b1a35ee
XL
2496 let subround = constify_imm4_round!(rounding, call);
2497 transmute(simd_select_bitmask(k, subround, src.as_f64x8()))
2498}
3dfed10e 2499
1b1a35ee
XL
2500/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2501///
2502/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2503/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2504/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2505/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2506/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2507/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2508///
2509/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_round_pd&expand=5735)
2510#[inline]
2511#[target_feature(enable = "avx512f")]
2512#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
2513#[rustc_args_required_const(3)]
2514pub unsafe fn _mm512_maskz_sub_round_pd(
2515 k: __mmask8,
2516 a: __m512d,
2517 b: __m512d,
2518 rounding: i32,
2519) -> __m512d {
2520 macro_rules! call {
2521 ($imm4:expr) => {
2522 vsubpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2523 };
3dfed10e 2524 }
1b1a35ee
XL
2525 let subround = constify_imm4_round!(rounding, call);
2526 let zero = _mm512_setzero_pd().as_f64x8();
2527 transmute(simd_select_bitmask(k, subround, zero))
2528}
3dfed10e 2529
1b1a35ee
XL
2530/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
2531///
2532/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2533/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2534/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2535/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2536/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2537/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2538///
2539/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_round_ps&expand=3940)
2540#[inline]
2541#[target_feature(enable = "avx512f")]
2542#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
2543#[rustc_args_required_const(2)]
2544pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
2545 macro_rules! call {
2546 ($imm4:expr) => {
2547 vmulps(a.as_f32x16(), b.as_f32x16(), $imm4)
2548 };
3dfed10e 2549 }
1b1a35ee
XL
2550 let r = constify_imm4_round!(rounding, call);
2551 transmute(r)
2552}
3dfed10e 2553
1b1a35ee
XL
2554/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2555///
2556/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2557/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2558/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2559/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2560/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2561/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2562///
2563/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_round_ps&expand=3938)
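/// # Examples
///
/// A minimal sketch (not from the upstream source) showing the writemask: lanes whose
/// mask bit is clear keep the value from `src`. AVX-512F support is assumed to have
/// been verified at runtime; values, mask, and rounding mode are arbitrary.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let src = _mm512_set1_ps(-1.0);
///     let a = _mm512_set1_ps(3.0);
///     let b = _mm512_set1_ps(2.0);
///     // The lower 8 lanes become 6.0; the upper 8 lanes are copied from `src` (-1.0).
///     let r = _mm512_mask_mul_round_ps(
///         src,
///         0b00000000_11111111,
///         a,
///         b,
///         _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
///     );
/// }
/// ```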
2564#[inline]
2565#[target_feature(enable = "avx512f")]
2566#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
2567#[rustc_args_required_const(4)]
2568pub unsafe fn _mm512_mask_mul_round_ps(
2569 src: __m512,
2570 k: __mmask16,
2571 a: __m512,
2572 b: __m512,
2573 rounding: i32,
2574) -> __m512 {
2575 macro_rules! call {
2576 ($imm4:expr) => {
2577 vmulps(a.as_f32x16(), b.as_f32x16(), $imm4)
2578 };
3dfed10e 2579 }
1b1a35ee
XL
2580 let mulround = constify_imm4_round!(rounding, call);
2581 transmute(simd_select_bitmask(k, mulround, src.as_f32x16()))
2582}
3dfed10e 2583
1b1a35ee
XL
2584/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2585///
2586/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2587/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2588/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2589/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2590/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2591/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2592///
2593/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_round_ps&expand=3939)
2594#[inline]
2595#[target_feature(enable = "avx512f")]
2596#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
2597#[rustc_args_required_const(3)]
2598pub unsafe fn _mm512_maskz_mul_round_ps(
2599 k: __mmask16,
2600 a: __m512,
2601 b: __m512,
2602 rounding: i32,
2603) -> __m512 {
2604 macro_rules! call {
2605 ($imm4:expr) => {
2606 vmulps(a.as_f32x16(), b.as_f32x16(), $imm4)
2607 };
3dfed10e 2608 }
1b1a35ee
XL
2609 let mulround = constify_imm4_round!(rounding, call);
2610 let zero = _mm512_setzero_ps().as_f32x16();
2611 transmute(simd_select_bitmask(k, mulround, zero))
2612}
3dfed10e 2613
1b1a35ee
XL
2614/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
2615///
2616/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2617/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2618/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2619/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2620/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2621/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2622///
2623/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_round_pd&expand=3937)
2624#[inline]
2625#[target_feature(enable = "avx512f")]
2626#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
2627#[rustc_args_required_const(2)]
2628pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
2629 macro_rules! call {
2630 ($imm4:expr) => {
2631 vmulpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2632 };
3dfed10e 2633 }
1b1a35ee
XL
2634 let r = constify_imm4_round!(rounding, call);
2635 transmute(r)
2636}
3dfed10e 2637
1b1a35ee
XL
2638/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2639///
2640/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2641/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2642/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2643/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2644/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2645/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2646///
2647/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_round_pd&expand=3935)
2648#[inline]
2649#[target_feature(enable = "avx512f")]
2650#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
2651#[rustc_args_required_const(4)]
2652pub unsafe fn _mm512_mask_mul_round_pd(
2653 src: __m512d,
2654 k: __mmask8,
2655 a: __m512d,
2656 b: __m512d,
2657 rounding: i32,
2658) -> __m512d {
2659 macro_rules! call {
2660 ($imm4:expr) => {
2661 vmulpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2662 };
3dfed10e 2663 }
1b1a35ee
XL
2664 let mulround = constify_imm4_round!(rounding, call);
2665 transmute(simd_select_bitmask(k, mulround, src.as_f64x8()))
2666}
3dfed10e 2667
1b1a35ee
XL
2668/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2669///
2670/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2671/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2672/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2673/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2674/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2675/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2676///
2677/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_round_pd&expand=3939)
2678#[inline]
2679#[target_feature(enable = "avx512f")]
2680#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
2681#[rustc_args_required_const(3)]
2682pub unsafe fn _mm512_maskz_mul_round_pd(
2683 k: __mmask8,
2684 a: __m512d,
2685 b: __m512d,
2686 rounding: i32,
2687) -> __m512d {
2688 macro_rules! call {
2689 ($imm4:expr) => {
2690 vmulpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2691 };
3dfed10e 2692 }
1b1a35ee
XL
2693 let mulround = constify_imm4_round!(rounding, call);
2694 let zero = _mm512_setzero_pd().as_f64x8();
2695 transmute(simd_select_bitmask(k, mulround, zero))
2696}
3dfed10e 2697
1b1a35ee
XL
2698/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2699///
2700/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2701/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2702/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2703/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2704/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2705/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2706///
2707/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_round_ps&expand=2168)
2708#[inline]
2709#[target_feature(enable = "avx512f")]
2710#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
2711#[rustc_args_required_const(2)]
2712pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
2713 macro_rules! call {
2714 ($imm4:expr) => {
2715 vdivps(a.as_f32x16(), b.as_f32x16(), $imm4)
2716 };
3dfed10e 2717 }
1b1a35ee
XL
2718 let r = constify_imm4_round!(rounding, call);
2719 transmute(r)
2720}
3dfed10e 2721
1b1a35ee
XL
2722/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2723///
2724/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2725/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2726/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2727/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2728/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2729/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2730///
2731/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_round_ps&expand=2169)
2732#[inline]
2733#[target_feature(enable = "avx512f")]
2734#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
2735#[rustc_args_required_const(4)]
2736pub unsafe fn _mm512_mask_div_round_ps(
2737 src: __m512,
2738 k: __mmask16,
2739 a: __m512,
2740 b: __m512,
2741 rounding: i32,
2742) -> __m512 {
2743 macro_rules! call {
2744 ($imm4:expr) => {
2745 vdivps(a.as_f32x16(), b.as_f32x16(), $imm4)
2746 };
3dfed10e 2747 }
1b1a35ee
XL
2748 let divround = constify_imm4_round!(rounding, call);
2749 transmute(simd_select_bitmask(k, divround, src.as_f32x16()))
2750}
3dfed10e 2751
1b1a35ee
XL
2752/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2753///
2754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2755/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2756/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2757/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2758/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2759/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2760///
2761/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_round_ps&expand=2170)
2762#[inline]
2763#[target_feature(enable = "avx512f")]
2764#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
2765#[rustc_args_required_const(3)]
2766pub unsafe fn _mm512_maskz_div_round_ps(
2767 k: __mmask16,
2768 a: __m512,
2769 b: __m512,
2770 rounding: i32,
2771) -> __m512 {
2772 macro_rules! call {
2773 ($imm4:expr) => {
2774 vdivps(a.as_f32x16(), b.as_f32x16(), $imm4)
2775 };
3dfed10e 2776 }
1b1a35ee
XL
2777 let divround = constify_imm4_round!(rounding, call);
2778 let zero = _mm512_setzero_ps().as_f32x16();
2779 transmute(simd_select_bitmask(k, divround, zero))
2780}
3dfed10e 2781
1b1a35ee
XL
2782/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
2783///
2784/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2785/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2786/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2787/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2788/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2789/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2790///
2791/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_round_pd&expand=2165)
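/// # Examples
///
/// A minimal sketch (not from the upstream source), assuming AVX-512F has been
/// verified at runtime; values and rounding mode are arbitrary.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let a = _mm512_set1_pd(7.0);
///     let b = _mm512_set1_pd(2.0);
///     // Every lane of `r` is 3.5; rounding toward negative infinity has no effect here.
///     let r = _mm512_div_round_pd(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// }
/// ```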
2792#[inline]
2793#[target_feature(enable = "avx512f")]
2794#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
2795#[rustc_args_required_const(2)]
2796pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
2797 macro_rules! call {
2798 ($imm4:expr) => {
2799 vdivpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2800 };
3dfed10e 2801 }
1b1a35ee
XL
2802 let r = constify_imm4_round!(rounding, call);
2803 transmute(r)
2804}
3dfed10e 2805
1b1a35ee
XL
2806/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2807///
2808/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2809/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2810/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2811/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2812/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2813/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2814///
2815/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_round_pd&expand=2166)
2816#[inline]
2817#[target_feature(enable = "avx512f")]
2818#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
2819#[rustc_args_required_const(4)]
2820pub unsafe fn _mm512_mask_div_round_pd(
2821 src: __m512d,
2822 k: __mmask8,
2823 a: __m512d,
2824 b: __m512d,
2825 rounding: i32,
2826) -> __m512d {
2827 macro_rules! call {
2828 ($imm4:expr) => {
2829 vdivpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2830 };
3dfed10e 2831 }
1b1a35ee
XL
2832 let divround = constify_imm4_round!(rounding, call);
2833 transmute(simd_select_bitmask(k, divround, src.as_f64x8()))
2834}
3dfed10e 2835
1b1a35ee
XL
2836/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2837///
2838/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2839/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2840/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2841/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2842/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2843/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2844///
2845/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_round_pd&expand=2167)
2846#[inline]
2847#[target_feature(enable = "avx512f")]
2848#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
2849#[rustc_args_required_const(3)]
2850pub unsafe fn _mm512_maskz_div_round_pd(
2851 k: __mmask8,
2852 a: __m512d,
2853 b: __m512d,
2854 rounding: i32,
2855) -> __m512d {
2856 macro_rules! call {
2857 ($imm4:expr) => {
2858 vdivpd(a.as_f64x8(), b.as_f64x8(), $imm4)
2859 };
3dfed10e 2860 }
1b1a35ee
XL
2861 let divround = constify_imm4_round!(rounding, call);
2862 let zero = _mm512_setzero_pd().as_f64x8();
2863 transmute(simd_select_bitmask(k, divround, zero))
2864}
3dfed10e 2865
1b1a35ee
XL
2866/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
2867///
2868/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2869/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2870/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2871/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2872/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2873/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2874///
2875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_round_ps&expand=5377)
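/// # Examples
///
/// A minimal sketch (not from the upstream source), assuming AVX-512F support has
/// been checked at runtime; values and rounding mode are arbitrary.
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// // Safety: assumes AVX-512F has been verified at runtime.
/// unsafe {
///     let a = _mm512_set1_ps(9.0);
///     // Every lane of `r` is 3.0.
///     let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// }
/// ```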
2876#[inline]
2877#[target_feature(enable = "avx512f")]
2878#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
2879#[rustc_args_required_const(1)]
2880pub unsafe fn _mm512_sqrt_round_ps(a: __m512, rounding: i32) -> __m512 {
2881 macro_rules! call {
2882 ($imm4:expr) => {
2883 vsqrtps(a.as_f32x16(), $imm4)
2884 };
3dfed10e 2885 }
1b1a35ee
XL
2886 let r = constify_imm4_round!(rounding, call);
2887 transmute(r)
2888}
3dfed10e 2889
1b1a35ee
XL
2890/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2891///
2892/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2893/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2894/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2895/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2896/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2897/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2898///
2899/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_round_ps&expand=5375)
2900#[inline]
2901#[target_feature(enable = "avx512f")]
2902#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
2903#[rustc_args_required_const(3)]
2904pub unsafe fn _mm512_mask_sqrt_round_ps(
2905 src: __m512,
2906 k: __mmask16,
2907 a: __m512,
2908 rounding: i32,
2909) -> __m512 {
2910 macro_rules! call {
2911 ($imm4:expr) => {
2912 vsqrtps(a.as_f32x16(), $imm4)
2913 };
3dfed10e 2914 }
1b1a35ee
XL
2915 let sqrtround = constify_imm4_round!(rounding, call);
2916 transmute(simd_select_bitmask(k, sqrtround, src.as_f32x16()))
2917}
3dfed10e 2918
1b1a35ee
XL
2919/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2920///
2921/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2922/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2923/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2924/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2925/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2926/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2927///
2928/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_round_ps&expand=5376)
2929#[inline]
2930#[target_feature(enable = "avx512f")]
2931#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
2932#[rustc_args_required_const(2)]
2933pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512, rounding: i32) -> __m512 {
2934 macro_rules! call {
2935 ($imm4:expr) => {
2936 vsqrtps(a.as_f32x16(), $imm4)
2937 };
3dfed10e 2938 }
1b1a35ee
XL
2939 let sqrtround = constify_imm4_round!(rounding, call);
2940 let zero = _mm512_setzero_ps().as_f32x16();
2941 transmute(simd_select_bitmask(k, sqrtround, zero))
2942}
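// Illustrative sketch (hypothetical helper): the zeromask variant zeroes, rather than
// preserves, every lane whose mask bit is clear.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_sqrt_round_ps(a: __m512) -> __m512 {
    // Lanes 0..8 hold the rounded square roots; lanes 8..16 become 0.0.
    _mm512_maskz_sqrt_round_ps(0b00000000_11111111, a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
}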
2943
2944/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
2945///
2946/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2947/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2948/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2949/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2950/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2951/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2952///
2953/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_round_pd&expand=5374)
2954#[inline]
2955#[target_feature(enable = "avx512f")]
2956#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
2957#[rustc_args_required_const(1)]
2958pub unsafe fn _mm512_sqrt_round_pd(a: __m512d, rounding: i32) -> __m512d {
2959 macro_rules! call {
2960 ($imm4:expr) => {
2961 vsqrtpd(a.as_f64x8(), $imm4)
2962 };
2963 }
2964 let r = constify_imm4_round!(rounding, call);
2965 transmute(r)
2966}
2967
2968/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2969///
2970/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
2971/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
2972/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
2973/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
2974/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
2975/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2976///
2977/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_round_pd&expand=5372)
2978#[inline]
2979#[target_feature(enable = "avx512f")]
2980#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
2981#[rustc_args_required_const(3)]
2982pub unsafe fn _mm512_mask_sqrt_round_pd(
2983 src: __m512d,
2984 k: __mmask8,
2985 a: __m512d,
2986 rounding: i32,
2987) -> __m512d {
2988 macro_rules! call {
2989 ($imm4:expr) => {
2990 vsqrtpd(a.as_f64x8(), $imm4)
2991 };
2992 }
2993 let sqrtround = constify_imm4_round!(rounding, call);
2994 transmute(simd_select_bitmask(k, sqrtround, src.as_f64x8()))
2995}
2996
2997/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3000/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3001/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3002/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3003/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3004/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3005///
3006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_round_pd&expand=5373)
3007#[inline]
3008#[target_feature(enable = "avx512f")]
3009#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
3010#[rustc_args_required_const(2)]
3011pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d, rounding: i32) -> __m512d {
3012 macro_rules! call {
3013 ($imm4:expr) => {
3014 vsqrtpd(a.as_f64x8(), $imm4)
3015 };
3016 }
3017 let sqrtround = constify_imm4_round!(rounding, call);
3018 let zero = _mm512_setzero_pd().as_f64x8();
3019 transmute(simd_select_bitmask(k, sqrtround, zero))
3020}
3021
3022/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3023///
3024/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3025/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3026/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3027/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3028/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3029/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3030///
3031/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_round_ps&expand=2565)
3032#[inline]
3033#[target_feature(enable = "avx512f")]
3034#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3035#[rustc_args_required_const(3)]
3036pub unsafe fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3037 macro_rules! call {
3038 ($imm4:expr) => {
3039 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3040 };
3041 }
3042 let r = constify_imm4_round!(rounding, call);
3043 transmute(r)
3044}
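// Illustrative sketch (hypothetical helper): a fused multiply-add computes a * b + c in one
// step, so the product is not rounded separately before the addition; the rounding constant
// below is one example of the modes listed in the doc comment above.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}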
3045
3046/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3047///
3048/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3049/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3050/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3051/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3052/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3053/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3054///
3055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_round_ps&expand=2566)
3056#[inline]
3057#[target_feature(enable = "avx512f")]
3058#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3059#[rustc_args_required_const(4)]
3060pub unsafe fn _mm512_mask_fmadd_round_ps(
3061 a: __m512,
3062 k: __mmask16,
3063 b: __m512,
3064 c: __m512,
3065 rounding: i32,
3066) -> __m512 {
3067 macro_rules! call {
3068 ($imm4:expr) => {
3069 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3070 };
3071 }
3072 let fmadd = constify_imm4_round!(rounding, call);
3073 transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
3074}
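// Illustrative sketch (hypothetical helper): in the masked FMA variants the first operand `a`
// doubles as the pass-through source, so lanes with a clear bit in `k` return the
// corresponding lane of `a` unchanged.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_fmadd_round_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    _mm512_mask_fmadd_round_ps(a, k, b, c, _MM_FROUND_CUR_DIRECTION)
}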
3075
3076/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3077///
3078/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3079/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3080/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3081/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3082/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3083/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3084///
3085/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_round_ps&expand=2568)
3086#[inline]
3087#[target_feature(enable = "avx512f")]
3088#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3089#[rustc_args_required_const(4)]
3090pub unsafe fn _mm512_maskz_fmadd_round_ps(
3091 k: __mmask16,
3092 a: __m512,
3093 b: __m512,
3094 c: __m512,
3095 rounding: i32,
3096) -> __m512 {
3097 macro_rules! call {
3098 ($imm4:expr) => {
3099 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3100 };
3101 }
3102 let fmadd = constify_imm4_round!(rounding, call);
3103 let zero = _mm512_setzero_ps().as_f32x16();
3104 transmute(simd_select_bitmask(k, fmadd, zero))
3105}
3106
3107/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3108///
3109/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3110/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3111/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3112/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3113/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3114/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3115///
3116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_round_ps&expand=2567)
3117#[inline]
3118#[target_feature(enable = "avx512f")]
3119#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3120#[rustc_args_required_const(4)]
3121pub unsafe fn _mm512_mask3_fmadd_round_ps(
3122 a: __m512,
3123 b: __m512,
3124 c: __m512,
3125 k: __mmask16,
3126 rounding: i32,
3127) -> __m512 {
3128 macro_rules! call {
3129 ($imm4:expr) => {
3130 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3131 };
3132 }
3133 let fmadd = constify_imm4_round!(rounding, call);
3134 transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
3135}
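// Illustrative sketch (hypothetical helper): the `mask3` form differs from the plain masked
// form only in which operand is preserved - lanes with a clear bit in `k` keep the addend `c`
// rather than `a`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask3_fmadd_round_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    _mm512_mask3_fmadd_round_ps(a, b, c, k, _MM_FROUND_CUR_DIRECTION)
}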
3136
3137/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3138///
3139/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3140/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3141/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3142/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3143/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3144/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3145///
3146/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_round_pd&expand=2561)
3147#[inline]
3148#[target_feature(enable = "avx512f")]
3149#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3150#[rustc_args_required_const(3)]
3151pub unsafe fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
3152 macro_rules! call {
3153 ($imm4:expr) => {
3154 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3155 };
3156 }
3157 let r = constify_imm4_round!(rounding, call);
3158 transmute(r)
3159}
3160
3161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3162///
3163/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3164/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3165/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3166/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3167/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3168/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3169///
3170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_round_pd&expand=2562)
3171#[inline]
3172#[target_feature(enable = "avx512f")]
3173#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3174#[rustc_args_required_const(4)]
3175pub unsafe fn _mm512_mask_fmadd_round_pd(
3176 a: __m512d,
3177 k: __mmask8,
3178 b: __m512d,
3179 c: __m512d,
3180 rounding: i32,
3181) -> __m512d {
3182 macro_rules! call {
3183 ($imm4:expr) => {
3184 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3185 };
3186 }
3187 let fmadd = constify_imm4_round!(rounding, call);
3188 transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
3189}
3190
3191/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3194/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3195/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3196/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3197/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3198/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3199///
3200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_round_pd&expand=2564)
3201#[inline]
3202#[target_feature(enable = "avx512f")]
3203#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3204#[rustc_args_required_const(4)]
3205pub unsafe fn _mm512_maskz_fmadd_round_pd(
3206 k: __mmask8,
3207 a: __m512d,
3208 b: __m512d,
3209 c: __m512d,
3210 rounding: i32,
3211) -> __m512d {
3212 macro_rules! call {
3213 ($imm4:expr) => {
3214 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3215 };
3216 }
3217 let fmadd = constify_imm4_round!(rounding, call);
3218 let zero = _mm512_setzero_pd().as_f64x8();
3219 transmute(simd_select_bitmask(k, fmadd, zero))
3220}
3221
3222/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3223///
3224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3225/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3226/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3227/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3228/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3229/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3230///
3231/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_round_pd&expand=2563)
3232#[inline]
3233#[target_feature(enable = "avx512f")]
3234#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3235#[rustc_args_required_const(4)]
3236pub unsafe fn _mm512_mask3_fmadd_round_pd(
3237 a: __m512d,
3238 b: __m512d,
3239 c: __m512d,
3240 k: __mmask8,
3241 rounding: i32,
3242) -> __m512d {
3243 macro_rules! call {
3244 ($imm4:expr) => {
3245 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3246 };
3247 }
3248 let fmadd = constify_imm4_round!(rounding, call);
3249 transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
3250}
3251
3252/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3253///
3254/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3255/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3256/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3257/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3258/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3259/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3260///
3261/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_round_ps&expand=2651)
3262#[inline]
3263#[target_feature(enable = "avx512f")]
3264#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3265#[rustc_args_required_const(3)]
3266pub unsafe fn _mm512_fmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3267 let zero: f32x16 = mem::zeroed();
3268 let sub = simd_sub(zero, c.as_f32x16());
3269 macro_rules! call {
3270 ($imm4:expr) => {
3271 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3272 };
3273 }
3274 let r = constify_imm4_round!(rounding, call);
3275 transmute(r)
3276}
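// Illustrative note (not upstream code): the body above realises a * b - c by negating `c`
// and reusing the fused multiply-add primitive, since a * b + (-c) yields the same value.
// A hypothetical caller that truncates the result toward zero:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
}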
3277
3278/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3279///
3280/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3281/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3282/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3283/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3284/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3285/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3286///
3287/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_round_ps&expand=2652)
3288#[inline]
3289#[target_feature(enable = "avx512f")]
3290#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3291#[rustc_args_required_const(4)]
3292pub unsafe fn _mm512_mask_fmsub_round_ps(
3293 a: __m512,
3294 k: __mmask16,
3295 b: __m512,
3296 c: __m512,
3297 rounding: i32,
3298) -> __m512 {
3299 let zero: f32x16 = mem::zeroed();
3300 let sub = simd_sub(zero, c.as_f32x16());
3301 macro_rules! call {
3302 ($imm4:expr) => {
3303 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3304 };
3305 }
3306 let fmsub = constify_imm4_round!(rounding, call);
3307 transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
3308}
3309
3310/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3311///
3312/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3313/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3314/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3315/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3316/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3317/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3318///
3319/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_round_ps&expand=2654)
3320#[inline]
3321#[target_feature(enable = "avx512f")]
3322#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3323#[rustc_args_required_const(4)]
3324pub unsafe fn _mm512_maskz_fmsub_round_ps(
3325 k: __mmask16,
3326 a: __m512,
3327 b: __m512,
3328 c: __m512,
3329 rounding: i32,
3330) -> __m512 {
3331 let zero: f32x16 = mem::zeroed();
3332 let sub = simd_sub(zero, c.as_f32x16());
3333 macro_rules! call {
3334 ($imm4:expr) => {
3335 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3336 };
3337 }
3338 let fmsub = constify_imm4_round!(rounding, call);
3339 transmute(simd_select_bitmask(k, fmsub, zero))
3340}
3341
3342/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3343///
3344/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3345/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3346/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3347/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3348/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3349/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3350///
3351/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_round_ps&expand=2653)
3352#[inline]
3353#[target_feature(enable = "avx512f")]
3354#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
3355#[rustc_args_required_const(4)]
3356pub unsafe fn _mm512_mask3_fmsub_round_ps(
3357 a: __m512,
3358 b: __m512,
3359 c: __m512,
3360 k: __mmask16,
3361 rounding: i32,
3362) -> __m512 {
3363 let zero: f32x16 = mem::zeroed();
3364 let sub = simd_sub(zero, c.as_f32x16());
3365 macro_rules! call {
3366 ($imm4:expr) => {
3367 vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3368 };
3369 }
3370 let fmsub = constify_imm4_round!(rounding, call);
3371 transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
3372}
3373
3374/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3375///
3376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3377/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3378/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3379/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3380/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3381/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3382///
3383/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_round_pd&expand=2647)
3384#[inline]
3385#[target_feature(enable = "avx512f")]
3386#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
3387#[rustc_args_required_const(3)]
3388pub unsafe fn _mm512_fmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
3389 let zero: f64x8 = mem::zeroed();
3390 let sub = simd_sub(zero, c.as_f64x8());
3391 macro_rules! call {
3392 ($imm4:expr) => {
3393 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3394 };
3395 }
3396 let r = constify_imm4_round!(rounding, call);
3397 transmute(r)
3398}
3399
3400/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3401///
3402/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3403/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3404/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3405/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3406/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3407/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3408///
3409/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_round_pd&expand=2648)
3410#[inline]
3411#[target_feature(enable = "avx512f")]
3412#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
3413#[rustc_args_required_const(4)]
3414pub unsafe fn _mm512_mask_fmsub_round_pd(
3415 a: __m512d,
3416 k: __mmask8,
3417 b: __m512d,
3418 c: __m512d,
3419 rounding: i32,
3420) -> __m512d {
3421 let zero: f64x8 = mem::zeroed();
3422 let sub = simd_sub(zero, c.as_f64x8());
3423 macro_rules! call {
3424 ($imm4:expr) => {
3425 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3426 };
3427 }
3428 let fmsub = constify_imm4_round!(rounding, call);
3429 transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
3430}
3431
3432/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3433///
3434/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3435/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3436/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3437/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3438/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3439/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3440///
3441/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_round_pd&expand=2650)
3442#[inline]
3443#[target_feature(enable = "avx512f")]
3444#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
3445#[rustc_args_required_const(4)]
3446pub unsafe fn _mm512_maskz_fmsub_round_pd(
3447 k: __mmask8,
3448 a: __m512d,
3449 b: __m512d,
3450 c: __m512d,
3451 rounding: i32,
3452) -> __m512d {
3453 let zero: f64x8 = mem::zeroed();
3454 let sub = simd_sub(zero, c.as_f64x8());
3455 macro_rules! call {
3456 ($imm4:expr) => {
3457 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3458 };
3459 }
3460 let fmsub = constify_imm4_round!(rounding, call);
3461 transmute(simd_select_bitmask(k, fmsub, zero))
3462}
3463
3464/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3465///
3466/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3467/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3468/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3469/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3470/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3471/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3472///
3473/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_round_pd&expand=2649)
3474#[inline]
3475#[target_feature(enable = "avx512f")]
3476#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
3477#[rustc_args_required_const(4)]
3478pub unsafe fn _mm512_mask3_fmsub_round_pd(
3479 a: __m512d,
3480 b: __m512d,
3481 c: __m512d,
3482 k: __mmask8,
3483 rounding: i32,
3484) -> __m512d {
3485 let zero: f64x8 = mem::zeroed();
3486 let sub = simd_sub(zero, c.as_f64x8());
3487 macro_rules! call {
3488 ($imm4:expr) => {
3489 vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3490 };
3491 }
3492 let fmsub = constify_imm4_round!(rounding, call);
3493 transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
3494}
3495
3496/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3497///
3498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3499/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3500/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3501/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3502/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3503/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3504///
3505/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_round_ps&expand=2619)
3506#[inline]
3507#[target_feature(enable = "avx512f")]
3508#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3509#[rustc_args_required_const(3)]
3510pub unsafe fn _mm512_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3511 macro_rules! call {
3512 ($imm4:expr) => {
3513 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3514 };
3515 }
3516 let r = constify_imm4_round!(rounding, call);
3517 transmute(r)
3518}
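// Illustrative sketch (hypothetical helper): fmaddsub alternates per lane over the a * b
// product, subtracting `c` in even-indexed lanes and adding it in odd-indexed lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}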
3519
3520/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3521///
3522/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3523/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3524/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3525/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3526/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3527/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3528///
3529/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_round_ps&expand=2620)
3530#[inline]
3531#[target_feature(enable = "avx512f")]
3532#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3533#[rustc_args_required_const(4)]
3534pub unsafe fn _mm512_mask_fmaddsub_round_ps(
3535 a: __m512,
3536 k: __mmask16,
3537 b: __m512,
3538 c: __m512,
3539 rounding: i32,
3540) -> __m512 {
3541 macro_rules! call {
3542 ($imm4:expr) => {
3543 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3544 };
3545 }
3546 let fmaddsub = constify_imm4_round!(rounding, call);
3547 transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
3548}
3549
3550/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3551///
3552/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3553/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3554/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3555/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3556/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3557/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3558///
3559/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_round_ps&expand=2622)
3560#[inline]
3561#[target_feature(enable = "avx512f")]
3562#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3563#[rustc_args_required_const(4)]
3564pub unsafe fn _mm512_maskz_fmaddsub_round_ps(
3565 k: __mmask16,
3566 a: __m512,
3567 b: __m512,
3568 c: __m512,
3569 rounding: i32,
3570) -> __m512 {
3571 macro_rules! call {
3572 ($imm4:expr) => {
3573 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3574 };
3575 }
3576 let fmaddsub = constify_imm4_round!(rounding, call);
3577 let zero = _mm512_setzero_ps().as_f32x16();
3578 transmute(simd_select_bitmask(k, fmaddsub, zero))
3579}
3580
3581/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3582///
3583/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3584/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3585/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3586/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3587/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3588/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3589///
3590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_round_ps&expand=2621)
3591#[inline]
3592#[target_feature(enable = "avx512f")]
3593#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3594#[rustc_args_required_const(4)]
3595pub unsafe fn _mm512_mask3_fmaddsub_round_ps(
3596 a: __m512,
3597 b: __m512,
3598 c: __m512,
3599 k: __mmask16,
3600 rounding: i32,
3601) -> __m512 {
3602 macro_rules! call {
3603 ($imm4:expr) => {
3604 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
3605 };
3606 }
3607 let fmaddsub = constify_imm4_round!(rounding, call);
3608 transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
3609}
3610
3611/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3612///
3613/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3614/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3615/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3616/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3617/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3618/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3619///
3620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_round_pd&expand=2615)
3621#[inline]
3622#[target_feature(enable = "avx512f")]
3623#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3624#[rustc_args_required_const(3)]
3625pub unsafe fn _mm512_fmaddsub_round_pd(
3626 a: __m512d,
3627 b: __m512d,
3628 c: __m512d,
3629 rounding: i32,
3630) -> __m512d {
3631 macro_rules! call {
3632 ($imm4:expr) => {
3633 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3634 };
3635 }
3636 let r = constify_imm4_round!(rounding, call);
3637 transmute(r)
3638}
3639
3640/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3641///
3642/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3643/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3644/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3645/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3646/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3647/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3648///
3649/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_round_pd&expand=2616)
3650#[inline]
3651#[target_feature(enable = "avx512f")]
3652#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3653#[rustc_args_required_const(4)]
3654pub unsafe fn _mm512_mask_fmaddsub_round_pd(
3655 a: __m512d,
3656 k: __mmask8,
3657 b: __m512d,
3658 c: __m512d,
3659 rounding: i32,
3660) -> __m512d {
3661 macro_rules! call {
3662 ($imm4:expr) => {
3663 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3664 };
3665 }
3666 let fmaddsub = constify_imm4_round!(rounding, call);
3667 transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
3668}
3669
3670/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3671///
3672/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3673/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3674/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3675/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3676/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3677/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3678///
3679/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_round_pd&expand=2618)
3680#[inline]
3681#[target_feature(enable = "avx512f")]
3682#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3683#[rustc_args_required_const(4)]
3684pub unsafe fn _mm512_maskz_fmaddsub_round_pd(
3685 k: __mmask8,
3686 a: __m512d,
3687 b: __m512d,
3688 c: __m512d,
3689 rounding: i32,
3690) -> __m512d {
3691 macro_rules! call {
3692 ($imm4:expr) => {
3693 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3694 };
3695 }
3696 let fmaddsub = constify_imm4_round!(rounding, call);
3697 let zero = _mm512_setzero_pd().as_f64x8();
3698 transmute(simd_select_bitmask(k, fmaddsub, zero))
3699}
3700
3701/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3702///
3703/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3704/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3705/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3706/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3707/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3708/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3709///
3710/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_round_pd&expand=2617)
3711#[inline]
3712#[target_feature(enable = "avx512f")]
3713#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
3714#[rustc_args_required_const(4)]
3715pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
3716 a: __m512d,
3717 b: __m512d,
3718 c: __m512d,
3719 k: __mmask8,
3720 rounding: i32,
3721) -> __m512d {
3722 macro_rules! call {
3723 ($imm4:expr) => {
3724 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
3725 };
3726 }
3727 let fmaddsub = constify_imm4_round!(rounding, call);
3728 transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
3729}
3730
3731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
3732///
3733/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3734/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3735/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3736/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3737/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3738/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3739///
3740/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_round_ps&expand=2699)
3741#[inline]
3742#[target_feature(enable = "avx512f")]
3743#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3744#[rustc_args_required_const(3)]
3745pub unsafe fn _mm512_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3746 let zero: f32x16 = mem::zeroed();
3747 let sub = simd_sub(zero, c.as_f32x16());
3748 macro_rules! call {
3749 ($imm4:expr) => {
3750 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3751 };
3752 }
3753 let r = constify_imm4_round!(rounding, call);
3754 transmute(r)
3755}
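// Illustrative note (not upstream code): fmsubadd mirrors fmaddsub, adding `c` in
// even-indexed lanes and subtracting it in odd-indexed lanes; the body above reuses the
// fmaddsub primitive with `c` negated. A hypothetical caller:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_CUR_DIRECTION)
}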
3756
3757/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3758///
3759/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3760/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3761/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3762/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3763/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3764/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3765///
3766/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_round_ps&expand=2700)
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3770#[rustc_args_required_const(4)]
3771pub unsafe fn _mm512_mask_fmsubadd_round_ps(
3772 a: __m512,
3773 k: __mmask16,
3774 b: __m512,
3775 c: __m512,
3776 rounding: i32,
3777) -> __m512 {
3778 let zero: f32x16 = mem::zeroed();
3779 let sub = simd_sub(zero, c.as_f32x16());
3780 macro_rules! call {
3781 ($imm4:expr) => {
3782 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3783 };
3784 }
3785 let fmsubadd = constify_imm4_round!(rounding, call);
3786 transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
3787}
3788
3789/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3790///
3791/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3792/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3793/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3794/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3795/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3796/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3797///
3798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_round_ps&expand=2702)
3799#[inline]
3800#[target_feature(enable = "avx512f")]
3801#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3802#[rustc_args_required_const(4)]
3803pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
3804 k: __mmask16,
3805 a: __m512,
3806 b: __m512,
3807 c: __m512,
3808 rounding: i32,
3809) -> __m512 {
3810 let zero: f32x16 = mem::zeroed();
3811 let sub = simd_sub(zero, c.as_f32x16());
3812 macro_rules! call {
3813 ($imm4:expr) => {
3814 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3815 };
3816 }
3817 let fmsubadd = constify_imm4_round!(rounding, call);
3818 transmute(simd_select_bitmask(k, fmsubadd, zero))
3819}
3820
3821/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3822///
3823/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3824/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3825/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3826/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3827/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3828/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3829///
3830/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_round_ps&expand=2701)
3831#[inline]
3832#[target_feature(enable = "avx512f")]
3833#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
3834#[rustc_args_required_const(4)]
3835pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
3836 a: __m512,
3837 b: __m512,
3838 c: __m512,
3839 k: __mmask16,
3840 rounding: i32,
3841) -> __m512 {
3842 let zero: f32x16 = mem::zeroed();
3843 let sub = simd_sub(zero, c.as_f32x16());
3844 macro_rules! call {
3845 ($imm4:expr) => {
3846 vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
3847 };
3848 }
3849 let fmsubadd = constify_imm4_round!(rounding, call);
3850 transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
3851}
3852
3853/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
3854///
3855/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3856/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3857/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3858/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3859/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3860/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3861///
3862/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_round_pd&expand=2695)
3863#[inline]
3864#[target_feature(enable = "avx512f")]
3865#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3866#[rustc_args_required_const(3)]
3867pub unsafe fn _mm512_fmsubadd_round_pd(
3868 a: __m512d,
3869 b: __m512d,
3870 c: __m512d,
3871 rounding: i32,
3872) -> __m512d {
3873 let zero: f64x8 = mem::zeroed();
3874 let sub = simd_sub(zero, c.as_f64x8());
3875 macro_rules! call {
3876 ($imm4:expr) => {
3877 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3878 };
3879 }
3880 let r = constify_imm4_round!(rounding, call);
3881 transmute(r)
3882}
3883
3884/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3885///
3886/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3887/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3888/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3889/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3890/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3891/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3892///
3893/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_round_pd&expand=2696)
3894#[inline]
3895#[target_feature(enable = "avx512f")]
3896#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3897#[rustc_args_required_const(4)]
3898pub unsafe fn _mm512_mask_fmsubadd_round_pd(
3899 a: __m512d,
3900 k: __mmask8,
3901 b: __m512d,
3902 c: __m512d,
3903 rounding: i32,
3904) -> __m512d {
3905 let zero: f64x8 = mem::zeroed();
3906 let sub = simd_sub(zero, c.as_f64x8());
3907 macro_rules! call {
3908 ($imm4:expr) => {
3909 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3910 };
3911 }
3912 let fmsubadd = constify_imm4_round!(rounding, call);
3913 transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
3914}
3915
3916/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3917///
3918/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3919/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3920/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3921/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3922/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3923/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3924///
3925/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_round_pd&expand=2698)
3926#[inline]
3927#[target_feature(enable = "avx512f")]
3928#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3929#[rustc_args_required_const(4)]
3930pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
3931 k: __mmask8,
3932 a: __m512d,
3933 b: __m512d,
3934 c: __m512d,
3935 rounding: i32,
3936) -> __m512d {
3937 let zero: f64x8 = mem::zeroed();
3938 let sub = simd_sub(zero, c.as_f64x8());
3939 macro_rules! call {
3940 ($imm4:expr) => {
3941 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3942 };
3943 }
3944 let fmsubadd = constify_imm4_round!(rounding, call);
3945 transmute(simd_select_bitmask(k, fmsubadd, zero))
3946}
3947
3948/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3949///
3950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3951/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3952/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3953/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3954/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3955/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3956///
3957/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_round_pd&expand=2697)
3958#[inline]
3959#[target_feature(enable = "avx512f")]
3960#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
3961#[rustc_args_required_const(4)]
3962pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
3963 a: __m512d,
3964 b: __m512d,
3965 c: __m512d,
3966 k: __mmask8,
3967 rounding: i32,
3968) -> __m512d {
3969 let zero: f64x8 = mem::zeroed();
3970 let sub = simd_sub(zero, c.as_f64x8());
3971 macro_rules! call {
3972 ($imm4:expr) => {
3973 vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
3974 };
3975 }
3976 let fmsubadd = constify_imm4_round!(rounding, call);
3977 transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
3978}
3979
3980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
3981///
3982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
3983/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
3984/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
3985/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
3986/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
3987/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
3988///
3989/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_round_ps&expand=2731)
3990#[inline]
3991#[target_feature(enable = "avx512f")]
3992#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
3993#[rustc_args_required_const(3)]
3994pub unsafe fn _mm512_fnmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
3995 let zero: f32x16 = mem::zeroed();
3996 let sub = simd_sub(zero, a.as_f32x16());
3997 macro_rules! call {
3998 ($imm4:expr) => {
3999 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4000 };
4001 }
4002 let r = constify_imm4_round!(rounding, call);
4003 transmute(r)
4004}
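
// Illustrative usage sketch (not part of the upstream implementation; the
// helper name is made up): with round-to-nearest and exceptions suppressed,
// every lane below evaluates -(a * b) + c = -(2.0 * 3.0) + 1.0 = -5.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmadd_round_ps_usage_sketch() -> __m512 {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}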
4005
4006/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4007///
4008/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4009/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4010/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4011/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4012/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4013/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4014///
4015/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_round_ps&expand=2732)
4016#[inline]
4017#[target_feature(enable = "avx512f")]
4018#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4019#[rustc_args_required_const(4)]
4020pub unsafe fn _mm512_mask_fnmadd_round_ps(
4021 a: __m512,
4022 k: __mmask16,
4023 b: __m512,
4024 c: __m512,
4025 rounding: i32,
4026) -> __m512 {
4027 let zero: f32x16 = mem::zeroed();
4028 let sub = simd_sub(zero, a.as_f32x16());
4029 macro_rules! call {
4030 ($imm4:expr) => {
4031 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4032 };
4033 }
4034 let fnmadd = constify_imm4_round!(rounding, call);
4035 transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
4036}
4037
4038/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4039///
4040/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4041/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4042/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4043/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4044/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4045/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4046///
4047/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_round_ps&expand=2734)
4048#[inline]
4049#[target_feature(enable = "avx512f")]
4050#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4051#[rustc_args_required_const(4)]
4052pub unsafe fn _mm512_maskz_fnmadd_round_ps(
4053 k: __mmask16,
4054 a: __m512,
4055 b: __m512,
4056 c: __m512,
4057 rounding: i32,
4058) -> __m512 {
4059 let zero: f32x16 = mem::zeroed();
4060 let sub = simd_sub(zero, a.as_f32x16());
4061 macro_rules! call {
4062 ($imm4:expr) => {
4063 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4064 };
4065 }
4066 let fnmadd = constify_imm4_round!(rounding, call);
4067 transmute(simd_select_bitmask(k, fnmadd, zero))
4068}
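
// Illustrative sketch of the zeromask behaviour (hypothetical helper, not
// upstream code): with k = 0b0000_0000_1111_1111 only the low eight lanes
// receive -(a * b) + c; the upper eight lanes are forced to 0.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_fnmadd_round_ps_sketch() -> __m512 {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    let k: __mmask16 = 0b0000_0000_1111_1111;
    _mm512_maskz_fnmadd_round_ps(k, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}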
4069
4070/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4071///
4072/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4073/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4074/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4075/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4076/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4077/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4078///
4079/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_round_ps&expand=2733)
4080#[inline]
4081#[target_feature(enable = "avx512f")]
4082#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4083#[rustc_args_required_const(4)]
4084pub unsafe fn _mm512_mask3_fnmadd_round_ps(
4085 a: __m512,
4086 b: __m512,
4087 c: __m512,
4088 k: __mmask16,
4089 rounding: i32,
4090) -> __m512 {
4091 let zero: f32x16 = mem::zeroed();
4092 let sub = simd_sub(zero, a.as_f32x16());
4093 macro_rules! call {
4094 ($imm4:expr) => {
4095 vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
4096 };
4097 }
4098 let fnmadd = constify_imm4_round!(rounding, call);
4099 transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4103///
4104/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4105/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4106/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4107/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4108/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4109/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4110///
4111/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_pd&expand=2711)
4112#[inline]
4113#[target_feature(enable = "avx512f")]
4114#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4115#[rustc_args_required_const(3)]
4116pub unsafe fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
4117 let zero: f64x8 = mem::zeroed();
4118 let sub = simd_sub(zero, a.as_f64x8());
4119 macro_rules! call {
4120 ($imm4:expr) => {
4121 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4122 };
4123 }
4124 let r = constify_imm4_round!(rounding, call);
4125 transmute(r)
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4129///
4130/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4131/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4132/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4133/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4134/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4135/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4136///
4137/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_round_pd&expand=2728)
4138#[inline]
4139#[target_feature(enable = "avx512f")]
4140#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4141#[rustc_args_required_const(4)]
4142pub unsafe fn _mm512_mask_fnmadd_round_pd(
4143 a: __m512d,
4144 k: __mmask8,
4145 b: __m512d,
4146 c: __m512d,
4147 rounding: i32,
4148) -> __m512d {
4149 let zero: f64x8 = mem::zeroed();
4150 let sub = simd_sub(zero, a.as_f64x8());
4151 macro_rules! call {
4152 ($imm4:expr) => {
4153 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4154 };
4155 }
4156 let fnmadd = constify_imm4_round!(rounding, call);
4157 transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
4158}
4159
4160/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4161///
4162/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4163/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4164/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4165/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4166/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4167/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4168///
4169/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_round_pd&expand=2730)
4170#[inline]
4171#[target_feature(enable = "avx512f")]
4172#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4173#[rustc_args_required_const(4)]
4174pub unsafe fn _mm512_maskz_fnmadd_round_pd(
4175 k: __mmask8,
4176 a: __m512d,
4177 b: __m512d,
4178 c: __m512d,
4179 rounding: i32,
4180) -> __m512d {
4181 let zero: f64x8 = mem::zeroed();
4182 let sub = simd_sub(zero, a.as_f64x8());
4183 macro_rules! call {
4184 ($imm4:expr) => {
4185 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4186 };
4187 }
4188 let fnmadd = constify_imm4_round!(rounding, call);
4189 transmute(simd_select_bitmask(k, fnmadd, zero))
4190}
4191
4192/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4193///
4194/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4195/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4196/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4197/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4198/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4199/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4200///
4201/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_round_pd&expand=2729)
4202#[inline]
4203#[target_feature(enable = "avx512f")]
4204#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4205#[rustc_args_required_const(4)]
4206pub unsafe fn _mm512_mask3_fnmadd_round_pd(
4207 a: __m512d,
4208 b: __m512d,
4209 c: __m512d,
4210 k: __mmask8,
4211 rounding: i32,
4212) -> __m512d {
4213 let zero: f64x8 = mem::zeroed();
4214 let sub = simd_sub(zero, a.as_f64x8());
4215 macro_rules! call {
4216 ($imm4:expr) => {
4217 vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
4218 };
4219 }
4220 let fnmadd = constify_imm4_round!(rounding, call);
4221 transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
4222}
4223
4224/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4225///
4226/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4227/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4228/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4229/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4230/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4231/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4232///
4233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_round_ps&expand=2779)
4234#[inline]
4235#[target_feature(enable = "avx512f")]
4236#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4237#[rustc_args_required_const(3)]
4238pub unsafe fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
4239 let zero: f32x16 = mem::zeroed();
4240 let suba = simd_sub(zero, a.as_f32x16());
4241 let subc = simd_sub(zero, c.as_f32x16());
4242 macro_rules! call {
4243 ($imm4:expr) => {
4244 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
4245 };
4246 }
4247 let r = constify_imm4_round!(rounding, call);
4248 transmute(r)
4249}
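
// Illustrative usage sketch (hypothetical helper, not upstream code): fnmsub
// computes -(a * b) - c, so every lane below is -(2.0 * 3.0) - 1.0 = -7.0
// under round-to-nearest with exceptions suppressed.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmsub_round_ps_usage_sketch() -> __m512 {
    let a = _mm512_set1_ps(2.0);
    let b = _mm512_set1_ps(3.0);
    let c = _mm512_set1_ps(1.0);
    _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}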
4250
4251/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4252///
4253/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4254/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4255/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4256/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4257/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4258/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4259///
4260/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_round_ps&expand=2780)
4261#[inline]
4262#[target_feature(enable = "avx512f")]
4263#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4264#[rustc_args_required_const(4)]
4265pub unsafe fn _mm512_mask_fnmsub_round_ps(
4266 a: __m512,
4267 k: __mmask16,
4268 b: __m512,
4269 c: __m512,
4270 rounding: i32,
4271) -> __m512 {
4272 let zero: f32x16 = mem::zeroed();
4273 let suba = simd_sub(zero, a.as_f32x16());
4274 let subc = simd_sub(zero, c.as_f32x16());
4275 macro_rules! call {
4276 ($imm4:expr) => {
4277 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
4278 };
4279 }
4280 let fnmsub = constify_imm4_round!(rounding, call);
4281 transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
4282}
4283
4284/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4285///
4286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4287/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4288/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4289/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4290/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4291/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4292///
4293/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_round_ps&expand=2782)
4294#[inline]
4295#[target_feature(enable = "avx512f")]
4296#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4297#[rustc_args_required_const(4)]
4298pub unsafe fn _mm512_maskz_fnmsub_round_ps(
4299 k: __mmask16,
4300 a: __m512,
4301 b: __m512,
4302 c: __m512,
4303 rounding: i32,
4304) -> __m512 {
4305 let zero: f32x16 = mem::zeroed();
4306 let suba = simd_sub(zero, a.as_f32x16());
4307 let subc = simd_sub(zero, c.as_f32x16());
4308 macro_rules! call {
4309 ($imm4:expr) => {
4310 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
4311 };
4312 }
4313 let fnmsub = constify_imm4_round!(rounding, call);
4314 transmute(simd_select_bitmask(k, fnmsub, zero))
4315}
4316
4317/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4318///
4319/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4320/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4321/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4322/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4323/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4324/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4325///
4326/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_round_ps&expand=2781)
4327#[inline]
4328#[target_feature(enable = "avx512f")]
4329#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4330#[rustc_args_required_const(4)]
4331pub unsafe fn _mm512_mask3_fnmsub_round_ps(
4332 a: __m512,
4333 b: __m512,
4334 c: __m512,
4335 k: __mmask16,
4336 rounding: i32,
4337) -> __m512 {
4338 let zero: f32x16 = mem::zeroed();
4339 let suba = simd_sub(zero, a.as_f32x16());
4340 let subc = simd_sub(zero, c.as_f32x16());
4341 macro_rules! call {
4342 ($imm4:expr) => {
4343 vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
4344 };
4345 }
4346 let fnmsub = constify_imm4_round!(rounding, call);
4347 transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
4348}
4349
4350/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4351///
4352/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4353/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4354/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4355/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4356/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4357/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4358///
4359/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_round_pd&expand=2775)
4360#[inline]
4361#[target_feature(enable = "avx512f")]
4362#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4363#[rustc_args_required_const(3)]
4364pub unsafe fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
4365 let zero: f64x8 = mem::zeroed();
4366 let suba = simd_sub(zero, a.as_f64x8());
4367 let subc = simd_sub(zero, c.as_f64x8());
4368 macro_rules! call {
4369 ($imm4:expr) => {
4370 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
4371 };
4372 }
4373 let r = constify_imm4_round!(rounding, call);
4374 transmute(r)
4375}
4376
4377/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4378///
4379/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4380/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4381/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4382/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4383/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4384/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4385///
4386/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_round_pd&expand=2776)
4387#[inline]
4388#[target_feature(enable = "avx512f")]
4389#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4390#[rustc_args_required_const(4)]
4391pub unsafe fn _mm512_mask_fnmsub_round_pd(
4392 a: __m512d,
4393 k: __mmask8,
4394 b: __m512d,
4395 c: __m512d,
4396 rounding: i32,
4397) -> __m512d {
4398 let zero: f64x8 = mem::zeroed();
4399 let suba = simd_sub(zero, a.as_f64x8());
4400 let subc = simd_sub(zero, c.as_f64x8());
4401 macro_rules! call {
4402 ($imm4:expr) => {
4403 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
4404 };
4405 }
4406 let fnmsub = constify_imm4_round!(rounding, call);
4407 transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
4408}
4409
4410/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4411///
4412/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4413/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4414/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4415/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4416/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4417/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4418///
4419/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_round_pd&expand=2778)
4420#[inline]
4421#[target_feature(enable = "avx512f")]
4422#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4423#[rustc_args_required_const(4)]
4424pub unsafe fn _mm512_maskz_fnmsub_round_pd(
4425 k: __mmask8,
4426 a: __m512d,
4427 b: __m512d,
4428 c: __m512d,
4429 rounding: i32,
4430) -> __m512d {
4431 let zero: f64x8 = mem::zeroed();
4432 let suba = simd_sub(zero, a.as_f64x8());
4433 let subc = simd_sub(zero, c.as_f64x8());
4434 macro_rules! call {
4435 ($imm4:expr) => {
4436 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
4437 };
4438 }
4439 let fnmsub = constify_imm4_round!(rounding, call);
4440 transmute(simd_select_bitmask(k, fnmsub, zero))
4441}
4442
4443/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4444///
4445/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
4446/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
4447/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
4448/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
4449/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
4450/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4451///
4452/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_round_pd&expand=2777)
4453#[inline]
4454#[target_feature(enable = "avx512f")]
4455#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4456#[rustc_args_required_const(4)]
4457pub unsafe fn _mm512_mask3_fnmsub_round_pd(
4458 a: __m512d,
4459 b: __m512d,
4460 c: __m512d,
4461 k: __mmask8,
4462 rounding: i32,
4463) -> __m512d {
4464 let zero: f64x8 = mem::zeroed();
4465 let suba = simd_sub(zero, a.as_f64x8());
4466 let subc = simd_sub(zero, c.as_f64x8());
4467 macro_rules! call {
4468 ($imm4:expr) => {
4469 vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
4470 };
4471 }
4472 let fnmsub = constify_imm4_round!(rounding, call);
4473 transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
4474}
4475
4476/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
4477/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4478///
4479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=max_round_ps&expand=3662)
4480#[inline]
4481#[target_feature(enable = "avx512f")]
4482#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
4483#[rustc_args_required_const(2)]
4484pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
4485 macro_rules! call {
4486 ($imm4:expr) => {
4487 vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
4488 };
4489 }
4490 let r = constify_imm4_sae!(sae, call);
4491 transmute(r)
4492}
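
// Illustrative sketch (hypothetical helper, not upstream code): element-wise
// maximum with SAE. Passing _MM_FROUND_NO_EXC suppresses exception reporting;
// here each lane picks max(1.0, 2.0) = 2.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn max_round_ps_usage_sketch() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    _mm512_max_round_ps(a, b, _MM_FROUND_NO_EXC)
}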
4493
4494/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4495/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4496///
4497/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_round_ps&expand=3660)
4498#[inline]
4499#[target_feature(enable = "avx512f")]
4500#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
4501#[rustc_args_required_const(4)]
4502pub unsafe fn _mm512_mask_max_round_ps(
4503 src: __m512,
4504 k: __mmask16,
4505 a: __m512,
4506 b: __m512,
4507 sae: i32,
4508) -> __m512 {
4509 macro_rules! call {
4510 ($imm4:expr) => {
4511 vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
4512 };
4513 }
4514 let max = constify_imm4_sae!(sae, call);
4515 transmute(simd_select_bitmask(k, max, src.as_f32x16()))
4516}
4517
4518/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4519/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4520///
4521/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_round_ps&expand=3661)
4522#[inline]
4523#[target_feature(enable = "avx512f")]
4524#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
4525#[rustc_args_required_const(3)]
4526pub unsafe fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
4527 macro_rules! call {
4528 ($imm4:expr) => {
4529 vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
4530 };
4531 }
4532 let max = constify_imm4_sae!(sae, call);
4533 let zero = _mm512_setzero_ps().as_f32x16();
4534 transmute(simd_select_bitmask(k, max, zero))
4535}
4536
4537/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
4538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4539///
4540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_round_pd&expand=3659)
4541#[inline]
4542#[target_feature(enable = "avx512f")]
4543#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
4544#[rustc_args_required_const(2)]
4545pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
4546 macro_rules! call {
4547 ($imm4:expr) => {
4548 vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
4549 };
4550 }
4551 let r = constify_imm4_sae!(sae, call);
4552 transmute(r)
4553}
4554
4555/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4556/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4557///
4558/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_round_pd&expand=3657)
4559#[inline]
4560#[target_feature(enable = "avx512f")]
4561#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
4562#[rustc_args_required_const(4)]
4563pub unsafe fn _mm512_mask_max_round_pd(
4564 src: __m512d,
4565 k: __mmask8,
4566 a: __m512d,
4567 b: __m512d,
4568 sae: i32,
4569) -> __m512d {
4570 macro_rules! call {
4571 ($imm4:expr) => {
4572 vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
4573 };
4574 }
4575 let max = constify_imm4_sae!(sae, call);
4576 transmute(simd_select_bitmask(k, max, src.as_f64x8()))
4577}
4578
4579/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4580/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4581///
4582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_round_pd&expand=3658)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
4586#[rustc_args_required_const(3)]
4587pub unsafe fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
4588 macro_rules! call {
4589 ($imm4:expr) => {
4590 vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
4591 };
4592 }
4593 let max = constify_imm4_sae!(sae, call);
4594 let zero = _mm512_setzero_pd().as_f64x8();
4595 transmute(simd_select_bitmask(k, max, zero))
4596}
4597
4598/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
4599/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4600///
4601/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_round_ps&expand=3776)
4602#[inline]
4603#[target_feature(enable = "avx512f")]
4604#[cfg_attr(test, assert_instr(vminps, sae = 8))]
4605#[rustc_args_required_const(2)]
4606pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
4607 macro_rules! call {
4608 ($imm4:expr) => {
4609 vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
4610 };
4611 }
4612 let r = constify_imm4_sae!(sae, call);
4613 transmute(r)
4614}
4615
4616/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4617/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4618///
4619/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_round_ps&expand=3774)
4620#[inline]
4621#[target_feature(enable = "avx512f")]
4622#[cfg_attr(test, assert_instr(vminps, sae = 8))]
4623#[rustc_args_required_const(4)]
4624pub unsafe fn _mm512_mask_min_round_ps(
4625 src: __m512,
4626 k: __mmask16,
4627 a: __m512,
4628 b: __m512,
4629 sae: i32,
4630) -> __m512 {
4631 macro_rules! call {
4632 ($imm4:expr) => {
4633 vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
4634 };
4635 }
4636    let min = constify_imm4_sae!(sae, call);
4637    transmute(simd_select_bitmask(k, min, src.as_f32x16()))
4638}
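
// Illustrative sketch of the writemask behaviour (hypothetical helper, not
// upstream code): lanes whose mask bit is set receive min(a, b); the other
// lanes keep the value from src, so here the upper eight lanes stay at 9.0.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_min_round_ps_usage_sketch() -> __m512 {
    let src = _mm512_set1_ps(9.0);
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    let k: __mmask16 = 0b0000_0000_1111_1111;
    _mm512_mask_min_round_ps(src, k, a, b, _MM_FROUND_NO_EXC)
}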
4639
4640/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4641/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4642///
4643/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_round_ps&expand=3775)
4644#[inline]
4645#[target_feature(enable = "avx512f")]
4646#[cfg_attr(test, assert_instr(vminps, sae = 8))]
4647#[rustc_args_required_const(3)]
4648pub unsafe fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
4649 macro_rules! call {
4650 ($imm4:expr) => {
4651 vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
4652 };
4653 }
4654    let min = constify_imm4_sae!(sae, call);
4655    let zero = _mm512_setzero_ps().as_f32x16();
4656    transmute(simd_select_bitmask(k, min, zero))
4657}
4658
4659/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
4660/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4661///
4662/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_round_pd&expand=3773)
4663#[inline]
4664#[target_feature(enable = "avx512f")]
4665#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
4666#[rustc_args_required_const(2)]
4667pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
4668 macro_rules! call {
4669 ($imm4:expr) => {
4670 vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
4671 };
4672 }
4673 let r = constify_imm4_sae!(sae, call);
4674 transmute(r)
4675}
4676
4677/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
4678/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4679///
4680/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_round_pd&expand=3771)
4681#[inline]
4682#[target_feature(enable = "avx512f")]
4683#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
4684#[rustc_args_required_const(4)]
4685pub unsafe fn _mm512_mask_min_round_pd(
4686 src: __m512d,
4687 k: __mmask8,
4688 a: __m512d,
4689 b: __m512d,
4690 sae: i32,
4691) -> __m512d {
4692 macro_rules! call {
4693 ($imm4:expr) => {
4694 vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
4695 };
4696 }
4697    let min = constify_imm4_sae!(sae, call);
4698    transmute(simd_select_bitmask(k, min, src.as_f64x8()))
4699}
4700
4701/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4702/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4703///
4704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_round_pd&expand=3772)
4705#[inline]
4706#[target_feature(enable = "avx512f")]
4707#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
4708#[rustc_args_required_const(3)]
4709pub unsafe fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
4710 macro_rules! call {
4711 ($imm4:expr) => {
4712 vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
4713 };
4714 }
4715    let min = constify_imm4_sae!(sae, call);
4716    let zero = _mm512_setzero_pd().as_f64x8();
4717    transmute(simd_select_bitmask(k, min, zero))
4718}
4719
4720/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
4721/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4722///
4723/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_round_ps&expand=2850)
4724#[inline]
4725#[target_feature(enable = "avx512f")]
4726#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
4727#[rustc_args_required_const(1)]
4728pub unsafe fn _mm512_getexp_round_ps(a: __m512, sae: i32) -> __m512 {
4729 macro_rules! call {
4730 ($imm4:expr) => {
4731 vgetexpps(
4732 a.as_f32x16(),
4733 _mm512_setzero_ps().as_f32x16(),
4734 0b11111111_11111111,
4735 $imm4,
4736 )
4737 };
4738 }
4739 let r = constify_imm4_sae!(sae, call);
4740 transmute(r)
4741}
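
// Illustrative sketch (hypothetical helper, not upstream code): getexp
// returns floor(log2(|x|)) as a float, so 8.0 maps to 3.0 in every lane
// (and 0.5 would map to -1.0). _MM_FROUND_NO_EXC suppresses exceptions.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getexp_round_ps_usage_sketch() -> __m512 {
    let a = _mm512_set1_ps(8.0);
    _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC)
}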
4742
4743/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4744/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4745///
4746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_round_ps&expand=2851)
4747#[inline]
4748#[target_feature(enable = "avx512f")]
4749#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
4750#[rustc_args_required_const(3)]
4751pub unsafe fn _mm512_mask_getexp_round_ps(
4752 src: __m512,
4753 k: __mmask16,
4754 a: __m512,
4755 sae: i32,
4756) -> __m512 {
4757 macro_rules! call {
4758 ($imm4:expr) => {
4759 vgetexpps(a.as_f32x16(), src.as_f32x16(), k, $imm4)
4760 };
4761 }
4762 let r = constify_imm4_sae!(sae, call);
4763 transmute(r)
4764}
4765
4766/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4767/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4768///
4769/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_round_ps&expand=2852)
4770#[inline]
4771#[target_feature(enable = "avx512f")]
4772#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
4773#[rustc_args_required_const(2)]
4774pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512, sae: i32) -> __m512 {
4775 macro_rules! call {
4776 ($imm4:expr) => {
4777 vgetexpps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
4778 };
4779 }
4780 let r = constify_imm4_sae!(sae, call);
4781 transmute(r)
4782}
4783
4784/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
4785/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4786///
4787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_round_pd&expand=2847)
4788#[inline]
4789#[target_feature(enable = "avx512f")]
4790#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
4791#[rustc_args_required_const(1)]
4792pub unsafe fn _mm512_getexp_round_pd(a: __m512d, sae: i32) -> __m512d {
4793 macro_rules! call {
4794 ($imm4:expr) => {
4795 vgetexppd(
4796 a.as_f64x8(),
4797 _mm512_setzero_pd().as_f64x8(),
4798 0b11111111,
4799 $imm4,
4800 )
4801 };
4802 }
4803 let r = constify_imm4_sae!(sae, call);
4804 transmute(r)
4805}
4806
4807/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4809///
4810/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_round_pd&expand=2848)
4811#[inline]
4812#[target_feature(enable = "avx512f")]
4813#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
4814#[rustc_args_required_const(3)]
4815pub unsafe fn _mm512_mask_getexp_round_pd(
4816 src: __m512d,
4817 k: __mmask8,
4818 a: __m512d,
4819 sae: i32,
4820) -> __m512d {
4821 macro_rules! call {
4822 ($imm4:expr) => {
4823 vgetexppd(a.as_f64x8(), src.as_f64x8(), k, $imm4)
4824 };
4825 }
4826 let r = constify_imm4_sae!(sae, call);
4827 transmute(r)
4828}
4829
4830/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
4831/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4832///
4833/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_round_pd&expand=2849)
4834#[inline]
4835#[target_feature(enable = "avx512f")]
4836#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
4837#[rustc_args_required_const(2)]
4838pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d, sae: i32) -> __m512d {
4839 macro_rules! call {
4840 ($imm4:expr) => {
4841 vgetexppd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
4842 };
4843 }
4844 let r = constify_imm4_sae!(sae, call);
4845 transmute(r)
4846}
4847
4848/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
4849/// The mantissa is normalized to the interval specified by interv, which can take the following values:
4850/// _MM_MANT_NORM_1_2 // interval [1, 2)
4851/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
4852/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
4853/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
4854/// The sign is determined by sc which can take the following values:
4855/// _MM_MANT_SIGN_src // sign = sign(src)
4856/// _MM_MANT_SIGN_zero // sign = 0
4857/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
4858/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4859///
4860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_round_ps&expand=2886)
4861#[inline]
4862#[target_feature(enable = "avx512f")]
4863#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
4864#[rustc_args_required_const(1, 2, 3)]
4865pub unsafe fn _mm512_getmant_round_ps(
4866 a: __m512,
4867 norm: _MM_MANTISSA_NORM_ENUM,
4868 sign: _MM_MANTISSA_SIGN_ENUM,
4869 sae: i32,
4870) -> __m512 {
4871 macro_rules! call {
4872 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
4873 vgetmantps(
4874 a.as_f32x16(),
4875 $imm2 << 2 | $imm4_1,
4876 _mm512_setzero_ps().as_f32x16(),
4877 0b11111111_11111111,
4878 $imm4_2,
4879 )
4880 };
4881 }
4882 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
4883 transmute(r)
4884}
4885
4886/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
4887/// The mantissa is normalized to the interval specified by interv, which can take the following values:
4888/// _MM_MANT_NORM_1_2 // interval [1, 2)
4889/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
4890/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
4891/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
4892/// The sign is determined by sc which can take the following values:
4893/// _MM_MANT_SIGN_src // sign = sign(src)
4894/// _MM_MANT_SIGN_zero // sign = 0
4895/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
4896/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4897///
4898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_round_ps&expand=2887)
4899#[inline]
4900#[target_feature(enable = "avx512f")]
4901#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
4902#[rustc_args_required_const(3, 4, 5)]
4903pub unsafe fn _mm512_mask_getmant_round_ps(
4904 src: __m512,
4905 k: __mmask16,
4906 a: __m512,
4907 norm: _MM_MANTISSA_NORM_ENUM,
4908 sign: _MM_MANTISSA_SIGN_ENUM,
4909 sae: i32,
4910) -> __m512 {
4911 macro_rules! call {
4912 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
4913 vgetmantps(
4914 a.as_f32x16(),
4915 $imm2 << 2 | $imm4_1,
4916 src.as_f32x16(),
4917 k,
4918 $imm4_2,
4919 )
4920 };
4921 }
4922 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
4923 transmute(r)
4924}
4925
4926/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
4927/// The mantissa is normalized to the interval specified by interv, which can take the following values:
4928/// _MM_MANT_NORM_1_2 // interval [1, 2)
4929/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
4930/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
4931/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
4932/// The sign is determined by sc which can take the following values:
4933/// _MM_MANT_SIGN_src // sign = sign(src)
4934/// _MM_MANT_SIGN_zero // sign = 0
4935/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
4936/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4937///
4938/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_round_ps&expand=2888)
4939#[inline]
4940#[target_feature(enable = "avx512f")]
4941#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
4942#[rustc_args_required_const(2, 3, 4)]
4943pub unsafe fn _mm512_maskz_getmant_round_ps(
4944 k: __mmask16,
4945 a: __m512,
4946 norm: _MM_MANTISSA_NORM_ENUM,
4947 sign: _MM_MANTISSA_SIGN_ENUM,
4948 sae: i32,
4949) -> __m512 {
4950 macro_rules! call {
4951 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
4952 vgetmantps(
4953 a.as_f32x16(),
4954 $imm2 << 2 | $imm4_1,
4955 _mm512_setzero_ps().as_f32x16(),
4956 k,
4957 $imm4_2,
4958 )
4959 };
4960 }
4961 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
4962 transmute(r)
4963}
4964
4965/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
4966/// The mantissa is normalized to the interval specified by interv, which can take the following values:
4967/// _MM_MANT_NORM_1_2 // interval [1, 2)
4968/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
4969/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
4970/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
4971/// The sign is determined by sc which can take the following values:
4972/// _MM_MANT_SIGN_src // sign = sign(src)
4973/// _MM_MANT_SIGN_zero // sign = 0
4974/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
4975/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
4976///
4977/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_round_pd&expand=2883)
4978#[inline]
4979#[target_feature(enable = "avx512f")]
4980#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
4981#[rustc_args_required_const(1, 2, 3)]
4982pub unsafe fn _mm512_getmant_round_pd(
4983 a: __m512d,
4984 norm: _MM_MANTISSA_NORM_ENUM,
4985 sign: _MM_MANTISSA_SIGN_ENUM,
4986 sae: i32,
4987) -> __m512d {
4988 macro_rules! call {
4989 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
4990 vgetmantpd(
4991 a.as_f64x8(),
4992 $imm2 << 2 | $imm4_1,
4993 _mm512_setzero_pd().as_f64x8(),
4994 0b11111111,
4995 $imm4_2,
4996 )
4997 };
4998 }
4999 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
5000 transmute(r)
5001}
5002
5003/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
5004/// The mantissa is normalized to the interval specified by interv, which can take the following values:
5005/// _MM_MANT_NORM_1_2 // interval [1, 2)
5006/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
5007/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
5008/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
5009/// The sign is determined by sc which can take the following values:
5010/// _MM_MANT_SIGN_src // sign = sign(src)
5011/// _MM_MANT_SIGN_zero // sign = 0
5012/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
5013/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5014///
5015/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_round_pd&expand=2884)
5016#[inline]
5017#[target_feature(enable = "avx512f")]
5018#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
5019#[rustc_args_required_const(3, 4, 5)]
5020pub unsafe fn _mm512_mask_getmant_round_pd(
5021 src: __m512d,
5022 k: __mmask8,
5023 a: __m512d,
5024 norm: _MM_MANTISSA_NORM_ENUM,
5025 sign: _MM_MANTISSA_SIGN_ENUM,
5026 sae: i32,
5027) -> __m512d {
5028 macro_rules! call {
5029 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
5030 vgetmantpd(
5031 a.as_f64x8(),
5032 $imm2 << 2 | $imm4_1,
5033 src.as_f64x8(),
5034 k,
5035 $imm4_2,
5036 )
5037 };
5038 }
5039 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
5040 transmute(r)
5041}
5042
5043/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
5044/// The mantissa is normalized to the interval specified by interv, which can take the following values:
5045/// _MM_MANT_NORM_1_2 // interval [1, 2)
5046/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
5047/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
5048/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
5049/// The sign is determined by sc which can take the following values:
5050/// _MM_MANT_SIGN_src // sign = sign(src)
5051/// _MM_MANT_SIGN_zero // sign = 0
5052/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
5053/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5054///
5055/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_round_pd&expand=2885)
5056#[inline]
5057#[target_feature(enable = "avx512f")]
5058#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
5059#[rustc_args_required_const(2, 3, 4)]
5060pub unsafe fn _mm512_maskz_getmant_round_pd(
5061 k: __mmask8,
5062 a: __m512d,
5063 norm: _MM_MANTISSA_NORM_ENUM,
5064 sign: _MM_MANTISSA_SIGN_ENUM,
5065 sae: i32,
5066) -> __m512d {
5067 macro_rules! call {
5068 ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
5069 vgetmantpd(
5070 a.as_f64x8(),
5071 $imm2 << 2 | $imm4_1,
5072 _mm512_setzero_pd().as_f64x8(),
5073 k,
5074 $imm4_2,
5075 )
5076 };
5077 }
5078 let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
5079 transmute(r)
5080}
5081
5082/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
5083///
5084/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvtps_epi32&expand=1737)
5085#[inline]
5086#[target_feature(enable = "avx512f")]
5087#[cfg_attr(test, assert_instr(vcvtps2dq))]
5088pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
5089 transmute(vcvtps2dq(
5090 a.as_f32x16(),
5091 _mm512_setzero_si512().as_i32x16(),
5092 0b11111111_11111111,
5093 _MM_FROUND_CUR_DIRECTION,
5094 ))
5095}
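
// Illustrative sketch (hypothetical helper, not upstream code): the
// non-_round variant converts using the current MXCSR rounding mode, so with
// the default round-to-nearest-even every lane of 1.5 becomes the integer 2.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvtps_epi32_usage_sketch() -> __m512i {
    let a = _mm512_set1_ps(1.5);
    _mm512_cvtps_epi32(a)
}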
5096
5097/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5098///
5099/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_epi32&expand=1738)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[cfg_attr(test, assert_instr(vcvtps2dq))]
5103pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
5104 transmute(vcvtps2dq(
5105 a.as_f32x16(),
5106 src.as_i32x16(),
5107 k,
5108 _MM_FROUND_CUR_DIRECTION,
5109 ))
5110}
5111
5112/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5113///
5114/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_epi32&expand=1739)
5115#[inline]
5116#[target_feature(enable = "avx512f")]
5117#[cfg_attr(test, assert_instr(vcvtps2dq))]
5118pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
5119 transmute(vcvtps2dq(
5120 a.as_f32x16(),
5121 _mm512_setzero_si512().as_i32x16(),
5122 k,
5123 _MM_FROUND_CUR_DIRECTION,
5124 ))
5125}
5126
5127/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
5128///
5129/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_epu32&expand=1755)
5130#[inline]
5131#[target_feature(enable = "avx512f")]
5132#[cfg_attr(test, assert_instr(vcvtps2udq))]
5133pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
5134 transmute(vcvtps2udq(
5135 a.as_f32x16(),
5136 _mm512_setzero_si512().as_u32x16(),
5137 0b11111111_11111111,
5138 _MM_FROUND_CUR_DIRECTION,
5139 ))
5140}
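
// Illustrative sketch (hypothetical helper, not upstream code): the unsigned
// conversion expects non-negative inputs; under the default round-to-nearest
// MXCSR mode every lane of 3.25 becomes the unsigned integer 3.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvtps_epu32_usage_sketch() -> __m512i {
    let a = _mm512_set1_ps(3.25);
    _mm512_cvtps_epu32(a)
}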
5141
5142/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5143///
5144/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_epu32&expand=1756)
5145#[inline]
5146#[target_feature(enable = "avx512f")]
5147#[cfg_attr(test, assert_instr(vcvtps2udq))]
5148pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
5149 transmute(vcvtps2udq(
5150 a.as_f32x16(),
5151 src.as_u32x16(),
5152 k,
5153 _MM_FROUND_CUR_DIRECTION,
5154 ))
5155}
5156
5157/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5158///
5159/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=maskz_cvt_roundps_epu32&expand=1343)
5160#[inline]
5161#[target_feature(enable = "avx512f")]
5162#[cfg_attr(test, assert_instr(vcvtps2udq))]
5163pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
5164 transmute(vcvtps2udq(
5165 a.as_f32x16(),
5166 _mm512_setzero_si512().as_u32x16(),
5167 k,
5168 _MM_FROUND_CUR_DIRECTION,
5169 ))
5170}
5171
5172/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
5173///
5174/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_pd&expand=1769)
5175#[inline]
5176#[target_feature(enable = "avx512f")]
5177#[cfg_attr(test, assert_instr(vcvtps2pd))]
5178pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
5179 transmute(vcvtps2pd(
5180 a.as_f32x8(),
5181 _mm512_setzero_pd().as_f64x8(),
5182 0b11111111,
5183 _MM_FROUND_CUR_DIRECTION,
5184 ))
5185}
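
// Illustrative sketch (hypothetical helper, not upstream code): the widening
// conversion takes eight f32 lanes (__m256) and produces eight f64 lanes
// (__m512d); every f32 value is representable exactly in f64.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn cvtps_pd_usage_sketch() -> __m512d {
    let a = _mm256_set1_ps(1.5);
    _mm512_cvtps_pd(a)
}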
5186
5187/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5188///
5189/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_pd&expand=1770)
5190#[inline]
5191#[target_feature(enable = "avx512f")]
5192#[cfg_attr(test, assert_instr(vcvtps2pd))]
5193pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
5194 transmute(vcvtps2pd(
5195 a.as_f32x8(),
5196 src.as_f64x8(),
5197 k,
5198 _MM_FROUND_CUR_DIRECTION,
5199 ))
5200}
5201
5202/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5203///
5204/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_pd&expand=1771)
5205#[inline]
5206#[target_feature(enable = "avx512f")]
5207#[cfg_attr(test, assert_instr(vcvtps2pd))]
5208pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
5209 transmute(vcvtps2pd(
5210 a.as_f32x8(),
5211 _mm512_setzero_pd().as_f64x8(),
5212 k,
5213 _MM_FROUND_CUR_DIRECTION,
5214 ))
5215}
5216
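// Illustrative sketch (not part of this module): widening eight `f32` lanes to
// `f64`. Only eight doubles fit in a 512-bit register, so the source is a
// `__m256`; the mask/maskz variants work lane-wise exactly as above.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn widen_f32x8(a: __m256) -> __m512d {
//         _mm512_cvtps_pd(a)
//     }
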
5217/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
5218///
5219/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
5220/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
5221/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
5222/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
5223/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
5224/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5225///
5226/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_epi32&expand=1335)
5227#[inline]
5228#[target_feature(enable = "avx512f")]
5229#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
5230#[rustc_args_required_const(1)]
5231pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i {
5232 macro_rules! call {
5233 ($imm4:expr) => {
5234 vcvtps2dq(
5235 a.as_f32x16(),
5236 _mm512_setzero_si512().as_i32x16(),
5237 0b11111111_11111111,
5238 $imm4,
5239 )
5240 };
5241 }
5242 let r = constify_imm4_round!(rounding, call);
5243 transmute(r)
5244}
5245
5246/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5247///
5248/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
5249/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
5250/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
5251/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
5252/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
5253/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5254///
5255/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_epi32&expand=1336)
5256#[inline]
5257#[target_feature(enable = "avx512f")]
5258#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
5259#[rustc_args_required_const(3)]
5260pub unsafe fn _mm512_mask_cvt_roundps_epi32(
5261 src: __m512i,
5262 k: __mmask16,
5263 a: __m512,
5264 rounding: i32,
5265) -> __m512i {
5266 macro_rules! call {
5267 ($imm4:expr) => {
5268 vcvtps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
5269 };
5270 }
5271 let r = constify_imm4_round!(rounding, call);
5272 transmute(r)
5273}
5274
5275/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5276///
5277/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
5278/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
5279/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
5280/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
5281/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
5282/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5283///
5284/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_epi32&expand=1337)
5285#[inline]
5286#[target_feature(enable = "avx512f")]
5287#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
5288#[rustc_args_required_const(2)]
5289pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
5290 macro_rules! call {
5291 ($imm4:expr) => {
5292 vcvtps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
5293 };
5294 }
5295 let r = constify_imm4_round!(rounding, call);
5296 transmute(r)
5297}
5298
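// Illustrative sketch (not part of this module): the `rounding` argument must
// be a constant expression, typically one rounding direction OR-ed with
// `_MM_FROUND_NO_EXC`, or `_MM_FROUND_CUR_DIRECTION` to use MXCSR.RC.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn round_toward_neg_inf(a: __m512) -> __m512i {
//         // e.g. 2.7 converts to 2 here; _MM_FROUND_TO_NEAREST_INT would give 3.
//         _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
//     }
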
5299/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
5300///
5301/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
5302/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
5303/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
5304/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
5305/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
5306/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5307///
5308/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_epu32&expand=1341)
5309#[inline]
5310#[target_feature(enable = "avx512f")]
5311#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
5312#[rustc_args_required_const(1)]
5313pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i {
5314 macro_rules! call {
5315 ($imm4:expr) => {
5316 vcvtps2udq(
5317 a.as_f32x16(),
5318 _mm512_setzero_si512().as_u32x16(),
5319 0b11111111_11111111,
5320 $imm4,
5321 )
5322 };
5323 }
5324 let r = constify_imm4_round!(rounding, call);
5325 transmute(r)
5326}
5327
5328/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5329///
5330/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
5331/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
5332/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
5333/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
5334/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
5335/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5336///
5337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_epu32&expand=1342)
5338#[inline]
5339#[target_feature(enable = "avx512f")]
5340#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
5341#[rustc_args_required_const(3)]
5342pub unsafe fn _mm512_mask_cvt_roundps_epu32(
5343 src: __m512i,
5344 k: __mmask16,
5345 a: __m512,
5346 rounding: i32,
5347) -> __m512i {
5348 macro_rules! call {
5349 ($imm4:expr) => {
5350 vcvtps2udq(a.as_f32x16(), src.as_u32x16(), k, $imm4)
5351 };
5352 }
5353 let r = constify_imm4_round!(rounding, call);
5354 transmute(r)
5355}
5356
5357/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5358///
5359/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
5360/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
5361/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
5362/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
5363/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
5364/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
5365///
5366/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=maskz_cvt_roundps_epu32&expand=1343)
5367#[inline]
5368#[target_feature(enable = "avx512f")]
5369#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
5370#[rustc_args_required_const(2)]
5371pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
5372 macro_rules! call {
5373 ($imm4:expr) => {
5374 vcvtps2udq(a.as_f32x16(), _mm512_setzero_si512().as_u32x16(), k, $imm4)
5375 };
5376 }
5377 let r = constify_imm4_round!(rounding, call);
5378 transmute(r)
5379}
5380
5381/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
5382/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5383///
5384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundps_pd&expand=1347)
5385#[inline]
5386#[target_feature(enable = "avx512f")]
5387#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
5388#[rustc_args_required_const(1)]
5389pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d {
5390 macro_rules! call {
5391 ($imm4:expr) => {
5392 vcvtps2pd(
5393 a.as_f32x8(),
5394 _mm512_setzero_pd().as_f64x8(),
5395 0b11111111,
5396 $imm4,
5397 )
5398 };
5399 }
5400 let r = constify_imm4_sae!(sae, call);
5401 transmute(r)
5402}
5403
5404/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5405/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5406///
5407/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_pd&expand=1348)
5408#[inline]
5409#[target_feature(enable = "avx512f")]
5410#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
5411#[rustc_args_required_const(3)]
5412pub unsafe fn _mm512_mask_cvt_roundps_pd(
5413 src: __m512d,
5414 k: __mmask8,
5415 a: __m256,
5416 sae: i32,
5417) -> __m512d {
5418 macro_rules! call {
5419 ($imm4:expr) => {
5420 vcvtps2pd(a.as_f32x8(), src.as_f64x8(), k, $imm4)
5421 };
5422 }
5423 let r = constify_imm4_sae!(sae, call);
5424 transmute(r)
5425}
5426
5427/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5428/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5429///
5430/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_pd&expand=1349)
5431#[inline]
5432#[target_feature(enable = "avx512f")]
5433#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
5434#[rustc_args_required_const(2)]
5435pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d {
5436 macro_rules! call {
5437 ($imm4:expr) => {
5438 vcvtps2pd(a.as_f32x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
5439 };
5440 }
5441 let r = constify_imm4_sae!(sae, call);
5442 transmute(r)
5443}
5444
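// Illustrative sketch (not part of this module): f32-to-f64 widening is exact,
// so this family takes an `sae` argument instead of a rounding mode; the only
// meaningful values are `_MM_FROUND_CUR_DIRECTION` and `_MM_FROUND_NO_EXC`
// (the latter suppresses the floating-point exception flags).
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn widen_quietly(a: __m256) -> __m512d {
//         _mm512_cvt_roundps_pd(a, _MM_FROUND_NO_EXC)
//     }
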
5445/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
5446/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5447///
5448/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundps_epi32&expand=1916)
5449#[inline]
5450#[target_feature(enable = "avx512f")]
5451#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
5452#[rustc_args_required_const(1)]
5453pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i {
5454 macro_rules! call {
5455 ($imm4:expr) => {
5456 vcvttps2dq(
5457 a.as_f32x16(),
5458 _mm512_setzero_si512().as_i32x16(),
5459 0b11111111_11111111,
5460 $imm4,
5461 )
5462 };
5463 }
5464 let r = constify_imm4_sae!(sae, call);
5465 transmute(r)
5466}
5467
5468/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5469/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5470///
5471/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundps_epi32&expand=1917)
5472#[inline]
5473#[target_feature(enable = "avx512f")]
5474#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
5475#[rustc_args_required_const(3)]
5476pub unsafe fn _mm512_mask_cvtt_roundps_epi32(
5477 src: __m512i,
5478 k: __mmask16,
5479 a: __m512,
5480 sae: i32,
5481) -> __m512i {
5482 macro_rules! call {
5483 ($imm4:expr) => {
5484 vcvttps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
5485 };
5486 }
5487 let r = constify_imm4_sae!(sae, call);
5488 transmute(r)
5489}
5490
5491/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5492/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5493///
5494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundps_epi32&expand=1918)
5495#[inline]
5496#[target_feature(enable = "avx512f")]
5497#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
5498#[rustc_args_required_const(2)]
5499pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
5500 macro_rules! call {
5501 ($imm4:expr) => {
5502 vcvttps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
5503 };
5504 }
5505 let r = constify_imm4_sae!(sae, call);
5506 transmute(r)
5507}
5508
5509/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
5510/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5511///
5512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundps_epu32&expand=1922)
5513#[inline]
5514#[target_feature(enable = "avx512f")]
5515#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
5516#[rustc_args_required_const(1)]
5517pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i {
5518 macro_rules! call {
5519 ($imm4:expr) => {
5520 vcvttps2udq(
5521 a.as_f32x16(),
5522 _mm512_setzero_si512().as_i32x16(),
5523 0b11111111_11111111,
5524 $imm4,
5525 )
5526 };
5527 }
5528 let r = constify_imm4_sae!(sae, call);
5529 transmute(r)
5530}
5531
5532/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5533/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5534///
5535/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundps_epu32&expand=1923)
5536#[inline]
5537#[target_feature(enable = "avx512f")]
5538#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
5539#[rustc_args_required_const(3)]
5540pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
5541 src: __m512i,
5542 k: __mmask16,
5543 a: __m512,
5544 sae: i32,
5545) -> __m512i {
5546 macro_rules! call {
5547 ($imm4:expr) => {
5548 vcvttps2udq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
5549 };
5550 }
5551 let r = constify_imm4_sae!(sae, call);
5552 transmute(r)
5553}
5554
5555/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5556/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5557///
5558/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundps_epu32&expand=1924)
5559#[inline]
5560#[target_feature(enable = "avx512f")]
5561#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
5562#[rustc_args_required_const(2)]
5563pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
5564 macro_rules! call {
5565 ($imm4:expr) => {
5566 vcvttps2udq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
5567 };
5568 }
5569 let r = constify_imm4_sae!(sae, call);
5570 transmute(r)
5571}
5572
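// Illustrative sketch (not part of this module): the `cvtt*` intrinsics always
// truncate toward zero regardless of MXCSR, while `cvt*` honours the current
// or explicitly supplied rounding mode.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn truncate_vs_round(a: __m512) -> (__m512i, __m512i) {
//         let truncated = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
//         let rounded =
//             _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//         (truncated, rounded) // an input lane of 1.9 gives 1 and 2 respectively
//     }
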
5573/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
5574/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5575///
5576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundpd_epi32&expand=1904)
5577#[inline]
5578#[target_feature(enable = "avx512f")]
5579#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
5580#[rustc_args_required_const(1)]
5581pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i {
5582 macro_rules! call {
5583 ($imm4:expr) => {
5584 vcvttpd2dq(
5585 a.as_f64x8(),
5586 _mm256_setzero_si256().as_i32x8(),
5587 0b11111111,
5588 $imm4,
5589 )
5590 };
5591 }
5592 let r = constify_imm4_sae!(sae, call);
5593 transmute(r)
5594}
5595
5596/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5597/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5598///
5599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundpd_epi32&expand=1905)
5600#[inline]
5601#[target_feature(enable = "avx512f")]
5602#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
5603#[rustc_args_required_const(3)]
5604pub unsafe fn _mm512_mask_cvtt_roundpd_epi32(
5605 src: __m256i,
5606 k: __mmask8,
5607 a: __m512d,
5608 sae: i32,
5609) -> __m256i {
5610 macro_rules! call {
5611 ($imm4:expr) => {
5612 vcvttpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
5613 };
5614 }
5615 let r = constify_imm4_sae!(sae, call);
5616 transmute(r)
5617}
5618
5619/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5620/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5621///
5622/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundpd_epi32&expand=1906)
5623#[inline]
5624#[target_feature(enable = "avx512f")]
5625#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
5626#[rustc_args_required_const(2)]
5627pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
5628 macro_rules! call {
5629 ($imm4:expr) => {
5630 vcvttpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
5631 };
5632 }
5633 let r = constify_imm4_sae!(sae, call);
5634 transmute(r)
5635}
5636
5637/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
5638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5639///
5640/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundpd_epu32&expand=1910)
5641#[inline]
5642#[target_feature(enable = "avx512f")]
5643#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
5644#[rustc_args_required_const(1)]
5645pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i {
5646 macro_rules! call {
5647 ($imm4:expr) => {
5648 vcvttpd2udq(
5649 a.as_f64x8(),
5650 _mm256_setzero_si256().as_i32x8(),
5651 0b11111111,
5652 $imm4,
5653 )
5654 };
5655 }
5656 let r = constify_imm4_sae!(sae, call);
5657 transmute(r)
5658}
5659
5660/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5661/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5662///
5663/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundpd_epu32&expand=1911)
5664#[inline]
5665#[target_feature(enable = "avx512f")]
5666#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
5667#[rustc_args_required_const(3)]
5668pub unsafe fn _mm512_mask_cvtt_roundpd_epu32(
5669 src: __m256i,
5670 k: __mmask8,
5671 a: __m512d,
5672 sae: i32,
5673) -> __m256i {
5674 macro_rules! call {
5675 ($imm4:expr) => {
5676 vcvttpd2udq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
5677 };
5678 }
5679 let r = constify_imm4_sae!(sae, call);
5680 transmute(r)
5681}
5682
5683/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
5684///
5685/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttps_epi32&expand=1984)
5686#[inline]
5687#[target_feature(enable = "avx512f")]
5688#[cfg_attr(test, assert_instr(vcvttps2dq))]
5689pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
5690 transmute(vcvttps2dq(
5691 a.as_f32x16(),
5692 _mm512_setzero_si512().as_i32x16(),
5693 0b11111111_11111111,
5694 _MM_FROUND_CUR_DIRECTION,
5695 ))
5696}
5697
5698/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5699///
5700/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttps_epi32&expand=1985)
5701#[inline]
5702#[target_feature(enable = "avx512f")]
5703#[cfg_attr(test, assert_instr(vcvttps2dq))]
5704pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
5705 transmute(vcvttps2dq(
5706 a.as_f32x16(),
5707 src.as_i32x16(),
5708 k,
5709 _MM_FROUND_CUR_DIRECTION,
5710 ))
5711}
5712
5713/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5714///
5715/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttps_epi32&expand=1986)
5716#[inline]
5717#[target_feature(enable = "avx512f")]
5718#[cfg_attr(test, assert_instr(vcvttps2dq))]
5719pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
5720 transmute(vcvttps2dq(
5721 a.as_f32x16(),
5722 _mm512_setzero_si512().as_i32x16(),
5723 k,
5724 _MM_FROUND_CUR_DIRECTION,
5725 ))
5726}
5727
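// Illustrative sketch (not part of this module): truncating only the lanes
// selected by a mask; with the zeromask variant the unselected lanes come out
// as zero instead of being read from a `src` vector.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn truncate_even_lanes(a: __m512) -> __m512i {
//         // 0b0101_0101_0101_0101 selects lanes 0, 2, 4, ..., 14.
//         _mm512_maskz_cvttps_epi32(0b0101_0101_0101_0101, a)
//     }
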
5728/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
5729///
5730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttps_epu32&expand=2002)
5731#[inline]
5732#[target_feature(enable = "avx512f")]
5733#[cfg_attr(test, assert_instr(vcvttps2udq))]
5734pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
5735 transmute(vcvttps2udq(
5736 a.as_f32x16(),
5737 _mm512_setzero_si512().as_i32x16(),
5738 0b11111111_11111111,
5739 _MM_FROUND_CUR_DIRECTION,
5740 ))
5741}
5742
5743/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5744///
5745/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttps_epu32&expand=2003)
5746#[inline]
5747#[target_feature(enable = "avx512f")]
5748#[cfg_attr(test, assert_instr(vcvttps2udq))]
5749pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
5750 transmute(vcvttps2udq(
5751 a.as_f32x16(),
5752 src.as_i32x16(),
5753 k,
5754 _MM_FROUND_CUR_DIRECTION,
5755 ))
5756}
5757
5758/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5759///
5760/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttps_epu32&expand=2004)
5761#[inline]
5762#[target_feature(enable = "avx512f")]
5763#[cfg_attr(test, assert_instr(vcvttps2udq))]
5764pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
5765 transmute(vcvttps2udq(
5766 a.as_f32x16(),
5767 _mm512_setzero_si512().as_i32x16(),
5768 k,
5769 _MM_FROUND_CUR_DIRECTION,
5770 ))
5771}
5772
5773/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5774/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5775///
5776/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundpd_epu32&expand=1912)
5777#[inline]
5778#[target_feature(enable = "avx512f")]
5779#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
5780#[rustc_args_required_const(2)]
5781pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
5782 macro_rules! call {
5783 ($imm4:expr) => {
5784 vcvttpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
5785 };
5786 }
5787 let r = constify_imm4_sae!(sae, call);
5788 transmute(r)
5789}
5790
5791/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
5792///
5793/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttpd_epi32&expand=1947)
5794#[inline]
5795#[target_feature(enable = "avx512f")]
5796#[cfg_attr(test, assert_instr(vcvttpd2dq))]
5797pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
5798 transmute(vcvttpd2dq(
5799 a.as_f64x8(),
5800 _mm256_setzero_si256().as_i32x8(),
5801 0b11111111,
5802 _MM_FROUND_CUR_DIRECTION,
5803 ))
5804}
5805
5806/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5807///
5808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttpd_epi32&expand=1948)
5809#[inline]
5810#[target_feature(enable = "avx512f")]
5811#[cfg_attr(test, assert_instr(vcvttpd2dq))]
5812pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
5813 transmute(vcvttpd2dq(
5814 a.as_f64x8(),
5815 src.as_i32x8(),
5816 k,
5817 _MM_FROUND_CUR_DIRECTION,
5818 ))
5819}
5820
5821/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5822///
5823/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttpd_epi32&expand=1949)
5824#[inline]
5825#[target_feature(enable = "avx512f")]
5826#[cfg_attr(test, assert_instr(vcvttpd2dq))]
5827pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
5828 transmute(vcvttpd2dq(
5829 a.as_f64x8(),
5830 _mm256_setzero_si256().as_i32x8(),
5831 k,
5832 _MM_FROUND_CUR_DIRECTION,
5833 ))
5834}
5835
5836/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
5837///
5838/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttpd_epu32&expand=1965)
5839#[inline]
5840#[target_feature(enable = "avx512f")]
5841#[cfg_attr(test, assert_instr(vcvttpd2udq))]
5842pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
5843 transmute(vcvttpd2udq(
5844 a.as_f64x8(),
5845 _mm256_setzero_si256().as_i32x8(),
5846 0b11111111,
5847 _MM_FROUND_CUR_DIRECTION,
5848 ))
5849}
5850
5851/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5852///
5853/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttpd_epu32&expand=1966)
5854#[inline]
5855#[target_feature(enable = "avx512f")]
5856#[cfg_attr(test, assert_instr(vcvttpd2udq))]
5857pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
5858 transmute(vcvttpd2udq(
5859 a.as_f64x8(),
5860 src.as_i32x8(),
5861 k,
5862 _MM_FROUND_CUR_DIRECTION,
5863 ))
5864}
5865
5866/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5867///
5868/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttpd_epu32&expand=1967)
5869#[inline]
5870#[target_feature(enable = "avx512f")]
5871#[cfg_attr(test, assert_instr(vcvttpd2udq))]
5872pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
5873 transmute(vcvttpd2udq(
5874 a.as_f64x8(),
5875 _mm256_setzero_si256().as_i32x8(),
5876 k,
5877 _MM_FROUND_CUR_DIRECTION,
5878 ))
5879}
5880
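// Illustrative sketch (not part of this module): eight `f64` lanes narrow into
// a 256-bit integer vector, so the `cvttpd_*` family returns `__m256i` and
// takes an 8-bit mask; `_mm512_cvttpd_epu32` is the unsigned counterpart.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn narrow_f64x8(a: __m512d) -> __m256i {
//         _mm512_cvttpd_epi32(a)
//     }
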
5881/// Returns vector of type `__m512d` with all elements set to zero.
5882///
5883/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
5884#[inline]
5885#[target_feature(enable = "avx512f")]
5886#[cfg_attr(test, assert_instr(vxorps))]
5887pub unsafe fn _mm512_setzero_pd() -> __m512d {
5888 // All-0 is a properly initialized __m512d
5889 mem::zeroed()
5890}
5891
5892/// Returns vector of type `__m512` with all elements set to zero.
5893///
5894/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_ps)
5895#[inline]
5896#[target_feature(enable = "avx512f")]
5897#[cfg_attr(test, assert_instr(vxorps))]
5898pub unsafe fn _mm512_setzero_ps() -> __m512 {
5899 // All-0 is a properly initialized __m512
5900 mem::zeroed()
5901}
5902
5903/// Returns vector of type `__m512i` with all elements set to zero.
5904///
5905/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
5906#[inline]
5907#[target_feature(enable = "avx512f")]
5908#[cfg_attr(test, assert_instr(vxorps))]
5909pub unsafe fn _mm512_setzero_si512() -> __m512i {
5910 // All-0 is a properly initialized __m512i
5911 mem::zeroed()
5912}
5913
5914/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
5915/// order.
5916#[inline]
5917#[target_feature(enable = "avx512f")]
5918pub unsafe fn _mm512_setr_epi32(
5919 e15: i32,
5920 e14: i32,
5921 e13: i32,
5922 e12: i32,
5923 e11: i32,
5924 e10: i32,
5925 e9: i32,
5926 e8: i32,
5927 e7: i32,
5928 e6: i32,
5929 e5: i32,
5930 e4: i32,
5931 e3: i32,
5932 e2: i32,
5933 e1: i32,
5934 e0: i32,
5935) -> __m512i {
5936 let r = i32x16(
5937 e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
5938 );
5939 transmute(r)
5940}
5941
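// Illustrative sketch (not part of this module): with the `setr` ("set
// reversed") form the first argument becomes lane 0 of the result, i.e. the
// arguments are taken in memory order.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn lanes_0_to_15() -> __m512i {
//         _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
//     }
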
5942/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices.
5943///
5944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_pd)
5945#[inline]
5946#[target_feature(enable = "avx512f")]
5947#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
5948#[rustc_args_required_const(2)]
5949pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d {
5950 let zero = _mm512_setzero_pd().as_f64x8();
5951 let neg_one = -1;
5952 let slice = slice as *const i8;
5953 let offsets = offsets.as_i32x8();
5954 macro_rules! call {
5955 ($imm8:expr) => {
5956 vgatherdpd(zero, slice, offsets, neg_one, $imm8)
5957 };
5958 }
5959 let r = constify_imm8_gather!(scale, call);
5960 transmute(r)
5961}
5962
5963/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices.
5964///
5965/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_pd)
5966#[inline]
5967#[target_feature(enable = "avx512f")]
5968#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
5969#[rustc_args_required_const(4)]
5970pub unsafe fn _mm512_mask_i32gather_pd(
5971 src: __m512d,
5972 mask: __mmask8,
5973 offsets: __m256i,
5974 slice: *const u8,
5975 scale: i32,
5976) -> __m512d {
5977 let src = src.as_f64x8();
5978 let slice = slice as *const i8;
5979 let offsets = offsets.as_i32x8();
5980 macro_rules! call {
5981 ($imm8:expr) => {
5982 vgatherdpd(src, slice, offsets, mask as i8, $imm8)
5983 };
5984 }
5985 let r = constify_imm8_gather!(scale, call);
5986 transmute(r)
5987}
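// Illustrative sketch (not part of this module; `_mm256_setr_epi32` assumed
// from the AVX part of this crate): `scale` must be the constant 1, 2, 4 or 8
// and is multiplied with each 32-bit index to form a byte offset, so a scale
// of 8 lets the indices be plain element indices into an `f64` slice. The
// caller must keep every gathered element in bounds.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn gather_every_other(data: &[f64]) -> __m512d {
//         let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
//         _mm512_i32gather_pd(idx, data.as_ptr() as *const u8, 8)
//     }
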
5988
5989/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices.
5990///
5991/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_pd)
5992#[inline]
5993#[target_feature(enable = "avx512f")]
5994#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
5995#[rustc_args_required_const(2)]
5996pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d {
5997 let zero = _mm512_setzero_pd().as_f64x8();
5998 let neg_one = -1;
5999 let slice = slice as *const i8;
6000 let offsets = offsets.as_i64x8();
6001 macro_rules! call {
6002 ($imm8:expr) => {
6003 vgatherqpd(zero, slice, offsets, neg_one, $imm8)
6004 };
6005 }
6006 let r = constify_imm8_gather!(scale, call);
6007 transmute(r)
6008}
6009
6010/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices.
6011///
6012/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_pd)
6013#[inline]
6014#[target_feature(enable = "avx512f")]
6015#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
6016#[rustc_args_required_const(4)]
6017pub unsafe fn _mm512_mask_i64gather_pd(
6018 src: __m512d,
6019 mask: __mmask8,
6020 offsets: __m512i,
6021 slice: *const u8,
6022 scale: i32,
6023) -> __m512d {
6024 let src = src.as_f64x8();
6025 let slice = slice as *const i8;
6026 let offsets = offsets.as_i64x8();
6027 macro_rules! call {
6028 ($imm8:expr) => {
6029 vgatherqpd(src, slice, offsets, mask as i8, $imm8)
6030 };
6031 }
6032 let r = constify_imm8_gather!(scale, call);
6033 transmute(r)
6034}
6035
6036/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices.
6037///
6038/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_ps)
6039#[inline]
6040#[target_feature(enable = "avx512f")]
6041#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
6042#[rustc_args_required_const(2)]
6043pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 {
6044 let zero = _mm256_setzero_ps().as_f32x8();
6045 let neg_one = -1;
6046 let slice = slice as *const i8;
6047 let offsets = offsets.as_i64x8();
6048 macro_rules! call {
6049 ($imm8:expr) => {
6050 vgatherqps(zero, slice, offsets, neg_one, $imm8)
6051 };
6052 }
6053 let r = constify_imm8_gather!(scale, call);
6054 transmute(r)
6055}
6056
6057/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices.
6058///
6059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_ps)
6060#[inline]
6061#[target_feature(enable = "avx512f")]
6062#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
6063#[rustc_args_required_const(4)]
6064pub unsafe fn _mm512_mask_i64gather_ps(
6065 src: __m256,
6066 mask: __mmask8,
6067 offsets: __m512i,
6068 slice: *const u8,
6069 scale: i32,
6070) -> __m256 {
6071 let src = src.as_f32x8();
6072 let slice = slice as *const i8;
6073 let offsets = offsets.as_i64x8();
6074 macro_rules! call {
6075 ($imm8:expr) => {
6076 vgatherqps(src, slice, offsets, mask as i8, $imm8)
6077 };
6078 }
6079 let r = constify_imm8_gather!(scale, call);
6080 transmute(r)
6081}
6082
6083/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices.
6084///
6085/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_ps)
6086#[inline]
6087#[target_feature(enable = "avx512f")]
6088#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
6089#[rustc_args_required_const(2)]
6090pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 {
6091 let zero = _mm512_setzero_ps().as_f32x16();
6092 let neg_one = -1;
6093 let slice = slice as *const i8;
6094 let offsets = offsets.as_i32x16();
6095 macro_rules! call {
6096 ($imm8:expr) => {
6097 vgatherdps(zero, slice, offsets, neg_one, $imm8)
6098 };
6099 }
6100 let r = constify_imm8_gather!(scale, call);
6101 transmute(r)
6102}
6103
6104/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices.
6105///
6106/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_ps)
6107#[inline]
6108#[target_feature(enable = "avx512f")]
6109#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
6110#[rustc_args_required_const(4)]
6111pub unsafe fn _mm512_mask_i32gather_ps(
6112 src: __m512,
6113 mask: __mmask16,
6114 offsets: __m512i,
6115 slice: *const u8,
6116 scale: i32,
6117) -> __m512 {
6118 let src = src.as_f32x16();
6119 let slice = slice as *const i8;
6120 let offsets = offsets.as_i32x16();
6121 macro_rules! call {
6122 ($imm8:expr) => {
6123 vgatherdps(src, slice, offsets, mask as i16, $imm8)
6124 };
6125 }
6126 let r = constify_imm8_gather!(scale, call);
6127 transmute(r)
6128}
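// Illustrative sketch (not part of this module): a masked gather only touches
// memory for lanes whose mask bit is set; the remaining lanes are taken from
// `src`, which keeps lanes with out-of-range indices from being dereferenced.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn gather_first_n(data: &[f32], idx: __m512i, n: u32) -> __m512 {
//         debug_assert!(n <= 16);
//         let mask = ((1u32 << n) - 1) as __mmask16; // select the low `n` lanes
//         _mm512_mask_i32gather_ps(_mm512_setzero_ps(), mask, idx, data.as_ptr() as *const u8, 4)
//     }
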
6129
6130/// Gather 32-bit integers from memory using 32-bit indices.
6131///
6132/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi32)
6133#[inline]
6134#[target_feature(enable = "avx512f")]
6135#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
6136#[rustc_args_required_const(2)]
6137pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
6138 let zero = _mm512_setzero_si512().as_i32x16();
6139 let neg_one = -1;
6140 let slice = slice as *const i8;
6141 let offsets = offsets.as_i32x16();
6142 macro_rules! call {
6143 ($imm8:expr) => {
6144 vpgatherdd(zero, slice, offsets, neg_one, $imm8)
6145 };
6146 }
6147 let r = constify_imm8_gather!(scale, call);
6148 transmute(r)
6149}
6150
6151/// Gather 32-bit integers from memory using 32-bit indices.
6152///
6153/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi32)
6154#[inline]
6155#[target_feature(enable = "avx512f")]
6156#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
6157#[rustc_args_required_const(4)]
6158pub unsafe fn _mm512_mask_i32gather_epi32(
6159 src: __m512i,
6160 mask: __mmask16,
6161 offsets: __m512i,
6162 slice: *const u8,
6163 scale: i32,
6164) -> __m512i {
6165 let src = src.as_i32x16();
6166 let mask = mask as i16;
6167 let slice = slice as *const i8;
6168 let offsets = offsets.as_i32x16();
6169 macro_rules! call {
6170 ($imm8:expr) => {
6171 vpgatherdd(src, slice, offsets, mask, $imm8)
6172 };
6173 }
6174    let r = constify_imm8_gather!(scale, call);
6175 transmute(r)
6176}
6177
6178/// Gather 64-bit integers from memory using 32-bit indices.
6179///
6180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi64)
6181#[inline]
6182#[target_feature(enable = "avx512f")]
6183#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
6184#[rustc_args_required_const(2)]
6185pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i {
6186 let zero = _mm512_setzero_si512().as_i64x8();
6187 let neg_one = -1;
6188 let slice = slice as *const i8;
6189 let offsets = offsets.as_i32x8();
6190 macro_rules! call {
6191 ($imm8:expr) => {
6192 vpgatherdq(zero, slice, offsets, neg_one, $imm8)
6193 };
6194 }
6195 let r = constify_imm8_gather!(scale, call);
6196 transmute(r)
6197}
6198
6199/// Gather 64-bit integers from memory using 32-bit indices.
6200///
6201/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi64)
6202#[inline]
6203#[target_feature(enable = "avx512f")]
6204#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
6205#[rustc_args_required_const(4)]
6206pub unsafe fn _mm512_mask_i32gather_epi64(
6207 src: __m512i,
6208 mask: __mmask8,
6209 offsets: __m256i,
6210 slice: *const u8,
6211 scale: i32,
6212) -> __m512i {
6213 let src = src.as_i64x8();
6214 let mask = mask as i8;
6215 let slice = slice as *const i8;
6216 let offsets = offsets.as_i32x8();
6217 macro_rules! call {
6218 ($imm8:expr) => {
6219 vpgatherdq(src, slice, offsets, mask, $imm8)
6220 };
6221 }
6222 let r = constify_imm8_gather!(scale, call);
6223 transmute(r)
6224}
6225
6226/// Gather 64-bit integers from memory using 64-bit indices.
6227///
6228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi64)
6229#[inline]
6230#[target_feature(enable = "avx512f")]
6231#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
6232#[rustc_args_required_const(2)]
6233pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
6234 let zero = _mm512_setzero_si512().as_i64x8();
6235 let neg_one = -1;
6236 let slice = slice as *const i8;
6237 let offsets = offsets.as_i64x8();
6238 macro_rules! call {
6239 ($imm8:expr) => {
6240 vpgatherqq(zero, slice, offsets, neg_one, $imm8)
6241 };
6242 }
6243 let r = constify_imm8_gather!(scale, call);
6244 transmute(r)
6245}
6246
6247/// Gather 64-bit integers from memory using 64-bit indices.
6248///
6249/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi64)
6250#[inline]
6251#[target_feature(enable = "avx512f")]
6252#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
6253#[rustc_args_required_const(4)]
6254pub unsafe fn _mm512_mask_i64gather_epi64(
6255 src: __m512i,
6256 mask: __mmask8,
6257 offsets: __m512i,
6258 slice: *const u8,
6259 scale: i32,
6260) -> __m512i {
6261 let src = src.as_i64x8();
6262 let mask = mask as i8;
6263 let slice = slice as *const i8;
6264 let offsets = offsets.as_i64x8();
6265 macro_rules! call {
6266 ($imm8:expr) => {
6267 vpgatherqq(src, slice, offsets, mask, $imm8)
6268 };
6269 }
6270 let r = constify_imm8_gather!(scale, call);
6271 transmute(r)
6272}
6273
6274/// Gather 32-bit integers from memory using 64-bit indices.
6275///
6276/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi32)
6277#[inline]
6278#[target_feature(enable = "avx512f")]
6279#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
6280#[rustc_args_required_const(2)]
6281pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i {
6282 let zeros = _mm256_setzero_si256().as_i32x8();
6283 let neg_one = -1;
6284 let slice = slice as *const i8;
6285 let offsets = offsets.as_i64x8();
6286 macro_rules! call {
6287 ($imm8:expr) => {
6288 vpgatherqd(zeros, slice, offsets, neg_one, $imm8)
6289 };
6290 }
6291 let r = constify_imm8_gather!(scale, call);
6292 transmute(r)
6293}
6294
6295/// Gather 32-bit integers from memory using 64-bit indices.
6296///
6297/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi32)
6298#[inline]
6299#[target_feature(enable = "avx512f")]
6300#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
6301#[rustc_args_required_const(4)]
6302pub unsafe fn _mm512_mask_i64gather_epi32(
6303 src: __m256i,
6304 mask: __mmask8,
6305 offsets: __m512i,
6306 slice: *const u8,
6307 scale: i32,
6308) -> __m256i {
6309 let src = src.as_i32x8();
6310 let mask = mask as i8;
6311 let slice = slice as *const i8;
6312 let offsets = offsets.as_i64x8();
6313 macro_rules! call {
6314 ($imm8:expr) => {
6315 vpgatherqd(src, slice, offsets, mask, $imm8)
6316 };
6317 }
6318 let r = constify_imm8_gather!(scale, call);
6319 transmute(r)
6320}
6321
6322/// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices.
6323///
6324/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_pd)
6325#[inline]
6326#[target_feature(enable = "avx512f")]
6327#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
6328#[rustc_args_required_const(3)]
6329pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
6330 let src = src.as_f64x8();
6331 let neg_one = -1;
6332 let slice = slice as *mut i8;
6333 let offsets = offsets.as_i32x8();
6334 macro_rules! call {
6335 ($imm8:expr) => {
6336 vscatterdpd(slice, neg_one, offsets, src, $imm8)
6337 };
6338 }
6339 constify_imm8_gather!(scale, call);
6340}
6341
6342/// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices.
6343///
6344/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_pd)
6345#[inline]
6346#[target_feature(enable = "avx512f")]
6347#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
6348#[rustc_args_required_const(4)]
6349pub unsafe fn _mm512_mask_i32scatter_pd(
6350 slice: *mut u8,
6351 mask: __mmask8,
6352 offsets: __m256i,
6353 src: __m512d,
6354 scale: i32,
6355) {
6356 let src = src.as_f64x8();
6357 let slice = slice as *mut i8;
6358 let offsets = offsets.as_i32x8();
6359 macro_rules! call {
6360 ($imm8:expr) => {
6361 vscatterdpd(slice, mask as i8, offsets, src, $imm8)
6362 };
6363 }
6364 constify_imm8_gather!(scale, call);
6365}
6366
6367/// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices.
6368///
6369/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_pd)
6370#[inline]
6371#[target_feature(enable = "avx512f")]
6372#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
6373#[rustc_args_required_const(3)]
6374pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) {
6375 let src = src.as_f64x8();
6376 let neg_one = -1;
6377 let slice = slice as *mut i8;
6378 let offsets = offsets.as_i64x8();
6379 macro_rules! call {
6380 ($imm8:expr) => {
6381 vscatterqpd(slice, neg_one, offsets, src, $imm8)
6382 };
6383 }
6384 constify_imm8_gather!(scale, call);
6385}
6386
6387/// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices.
6388///
6389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_pd)
6390#[inline]
6391#[target_feature(enable = "avx512f")]
6392#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
6393#[rustc_args_required_const(4)]
6394pub unsafe fn _mm512_mask_i64scatter_pd(
6395 slice: *mut u8,
6396 mask: __mmask8,
6397 offsets: __m512i,
6398 src: __m512d,
6399 scale: i32,
6400) {
6401 let src = src.as_f64x8();
6402 let slice = slice as *mut i8;
6403 let offsets = offsets.as_i64x8();
6404 macro_rules! call {
6405 ($imm8:expr) => {
6406 vscatterqpd(slice, mask as i8, offsets, src, $imm8)
6407 };
6408 }
6409 constify_imm8_gather!(scale, call);
6410}
6411
6412/// Scatter single-precision (32-bit) floating-point elements from src into memory using 32-bit indices.
6413///
6414/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_ps)
6415#[inline]
6416#[target_feature(enable = "avx512f")]
6417#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
6418#[rustc_args_required_const(3)]
6419pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) {
6420 let src = src.as_f32x16();
6421 let neg_one = -1;
6422 let slice = slice as *mut i8;
6423 let offsets = offsets.as_i32x16();
6424 macro_rules! call {
6425 ($imm8:expr) => {
6426 vscatterdps(slice, neg_one, offsets, src, $imm8)
6427 };
6428 }
6429 constify_imm8_gather!(scale, call);
6430}
6431
6432/// Scatter single-precision (32-bit) floating-point elements from src into memory using 32-bit indices.
6433///
6434/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_ps)
6435#[inline]
6436#[target_feature(enable = "avx512f")]
6437#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
6438#[rustc_args_required_const(4)]
6439pub unsafe fn _mm512_mask_i32scatter_ps(
6440 slice: *mut u8,
6441 mask: __mmask16,
6442 offsets: __m512i,
6443 src: __m512,
6444 scale: i32,
6445) {
6446 let src = src.as_f32x16();
6447 let slice = slice as *mut i8;
6448 let offsets = offsets.as_i32x16();
6449 macro_rules! call {
6450 ($imm8:expr) => {
6451 vscatterdps(slice, mask as i16, offsets, src, $imm8)
6452 };
6453 }
6454 constify_imm8_gather!(scale, call);
6455}
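// Illustrative sketch (not part of this module): scatters write through the
// pointer, so the destination must be writable and every selected index must
// stay in bounds; lanes whose mask bit is clear leave memory untouched.
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn scatter_low_half(dst: &mut [f32], idx: __m512i, v: __m512) {
//         _mm512_mask_i32scatter_ps(dst.as_mut_ptr() as *mut u8, 0x00ff, idx, v, 4);
//     }
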
6456
6457/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices.
6458///
6459/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_ps)
6460#[inline]
6461#[target_feature(enable = "avx512f")]
6462#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
6463#[rustc_args_required_const(3)]
6464pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) {
6465 let src = src.as_f32x8();
6466 let neg_one = -1;
6467 let slice = slice as *mut i8;
6468 let offsets = offsets.as_i64x8();
6469 macro_rules! call {
6470 ($imm8:expr) => {
6471 vscatterqps(slice, neg_one, offsets, src, $imm8)
6472 };
6473 }
6474 constify_imm8_gather!(scale, call);
6475}
6476
6477/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices.
6478///
6479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_ps)
6480#[inline]
6481#[target_feature(enable = "avx512f")]
6482#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
6483#[rustc_args_required_const(4)]
6484pub unsafe fn _mm512_mask_i64scatter_ps(
6485 slice: *mut u8,
6486 mask: __mmask8,
6487 offsets: __m512i,
6488 src: __m256,
6489 scale: i32,
6490) {
6491 let src = src.as_f32x8();
6492 let slice = slice as *mut i8;
6493 let offsets = offsets.as_i64x8();
6494 macro_rules! call {
6495 ($imm8:expr) => {
6496 vscatterqps(slice, mask as i8, offsets, src, $imm8)
6497 };
6498 }
6499 constify_imm8_gather!(scale, call);
6500}
6501
6502/// Scatter 64-bit integers from src into memory using 32-bit indices.
6503///
6504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi64)
6505#[inline]
6506#[target_feature(enable = "avx512f")]
6507#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
6508#[rustc_args_required_const(3)]
6509pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) {
6510 let src = src.as_i64x8();
6511 let neg_one = -1;
6512 let slice = slice as *mut i8;
6513 let offsets = offsets.as_i32x8();
6514 macro_rules! call {
6515 ($imm8:expr) => {
6516 vpscatterdq(slice, neg_one, offsets, src, $imm8)
6517 };
6518 }
6519 constify_imm8_gather!(scale, call);
6520}
6521
6522/// Scatter 64-bit integers from src into memory using 32-bit indices.
6523///
6524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi64)
6525#[inline]
6526#[target_feature(enable = "avx512f")]
6527#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
6528#[rustc_args_required_const(4)]
6529pub unsafe fn _mm512_mask_i32scatter_epi64(
6530 slice: *mut u8,
6531 mask: __mmask8,
6532 offsets: __m256i,
6533 src: __m512i,
6534 scale: i32,
6535) {
6536 let src = src.as_i64x8();
6537 let mask = mask as i8;
6538 let slice = slice as *mut i8;
6539 let offsets = offsets.as_i32x8();
6540 macro_rules! call {
6541 ($imm8:expr) => {
6542 vpscatterdq(slice, mask, offsets, src, $imm8)
6543 };
6544 }
6545 constify_imm8_gather!(scale, call);
6546}
6547
6548/// Scatter 64-bit integers from src into memory using 64-bit indices.
6549///
6550/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi64)
6551#[inline]
6552#[target_feature(enable = "avx512f")]
6553#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
6554#[rustc_args_required_const(3)]
6555pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
6556 let src = src.as_i64x8();
6557 let neg_one = -1;
6558 let slice = slice as *mut i8;
6559 let offsets = offsets.as_i64x8();
6560 macro_rules! call {
6561 ($imm8:expr) => {
6562 vpscatterqq(slice, neg_one, offsets, src, $imm8)
6563 };
6564 }
6565 constify_imm8_gather!(scale, call);
6566}
6567
6568/// Scatter 64-bit integers from src into memory using 64-bit indices.
6569///
6570/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi64)
6571#[inline]
6572#[target_feature(enable = "avx512f")]
6573#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
6574#[rustc_args_required_const(4)]
6575pub unsafe fn _mm512_mask_i64scatter_epi64(
6576 slice: *mut u8,
6577 mask: __mmask8,
6578 offsets: __m512i,
6579 src: __m512i,
6580 scale: i32,
6581) {
6582 let src = src.as_i64x8();
6583 let mask = mask as i8;
6584 let slice = slice as *mut i8;
6585 let offsets = offsets.as_i64x8();
6586 macro_rules! call {
6587 ($imm8:expr) => {
6588 vpscatterqq(slice, mask, offsets, src, $imm8)
6589 };
6590 }
6591 constify_imm8_gather!(scale, call);
6592}
6593
6594/// Scatter 32-bit integers from src into memory using 32-bit indices.
6595///
6596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi32)
6597#[inline]
6598#[target_feature(enable = "avx512f")]
6599#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
6600#[rustc_args_required_const(3)]
6601pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
6602 let src = src.as_i32x16();
6603 let neg_one = -1;
6604 let slice = slice as *mut i8;
6605 let offsets = offsets.as_i32x16();
6606 macro_rules! call {
6607 ($imm8:expr) => {
6608 vpscatterdd(slice, neg_one, offsets, src, $imm8)
6609 };
6610 }
6611 constify_imm8_gather!(scale, call);
6612}
6613
6614/// Scatter 32-bit integers from src into memory using 32-bit indices, subject to writemask k (elements are not stored when the corresponding mask bit is not set).
6615///
6616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi32)
6617#[inline]
6618#[target_feature(enable = "avx512f")]
6619#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
6620#[rustc_args_required_const(4)]
6621pub unsafe fn _mm512_mask_i32scatter_epi32(
6622 slice: *mut u8,
6623 mask: __mmask16,
6624 offsets: __m512i,
6625 src: __m512i,
6626 scale: i32,
6627) {
6628 let src = src.as_i32x16();
6629 let mask = mask as i16;
6630 let slice = slice as *mut i8;
6631 let offsets = offsets.as_i32x16();
6632 macro_rules! call {
6633 ($imm8:expr) => {
6634 vpscatterdd(slice, mask, offsets, src, $imm8)
6635 };
6636 }
6637 constify_imm8_gather!(scale, call);
6638}
6639
6640/// Scatter 32-bit integers from src into memory using 64-bit indices.
6641///
6642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32)
6643#[inline]
6644#[target_feature(enable = "avx512f")]
6645#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
6646#[rustc_args_required_const(3)]
6647pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) {
6648 let src = src.as_i32x8();
6649 let neg_one = -1;
6650 let slice = slice as *mut i8;
6651 let offsets = offsets.as_i64x8();
6652 macro_rules! call {
6653 ($imm8:expr) => {
6654 vpscatterqd(slice, neg_one, offsets, src, $imm8)
6655 };
6656 }
6657 constify_imm8_gather!(scale, call);
6658}
6659
6660/// Scatter 32-bit integers from src into memory using 64-bit indices, subject to writemask k (elements are not stored when the corresponding mask bit is not set).
6661///
6662/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi32)
6663#[inline]
6664#[target_feature(enable = "avx512f")]
6665#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
6666#[rustc_args_required_const(4)]
6667pub unsafe fn _mm512_mask_i64scatter_epi32(
6668 slice: *mut u8,
6669 mask: __mmask8,
6670 offsets: __m512i,
6671 src: __m256i,
6672 scale: i32,
6673) {
6674 let src = src.as_i32x8();
6675 let mask = mask as i8;
6676 let slice = slice as *mut i8;
6677 let offsets = offsets.as_i64x8();
6678 macro_rules! call {
6679 ($imm8:expr) => {
6680 vpscatterqd(slice, mask, offsets, src, $imm8)
6681 };
6682 }
6683 constify_imm8_gather!(scale, call);
6684}
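
// Masked-scatter sketch (illustrative only): with mask 0b0000_1111 just the low
// four lanes are stored; the remaining destination slots keep their old contents.
// `scale` is 4, the size of an i32.
//
//     let mut buf = [-1i32; 8];
//     let idx = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let src = _mm256_setr_epi32(10, 11, 12, 13, 14, 15, 16, 17);
//     _mm512_mask_i64scatter_epi32(buf.as_mut_ptr() as *mut u8, 0b0000_1111, idx, src, 4);
//     // buf == [10, 11, 12, 13, -1, -1, -1, -1]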
6685
6686/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
6687///
6688/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
6689#[inline]
6690#[target_feature(enable = "avx512f")]
6691#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
6692#[rustc_args_required_const(1)]
6693pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
6694 macro_rules! call {
6695 ($imm8:expr) => {
6696 vprold(a.as_i32x16(), $imm8)
6697 };
6698 }
6699 let r = constify_imm8_sae!(imm8, call);
6700 transmute(r)
6701}
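
// Rotation sketch (illustrative only): rotating 0x8000_0001 left by one bit wraps
// the sign bit around to bit 0, so every lane becomes 0x0000_0003.
//
//     let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
//     let r = _mm512_rol_epi32(a, 1);
//     // every 32-bit lane of r == 0x0000_0003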
6702
6703/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6704///
6705/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
6706#[inline]
6707#[target_feature(enable = "avx512f")]
6708#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
6709#[rustc_args_required_const(3)]
6710pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
6711 macro_rules! call {
6712 ($imm8:expr) => {
6713 vprold(a.as_i32x16(), $imm8)
6714 };
6715 }
6716 let rol = constify_imm8_sae!(imm8, call);
6717 transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
6718}
6719
6720/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
6723#[inline]
6724#[target_feature(enable = "avx512f")]
6725#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
6726#[rustc_args_required_const(2)]
6727pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
6728 macro_rules! call {
6729 ($imm8:expr) => {
6730 vprold(a.as_i32x16(), $imm8)
6731 };
6732 }
6733 let rol = constify_imm8_sae!(imm8, call);
6734 let zero = _mm512_setzero_si512().as_i32x16();
6735 transmute(simd_select_bitmask(k, rol, zero))
6736}
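
// Writemask vs. zeromask sketch (illustrative only): with k = 0b0000_0000_1111_1111
// only the low eight lanes receive the rotated value. The `mask_` form keeps `src`
// in the unselected lanes, the `maskz_` form zeroes them.
//
//     let a = _mm512_set1_epi32(1 << 31);
//     let kept = _mm512_mask_rol_epi32(a, 0b0000_0000_1111_1111, a, 1);
//     let zeroed = _mm512_maskz_rol_epi32(0b0000_0000_1111_1111, a, 1);
//     // low 8 lanes of both == 1; high 8 lanes: `kept` == 1 << 31, `zeroed` == 0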
6737
6738/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
6739///
6740/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
6741#[inline]
6742#[target_feature(enable = "avx512f")]
6743#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
6744#[rustc_args_required_const(1)]
6745pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
6746 macro_rules! call {
6747 ($imm8:expr) => {
6748 vprord(a.as_i32x16(), $imm8)
6749 };
6750 }
6751 let r = constify_imm8_sae!(imm8, call);
6752 transmute(r)
6753}
6754
6755/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6756///
6757/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
6758#[inline]
6759#[target_feature(enable = "avx512f")]
6760#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
6761#[rustc_args_required_const(3)]
6762pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
6763 macro_rules! call {
6764 ($imm8:expr) => {
6765 vprord(a.as_i32x16(), $imm8)
6766 };
6767 }
6768 let ror = constify_imm8_sae!(imm8, call);
6769 transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
6770}
6771
6772/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6773///
6774/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
6775#[inline]
6776#[target_feature(enable = "avx512f")]
6777#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
6778#[rustc_args_required_const(2)]
6779pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
6780 macro_rules! call {
6781 ($imm8:expr) => {
6782 vprord(a.as_i32x16(), $imm8)
6783 };
6784 }
6785 let ror = constify_imm8_sae!(imm8, call);
6786 let zero = _mm512_setzero_si512().as_i32x16();
6787 transmute(simd_select_bitmask(k, ror, zero))
6788}
6789
6790/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
6791///
6792/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
6793#[inline]
6794#[target_feature(enable = "avx512f")]
6795#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
6796#[rustc_args_required_const(1)]
6797pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
6798 macro_rules! call {
6799 ($imm8:expr) => {
6800 vprolq(a.as_i64x8(), $imm8)
6801 };
6802 }
6803 let r = constify_imm8_sae!(imm8, call);
6804 transmute(r)
6805}
6806
6807/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6808///
6809/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
6810#[inline]
6811#[target_feature(enable = "avx512f")]
6812#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
6813#[rustc_args_required_const(3)]
6814pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
6815 macro_rules! call {
6816 ($imm8:expr) => {
6817 vprolq(a.as_i64x8(), $imm8)
6818 };
6819 }
6820 let rol = constify_imm8_sae!(imm8, call);
6821 transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
6822}
6823
6824/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6825///
6826/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
6827#[inline]
6828#[target_feature(enable = "avx512f")]
6829#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
6830#[rustc_args_required_const(2)]
6831pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
6832 macro_rules! call {
6833 ($imm8:expr) => {
6834 vprolq(a.as_i64x8(), $imm8)
6835 };
6836 }
6837 let rol = constify_imm8_sae!(imm8, call);
6838 let zero = _mm512_setzero_si512().as_i64x8();
6839 transmute(simd_select_bitmask(k, rol, zero))
6840}
6841
6842/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
6843///
6844/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
6845#[inline]
6846#[target_feature(enable = "avx512f")]
6847#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
6848#[rustc_args_required_const(1)]
6849pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
6850 macro_rules! call {
6851 ($imm8:expr) => {
6852 vprorq(a.as_i64x8(), $imm8)
6853 };
6854 }
6855 let r = constify_imm8_sae!(imm8, call);
6856 transmute(r)
6857}
6858
6859/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6860///
6861/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
6862#[inline]
6863#[target_feature(enable = "avx512f")]
6864#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
6865#[rustc_args_required_const(3)]
6866pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
6867 macro_rules! call {
6868 ($imm8:expr) => {
6869 vprorq(a.as_i64x8(), $imm8)
6870 };
6871 }
6872 let ror = constify_imm8_sae!(imm8, call);
6873 transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
6874}
6875
6876/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6877///
6878/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
6879#[inline]
6880#[target_feature(enable = "avx512f")]
6881#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
6882#[rustc_args_required_const(2)]
6883pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
6884 macro_rules! call {
6885 ($imm8:expr) => {
6886 vprorq(a.as_i64x8(), $imm8)
6887 };
6888 }
6889 let ror = constify_imm8_sae!(imm8, call);
6890 let zero = _mm512_setzero_si512().as_i64x8();
6891 transmute(simd_select_bitmask(k, ror, zero))
6892}
6893
6894/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
6895///
6896/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
6897#[inline]
6898#[target_feature(enable = "avx512f")]
6899#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
6900#[rustc_args_required_const(1)]
6901pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
6902 macro_rules! call {
6903 ($imm8:expr) => {
6904 vpsllid(a.as_i32x16(), $imm8)
6905 };
6906 }
6907 let r = constify_imm8_sae!(imm8, call);
6908 transmute(r)
6909}
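
// Shift-by-immediate sketch (illustrative only): a logical left shift by 3
// multiplies each lane by 8; bits shifted past bit 31 are discarded.
//
//     let a = _mm512_set1_epi32(5);
//     let r = _mm512_slli_epi32(a, 3);
//     // every lane of r == 40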
6910
6911/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6912///
6913/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
6914#[inline]
6915#[target_feature(enable = "avx512f")]
6916#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
6917#[rustc_args_required_const(3)]
6918pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
6919 macro_rules! call {
6920 ($imm8:expr) => {
6921 vpsllid(a.as_i32x16(), $imm8)
6922 };
6923 }
6924 let shf = constify_imm8_sae!(imm8, call);
6925 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
6926}
6927
6928/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6929///
6930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
6931#[inline]
6932#[target_feature(enable = "avx512f")]
6933#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
6934#[rustc_args_required_const(2)]
6935pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
6936 macro_rules! call {
6937 ($imm8:expr) => {
6938 vpsllid(a.as_i32x16(), $imm8)
6939 };
6940 }
6941 let shf = constify_imm8_sae!(imm8, call);
6942 let zero = _mm512_setzero_si512().as_i32x16();
6943 transmute(simd_select_bitmask(k, shf, zero))
6944}
6945
6946/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
6947///
6948/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
6949#[inline]
6950#[target_feature(enable = "avx512f")]
6951#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
6952#[rustc_args_required_const(1)]
6953pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
6954 macro_rules! call {
6955 ($imm8:expr) => {
6956 vpsrlid(a.as_i32x16(), $imm8)
6957 };
6958 }
6959 let r = constify_imm8_sae!(imm8, call);
6960 transmute(r)
6961}
6962
6963/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6964///
6965/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
6966#[inline]
6967#[target_feature(enable = "avx512f")]
6968#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
6969#[rustc_args_required_const(3)]
6970pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
6971 macro_rules! call {
6972 ($imm8:expr) => {
6973 vpsrlid(a.as_i32x16(), $imm8)
6974 };
6975 }
6976 let shf = constify_imm8_sae!(imm8, call);
6977 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
6978}
6979
6980/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6981///
6982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
6983#[inline]
6984#[target_feature(enable = "avx512f")]
6985#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
6986#[rustc_args_required_const(2)]
6987pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
6988 macro_rules! call {
6989 ($imm8:expr) => {
6990 vpsrlid(a.as_i32x16(), $imm8)
6991 };
6992 }
6993 let shf = constify_imm8_sae!(imm8, call);
6994 let zero = _mm512_setzero_si512().as_i32x16();
6995 transmute(simd_select_bitmask(k, shf, zero))
6996}
6997
6998/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
6999///
7000/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
7001#[inline]
7002#[target_feature(enable = "avx512f")]
7003#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
7004#[rustc_args_required_const(1)]
7005pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
7006 macro_rules! call {
7007 ($imm8:expr) => {
7008 vpslliq(a.as_i64x8(), $imm8)
7009 };
7010 }
7011 let r = constify_imm8_sae!(imm8, call);
7012 transmute(r)
7013}
7014
7015/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7016///
7017/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
7018#[inline]
7019#[target_feature(enable = "avx512f")]
7020#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
7021#[rustc_args_required_const(3)]
7022pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
7023 macro_rules! call {
7024 ($imm8:expr) => {
7025 vpslliq(a.as_i64x8(), $imm8)
7026 };
7027 }
7028 let shf = constify_imm8_sae!(imm8, call);
7029 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7030}
7031
7032/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7033///
7034/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
7035#[inline]
7036#[target_feature(enable = "avx512f")]
7037#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
7038#[rustc_args_required_const(2)]
7039pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
7040 macro_rules! call {
7041 ($imm8:expr) => {
7042 vpslliq(a.as_i64x8(), $imm8)
7043 };
7044 }
7045 let shf = constify_imm8_sae!(imm8, call);
7046 let zero = _mm512_setzero_si512().as_i64x8();
7047 transmute(simd_select_bitmask(k, shf, zero))
7048}
7049
7050/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
7051///
7052/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
7053#[inline]
7054#[target_feature(enable = "avx512f")]
7055#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
7056#[rustc_args_required_const(1)]
7057pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
7058 macro_rules! call {
7059 ($imm8:expr) => {
7060 vpsrliq(a.as_i64x8(), $imm8)
7061 };
7062 }
7063 let r = constify_imm8_sae!(imm8, call);
7064 transmute(r)
7065}
7066
7067/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7068///
7069/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
7070#[inline]
7071#[target_feature(enable = "avx512f")]
7072#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
7073#[rustc_args_required_const(3)]
7074pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
7075 macro_rules! call {
7076 ($imm8:expr) => {
7077 vpsrliq(a.as_i64x8(), $imm8)
7078 };
7079 }
7080 let shf = constify_imm8_sae!(imm8, call);
7081 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7082}
7083
7084/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7085///
7086/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
7087#[inline]
7088#[target_feature(enable = "avx512f")]
7089#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
7090#[rustc_args_required_const(2)]
7091pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
7092 macro_rules! call {
7093 ($imm8:expr) => {
7094 vpsrliq(a.as_i64x8(), $imm8)
7095 };
7096 }
7097 let shf = constify_imm8_sae!(imm8, call);
7098 let zero = _mm512_setzero_si512().as_i64x8();
7099 transmute(simd_select_bitmask(k, shf, zero))
7100}
7101
7102/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
7103///
7104/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
7105#[inline]
7106#[target_feature(enable = "avx512f")]
7107#[cfg_attr(test, assert_instr(vpslld))]
7108pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
7109 transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
7110}
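
// Count-register sketch (illustrative only): unlike `_mm512_slli_epi32`, the shift
// amount comes from the low 64 bits of a __m128i, and every lane is shifted by
// that same count.
//
//     let a = _mm512_set1_epi32(1);
//     let count = _mm_set_epi64x(0, 4); // shift by 4
//     let r = _mm512_sll_epi32(a, count);
//     // every lane of r == 16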
7111
7112/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7113///
7114/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
7115#[inline]
7116#[target_feature(enable = "avx512f")]
7117#[cfg_attr(test, assert_instr(vpslld))]
7118pub unsafe fn _mm512_mask_sll_epi32(
7119 src: __m512i,
7120 k: __mmask16,
7121 a: __m512i,
7122 count: __m128i,
7123) -> __m512i {
7124 let shf = _mm512_sll_epi32(a, count).as_i32x16();
7125 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7126}
7127
7128/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7129///
7130/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
7131#[inline]
7132#[target_feature(enable = "avx512f")]
7133#[cfg_attr(test, assert_instr(vpslld))]
7134pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
7135 let shf = _mm512_sll_epi32(a, count).as_i32x16();
7136 let zero = _mm512_setzero_si512().as_i32x16();
7137 transmute(simd_select_bitmask(k, shf, zero))
7138}
7139
7140/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
7141///
7142/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
7143#[inline]
7144#[target_feature(enable = "avx512f")]
7145#[cfg_attr(test, assert_instr(vpsrld))]
7146pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
7147 transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
7148}
7149
7150/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7151///
7152/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
7153#[inline]
7154#[target_feature(enable = "avx512f")]
7155#[cfg_attr(test, assert_instr(vpsrld))]
7156pub unsafe fn _mm512_mask_srl_epi32(
7157 src: __m512i,
7158 k: __mmask16,
7159 a: __m512i,
7160 count: __m128i,
7161) -> __m512i {
7162 let shf = _mm512_srl_epi32(a, count).as_i32x16();
7163 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7164}
7165
7166/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7167///
7168/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
7169#[inline]
7170#[target_feature(enable = "avx512f")]
7171#[cfg_attr(test, assert_instr(vpsrld))]
7172pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
7173 let shf = _mm512_srl_epi32(a, count).as_i32x16();
7174 let zero = _mm512_setzero_si512().as_i32x16();
7175 transmute(simd_select_bitmask(k, shf, zero))
7176}
7177
7178/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
7179///
7180/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
7181#[inline]
7182#[target_feature(enable = "avx512f")]
7183#[cfg_attr(test, assert_instr(vpsllq))]
7184pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
7185 transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
7186}
7187
7188/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7189///
7190/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
7191#[inline]
7192#[target_feature(enable = "avx512f")]
7193#[cfg_attr(test, assert_instr(vpsllq))]
7194pub unsafe fn _mm512_mask_sll_epi64(
7195 src: __m512i,
7196 k: __mmask8,
7197 a: __m512i,
7198 count: __m128i,
7199) -> __m512i {
7200 let shf = _mm512_sll_epi64(a, count).as_i64x8();
7201 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7202}
7203
7204/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7205///
7206/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
7207#[inline]
7208#[target_feature(enable = "avx512f")]
7209#[cfg_attr(test, assert_instr(vpsllq))]
7210pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
7211 let shf = _mm512_sll_epi64(a, count).as_i64x8();
7212 let zero = _mm512_setzero_si512().as_i64x8();
7213 transmute(simd_select_bitmask(k, shf, zero))
7214}
7215
7216/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
7217///
7218/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
7219#[inline]
7220#[target_feature(enable = "avx512f")]
7221#[cfg_attr(test, assert_instr(vpsrlq))]
7222pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
7223 transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
7224}
7225
7226/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7227///
7228/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
7229#[inline]
7230#[target_feature(enable = "avx512f")]
7231#[cfg_attr(test, assert_instr(vpsrlq))]
7232pub unsafe fn _mm512_mask_srl_epi64(
7233 src: __m512i,
7234 k: __mmask8,
7235 a: __m512i,
7236 count: __m128i,
7237) -> __m512i {
7238 let shf = _mm512_srl_epi64(a, count).as_i64x8();
7239 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7240}
7241
7242/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7243///
7244/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500)
7245#[inline]
7246#[target_feature(enable = "avx512f")]
7247#[cfg_attr(test, assert_instr(vpsrlq))]
7248pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
7249 let shf = _mm512_srl_epi64(a, count).as_i64x8();
7250 let zero = _mm512_setzero_si512().as_i64x8();
7251 transmute(simd_select_bitmask(k, shf, zero))
7252}
7253
7254/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
7255///
7256/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
7257#[inline]
7258#[target_feature(enable = "avx512f")]
7259#[cfg_attr(test, assert_instr(vpsrad))]
7260pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
7261 transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
7262}
7263
7264/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7265///
7266/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
7267#[inline]
7268#[target_feature(enable = "avx512f")]
7269#[cfg_attr(test, assert_instr(vpsrad))]
7270pub unsafe fn _mm512_mask_sra_epi32(
7271 src: __m512i,
7272 k: __mmask16,
7273 a: __m512i,
7274 count: __m128i,
7275) -> __m512i {
7276 let shf = _mm512_sra_epi32(a, count).as_i32x16();
7277 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7278}
7279
7280/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7281///
7282/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
7283#[inline]
7284#[target_feature(enable = "avx512f")]
7285#[cfg_attr(test, assert_instr(vpsrad))]
7286pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
7287 let shf = _mm512_sra_epi32(a, count).as_i32x16();
7288 let zero = _mm512_setzero_si512().as_i32x16();
7289 transmute(simd_select_bitmask(k, shf, zero))
7290}
7291
7292/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
7293///
7294/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
7295#[inline]
7296#[target_feature(enable = "avx512f")]
7297#[cfg_attr(test, assert_instr(vpsraq))]
7298pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
7299 transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
7300}
7301
7302/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7303///
7304/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
7305#[inline]
7306#[target_feature(enable = "avx512f")]
7307#[cfg_attr(test, assert_instr(vpsraq))]
7308pub unsafe fn _mm512_mask_sra_epi64(
7309 src: __m512i,
7310 k: __mmask8,
7311 a: __m512i,
7312 count: __m128i,
7313) -> __m512i {
7314 let shf = _mm512_sra_epi64(a, count).as_i64x8();
7315 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7316}
7317
7318/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7319///
7320/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415)
7321#[inline]
7322#[target_feature(enable = "avx512f")]
7323#[cfg_attr(test, assert_instr(vpsraq))]
7324pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
7325 let shf = _mm512_sra_epi64(a, count).as_i64x8();
7326 let zero = _mm512_setzero_si512().as_i64x8();
7327 transmute(simd_select_bitmask(k, shf, zero))
7328}
7329
7330/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
7331///
7332/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
7333#[inline]
7334#[target_feature(enable = "avx512f")]
7335#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
7336#[rustc_args_required_const(1)]
7337pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
7338 macro_rules! call {
7339 ($imm8:expr) => {
7340 vpsraid(a.as_i32x16(), $imm8)
7341 };
7342 }
7343 let r = constify_imm8_sae!(imm8, call);
7344 transmute(r)
7345}
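
// Arithmetic-shift sketch (illustrative only): `srai` copies the sign bit in from
// the left, so negative lanes stay negative, whereas `_mm512_srli_epi32` would
// shift in zeros instead.
//
//     let a = _mm512_set1_epi32(-8);
//     let r = _mm512_srai_epi32(a, 1);
//     // every lane of r == -4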
7346
7347/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7348///
7349/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
7350#[inline]
7351#[target_feature(enable = "avx512f")]
7352#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
7353#[rustc_args_required_const(3)]
7354pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
7355 macro_rules! call {
7356 ($imm8:expr) => {
7357 vpsraid(a.as_i32x16(), $imm8)
7358 };
7359 }
7360 let shf = constify_imm8_sae!(imm8, call);
7361 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7362}
7363
7364/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7365///
7366/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
7367#[inline]
7368#[target_feature(enable = "avx512f")]
7369#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
7370#[rustc_args_required_const(2)]
7371pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
7372 macro_rules! call {
7373 ($imm8:expr) => {
7374 vpsraid(a.as_i32x16(), $imm8)
7375 };
7376 }
7377 let shf = constify_imm8_sae!(imm8, call);
7378 let zero = _mm512_setzero_si512().as_i32x16();
7379 transmute(simd_select_bitmask(k, shf, zero))
7380}
7381
7382/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
7383///
7384/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
7385#[inline]
7386#[target_feature(enable = "avx512f")]
7387#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
7388#[rustc_args_required_const(1)]
7389pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
7390 macro_rules! call {
7391 ($imm8:expr) => {
7392 vpsraiq(a.as_i64x8(), $imm8)
7393 };
7394 }
7395 let r = constify_imm8_sae!(imm8, call);
7396 transmute(r)
7397}
7398
7399/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7400///
7401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
7402#[inline]
7403#[target_feature(enable = "avx512f")]
7404#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
7405#[rustc_args_required_const(3)]
7406pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
7407 macro_rules! call {
7408 ($imm8:expr) => {
7409 vpsraiq(a.as_i64x8(), $imm8)
7410 };
7411 }
7412 let shf = constify_imm8_sae!(imm8, call);
7413 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7414}
7415
7416/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7417///
7418/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
7419#[inline]
7420#[target_feature(enable = "avx512f")]
7421#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
7422#[rustc_args_required_const(2)]
7423pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
7424 macro_rules! call {
7425 ($imm8:expr) => {
7426 vpsraiq(a.as_i64x8(), $imm8)
7427 };
7428 }
7429 let shf = constify_imm8_sae!(imm8, call);
7430 let zero = _mm512_setzero_si512().as_i64x8();
7431 transmute(simd_select_bitmask(k, shf, zero))
7432}
7433
7434/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
7435///
7436/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465)
7437#[inline]
7438#[target_feature(enable = "avx512f")]
7439#[cfg_attr(test, assert_instr(vpsravd))]
7440pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
7441 transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
7442}
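
// Per-lane shift sketch (illustrative only): each lane is shifted by its own count
// taken from the corresponding lane of `count`, with the sign bit shifted in.
//
//     let a = _mm512_set1_epi32(-16);
//     let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0);
//     let r = _mm512_srav_epi32(a, count);
//     // lane i of r == -16 >> count[i], e.g. lane 1 == -8, lane 4 == -1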
7443
7444/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7445///
7446/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463)
7447#[inline]
7448#[target_feature(enable = "avx512f")]
7449#[cfg_attr(test, assert_instr(vpsravd))]
7450pub unsafe fn _mm512_mask_srav_epi32(
7451 src: __m512i,
7452 k: __mmask16,
7453 a: __m512i,
7454 count: __m512i,
7455) -> __m512i {
7456 let shf = _mm512_srav_epi32(a, count).as_i32x16();
7457 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7458}
7459
7460/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7461///
7462/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464)
7463#[inline]
7464#[target_feature(enable = "avx512f")]
7465#[cfg_attr(test, assert_instr(vpsravd))]
7466pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
7467 let shf = _mm512_srav_epi32(a, count).as_i32x16();
7468 let zero = _mm512_setzero_si512().as_i32x16();
7469 transmute(simd_select_bitmask(k, shf, zero))
7470}
7471
7472/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
7473///
7474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474)
7475#[inline]
7476#[target_feature(enable = "avx512f")]
7477#[cfg_attr(test, assert_instr(vpsravq))]
7478pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
7479 transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
7480}
7481
7482/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7483///
7484/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472)
7485#[inline]
7486#[target_feature(enable = "avx512f")]
7487#[cfg_attr(test, assert_instr(vpsravq))]
7488pub unsafe fn _mm512_mask_srav_epi64(
7489 src: __m512i,
7490 k: __mmask8,
7491 a: __m512i,
7492 count: __m512i,
7493) -> __m512i {
7494 let shf = _mm512_srav_epi64(a, count).as_i64x8();
7495 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7496}
7497
7498/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7499///
7500/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473)
7501#[inline]
7502#[target_feature(enable = "avx512f")]
7503#[cfg_attr(test, assert_instr(vpsravq))]
7504pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
7505 let shf = _mm512_srav_epi64(a, count).as_i64x8();
7506 let zero = _mm512_setzero_si512().as_i64x8();
7507 transmute(simd_select_bitmask(k, shf, zero))
7508}
7509
7510/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
7511///
7512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703)
7513#[inline]
7514#[target_feature(enable = "avx512f")]
7515#[cfg_attr(test, assert_instr(vprolvd))]
7516pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
7517 transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
7518}
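
// Variable-rotate sketch (illustrative only): lane i is rotated left by b[i] bits,
// so each lane can use a different rotate amount.
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let r = _mm512_rolv_epi32(a, b);
//     // lane i of r == 1 << i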
7519
7520/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7521///
7522/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701)
7523#[inline]
7524#[target_feature(enable = "avx512f")]
7525#[cfg_attr(test, assert_instr(vprolvd))]
7526pub unsafe fn _mm512_mask_rolv_epi32(
7527 src: __m512i,
7528 k: __mmask16,
7529 a: __m512i,
7530 b: __m512i,
7531) -> __m512i {
7532 let rol = _mm512_rolv_epi32(a, b).as_i32x16();
7533 transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
7534}
7535
7536/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7537///
7538/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702)
7539#[inline]
7540#[target_feature(enable = "avx512f")]
7541#[cfg_attr(test, assert_instr(vprolvd))]
7542pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
7543 let rol = _mm512_rolv_epi32(a, b).as_i32x16();
7544 let zero = _mm512_setzero_si512().as_i32x16();
7545 transmute(simd_select_bitmask(k, rol, zero))
7546}
7547
7548/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
7549///
7550/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739)
7551#[inline]
7552#[target_feature(enable = "avx512f")]
7553#[cfg_attr(test, assert_instr(vprorvd))]
7554pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
7555 transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
7556}
7557
7558/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7559///
7560/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737)
7561#[inline]
7562#[target_feature(enable = "avx512f")]
7563#[cfg_attr(test, assert_instr(vprorvd))]
7564pub unsafe fn _mm512_mask_rorv_epi32(
7565 src: __m512i,
7566 k: __mmask16,
7567 a: __m512i,
7568 b: __m512i,
7569) -> __m512i {
7570 let ror = _mm512_rorv_epi32(a, b).as_i32x16();
7571 transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
7572}
7573
7574/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7575///
7576/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738)
7577#[inline]
7578#[target_feature(enable = "avx512f")]
7579#[cfg_attr(test, assert_instr(vprorvd))]
7580pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
7581 let ror = _mm512_rorv_epi32(a, b).as_i32x16();
7582 let zero = _mm512_setzero_si512().as_i32x16();
7583 transmute(simd_select_bitmask(k, ror, zero))
7584}
7585
7586/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
7587///
7588/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712)
7589#[inline]
7590#[target_feature(enable = "avx512f")]
7591#[cfg_attr(test, assert_instr(vprolvq))]
7592pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
7593 transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
7594}
7595
7596/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7597///
7598/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710)
7599#[inline]
7600#[target_feature(enable = "avx512f")]
7601#[cfg_attr(test, assert_instr(vprolvq))]
7602pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
7603 let rol = _mm512_rolv_epi64(a, b).as_i64x8();
7604 transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
7605}
7606
7607/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7608///
7609/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711)
7610#[inline]
7611#[target_feature(enable = "avx512f")]
7612#[cfg_attr(test, assert_instr(vprolvq))]
7613pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
7614 let rol = _mm512_rolv_epi64(a, b).as_i64x8();
7615 let zero = _mm512_setzero_si512().as_i64x8();
7616 transmute(simd_select_bitmask(k, rol, zero))
7617}
7618
7619/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
7620///
7621/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748)
7622#[inline]
7623#[target_feature(enable = "avx512f")]
7624#[cfg_attr(test, assert_instr(vprorvq))]
7625pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
7626 transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
7627}
7628
7629/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7630///
7631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746)
7632#[inline]
7633#[target_feature(enable = "avx512f")]
7634#[cfg_attr(test, assert_instr(vprorvq))]
7635pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
7636 let ror = _mm512_rorv_epi64(a, b).as_i64x8();
7637 transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
7638}
7639
7640/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7641///
7642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747)
7643#[inline]
7644#[target_feature(enable = "avx512f")]
7645#[cfg_attr(test, assert_instr(vprorvq))]
7646pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
7647 let ror = _mm512_rorv_epi64(a, b).as_i64x8();
7648 let zero = _mm512_setzero_si512().as_i64x8();
7649 transmute(simd_select_bitmask(k, ror, zero))
7650}
7651
7652/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7653///
7654/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342)
7655#[inline]
7656#[target_feature(enable = "avx512f")]
7657#[cfg_attr(test, assert_instr(vpsllvd))]
7658pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
7659 transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
7660}
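
// Variable-shift sketch (illustrative only): each lane shifts by its own count;
// counts of 32 or more zero the lane instead of wrapping.
//
//     let a = _mm512_set1_epi32(1);
//     let count = _mm512_setr_epi32(0, 1, 2, 3, 31, 32, 33, 0, 1, 2, 3, 31, 32, 33, 0, 1);
//     let r = _mm512_sllv_epi32(a, count);
//     // lanes shifted by 32 or 33 become 0; the lanes shifted by 31 become 1 << 31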
7661
7662/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7663///
7664/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340)
7665#[inline]
7666#[target_feature(enable = "avx512f")]
7667#[cfg_attr(test, assert_instr(vpsllvd))]
7668pub unsafe fn _mm512_mask_sllv_epi32(
7669 src: __m512i,
7670 k: __mmask16,
7671 a: __m512i,
7672 count: __m512i,
7673) -> __m512i {
7674 let shf = _mm512_sllv_epi32(a, count).as_i32x16();
7675 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7676}
7677
7678/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7679///
7680/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
7681#[inline]
7682#[target_feature(enable = "avx512f")]
7683#[cfg_attr(test, assert_instr(vpsllvd))]
7684pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
7685 let shf = _mm512_sllv_epi32(a, count).as_i32x16();
7686 let zero = _mm512_setzero_si512().as_i32x16();
7687 transmute(simd_select_bitmask(k, shf, zero))
7688}
7689
7690/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7691///
7692/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
7693#[inline]
7694#[target_feature(enable = "avx512f")]
7695#[cfg_attr(test, assert_instr(vpsrlvd))]
7696pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
7697 transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
7698}
7699
7700/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7701///
7702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
7703#[inline]
7704#[target_feature(enable = "avx512f")]
7705#[cfg_attr(test, assert_instr(vpsrlvd))]
7706pub unsafe fn _mm512_mask_srlv_epi32(
7707 src: __m512i,
7708 k: __mmask16,
7709 a: __m512i,
7710 count: __m512i,
7711) -> __m512i {
7712 let shf = _mm512_srlv_epi32(a, count).as_i32x16();
7713 transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
7714}
7715
7716/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7717///
7718/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
7719#[inline]
7720#[target_feature(enable = "avx512f")]
7721#[cfg_attr(test, assert_instr(vpsrlvd))]
7722pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
7723 let shf = _mm512_srlv_epi32(a, count).as_i32x16();
7724 let zero = _mm512_setzero_si512().as_i32x16();
7725 transmute(simd_select_bitmask(k, shf, zero))
7726}
7727
7728/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7729///
7730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
7731#[inline]
7732#[target_feature(enable = "avx512f")]
7733#[cfg_attr(test, assert_instr(vpsllvq))]
7734pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
7735 transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
7736}
7737
7738/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7739///
7740/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
7741#[inline]
7742#[target_feature(enable = "avx512f")]
7743#[cfg_attr(test, assert_instr(vpsllvq))]
7744pub unsafe fn _mm512_mask_sllv_epi64(
7745 src: __m512i,
7746 k: __mmask8,
7747 a: __m512i,
7748 count: __m512i,
7749) -> __m512i {
7750 let shf = _mm512_sllv_epi64(a, count).as_i64x8();
7751 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7752}
7753
7754/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7755///
7756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
7757#[inline]
7758#[target_feature(enable = "avx512f")]
7759#[cfg_attr(test, assert_instr(vpsllvq))]
7760pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
7761 let shf = _mm512_sllv_epi64(a, count).as_i64x8();
7762 let zero = _mm512_setzero_si512().as_i64x8();
7763 transmute(simd_select_bitmask(k, shf, zero))
7764}
7765
7766/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
7767///
7768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563)
7769#[inline]
7770#[target_feature(enable = "avx512f")]
7771#[cfg_attr(test, assert_instr(vpsrlvq))]
7772pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
7773 transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
7774}
7775
7776/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7777///
7778/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi64&expand=5561)
7779#[inline]
7780#[target_feature(enable = "avx512f")]
7781#[cfg_attr(test, assert_instr(vpsrlvq))]
7782pub unsafe fn _mm512_mask_srlv_epi64(
7783 src: __m512i,
7784 k: __mmask8,
7785 a: __m512i,
7786 count: __m512i,
7787) -> __m512i {
7788 let shf = _mm512_srlv_epi64(a, count).as_i64x8();
7789 transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
7790}
7791
7792/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7793///
7794/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562)
7795#[inline]
7796#[target_feature(enable = "avx512f")]
7797#[cfg_attr(test, assert_instr(vpsrlvq))]
7798pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
7799 let shf = _mm512_srlv_epi64(a, count).as_i64x8();
7800 let zero = _mm512_setzero_si512().as_i64x8();
7801 transmute(simd_select_bitmask(k, shf, zero))
7802}
7803
7804/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
7805///
7806/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permute_ps&expand=4170)
7807#[inline]
7808#[target_feature(enable = "avx512f")]
7809#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
7810#[rustc_args_required_const(1)]
7811pub unsafe fn _mm512_permute_ps(a: __m512, imm8: i32) -> __m512 {
7812 macro_rules! call {
7813 ($imm8:expr) => {
7814 vpermilps(a.as_f32x16(), _mm512_set1_epi32($imm8).as_i32x16())
7815 };
7816 }
7817 let r = constify_imm8_sae!(imm8, call);
7818 transmute(r)
7819}
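// A sketch of the in-lane permute: the same four 2-bit selectors in imm8 are
// applied to every group of four floats. Assuming `a` is any __m512 value,
// imm8 = 0b00_01_10_11 reverses each 128-bit lane, so a lane holding
// [x0, x1, x2, x3] becomes [x3, x2, x1, x0]:
//
//     let r = _mm512_permute_ps(a, 0b00_01_10_11);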
7820
7821/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7822///
7823/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permute_ps&expand=4168)
7824#[inline]
7825#[target_feature(enable = "avx512f")]
7826#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
7827#[rustc_args_required_const(3)]
7828pub unsafe fn _mm512_mask_permute_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
7829 macro_rules! call {
7830 ($imm8:expr) => {
7831 vpermilps(a.as_f32x16(), _mm512_set1_epi32($imm8).as_i32x16())
7832 };
7833 }
7834 let permute = constify_imm8_sae!(imm8, call);
7835 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
7836}
7837
7838/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7839///
7840/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permute_ps&expand=4169)
7841#[inline]
7842#[target_feature(enable = "avx512f")]
7843#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
7844#[rustc_args_required_const(2)]
7845pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
7846 macro_rules! call {
7847 ($imm8:expr) => {
7848 vpermilps(a.as_f32x16(), _mm512_set1_epi32($imm8).as_i32x16())
7849 };
7850 }
7851 let permute = constify_imm8_sae!(imm8, call);
7852 let zero = _mm512_setzero_ps().as_f32x16();
7853 transmute(simd_select_bitmask(k, permute, zero))
7854}
7855
7856/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
7857///
7858/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permute_pd&expand=4161)
7859#[inline]
7860#[target_feature(enable = "avx512f")]
7861#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
7862#[rustc_args_required_const(1)]
7863pub unsafe fn _mm512_permute_pd(a: __m512d, imm8: i32) -> __m512d {
7864 macro_rules! call {
7865 ($imm8:expr) => {
7866 vpermilpd(a.as_f64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7867 };
7868 }
7869 let r = constify_imm8_sae!(imm8, call);
7870 transmute(r)
7871}
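// A sketch of the in-lane double permute: bit i of imm8 picks the low or high
// double of the 128-bit lane that result element i lives in, so
// imm8 = 0b0101_0101 swaps the two doubles inside every 128-bit lane:
//
//     let r = _mm512_permute_pd(a, 0b01010101);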
7872
7873/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7874///
7875/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permute_pd&expand=4159)
7876#[inline]
7877#[target_feature(enable = "avx512f")]
7878#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
7879#[rustc_args_required_const(3)]
7880pub unsafe fn _mm512_mask_permute_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
7881 macro_rules! call {
7882 ($imm8:expr) => {
7883 vpermilpd(a.as_f64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7884 };
7885 }
7886 let permute = constify_imm8_sae!(imm8, call);
7887 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
7888}
7889
7890/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7891///
7892/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permute_pd&expand=4160)
7893#[inline]
7894#[target_feature(enable = "avx512f")]
7895#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
7896#[rustc_args_required_const(2)]
7897pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
7898 macro_rules! call {
7899 ($imm8:expr) => {
7900 vpermilpd(a.as_f64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7901 };
7902 }
7903 let permute = constify_imm8_sae!(imm8, call);
7904 let zero = _mm512_setzero_pd().as_f64x8();
7905 transmute(simd_select_bitmask(k, permute, zero))
7906}
7907
7908/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
7909///
7910/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex_epi64&expand=4208)
7911#[inline]
7912#[target_feature(enable = "avx512f")]
7913#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))]
7914//should be vpermq, but the compiler generates vpermpd (it does emit vpermq when a mask is used); changed to vbroadcast because of CI failures on Windows
7915#[rustc_args_required_const(1)]
7916pub unsafe fn _mm512_permutex_epi64(a: __m512i, imm8: i32) -> __m512i {
7917 macro_rules! call {
7918 ($imm8:expr) => {
7919 vpermq(a.as_i64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7920 };
7921 }
7922 let r = constify_imm8_sae!(imm8, call);
7923 transmute(r)
7924}
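// A sketch of the 256-bit-lane permute: imm8 holds four 2-bit selectors that
// are applied to both 256-bit halves. With imm8 = 0 each half broadcasts its
// own lowest 64-bit element, so if `a` held [0, 1, 2, 3, 4, 5, 6, 7] the
// result would be [0, 0, 0, 0, 4, 4, 4, 4]:
//
//     let r = _mm512_permutex_epi64(a, 0);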
7925
7926/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7927///
7928/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex_epi64&expand=4206)
7929#[inline]
7930#[target_feature(enable = "avx512f")]
7931#[cfg_attr(test, assert_instr(vpbroadcast, imm8 = 0b11111111))] //should be vpermq; changed to vpbroadcast because of CI failures on Windows
7932#[rustc_args_required_const(3)]
7933pub unsafe fn _mm512_mask_permutex_epi64(
7934 src: __m512i,
7935 k: __mmask8,
7936 a: __m512i,
7937 imm8: i32,
7938) -> __m512i {
7939 macro_rules! call {
7940 ($imm8:expr) => {
7941 vpermq(a.as_i64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7942 };
7943 }
7944 let permute = constify_imm8_sae!(imm8, call);
7945 transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
7946}
7947
7948/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
7949///
7950/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex_epi64&expand=4207)
7951#[inline]
7952#[target_feature(enable = "avx512f")]
7953#[cfg_attr(test, assert_instr(vpbroadcast, imm8 = 0b11111111))] //should be vpermq; changed to vpbroadcast because of CI failures on Windows
7954#[rustc_args_required_const(2)]
7955pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
7956 macro_rules! call {
7957 ($imm8:expr) => {
7958 vpermq(a.as_i64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7959 };
7960 }
7961 let permute = constify_imm8_sae!(imm8, call);
7962 let zero = _mm512_setzero_si512().as_i64x8();
7963 transmute(simd_select_bitmask(k, permute, zero))
7964}
7965
7966/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
7967///
7968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex_pd&expand=4214)
7969#[inline]
7970#[target_feature(enable = "avx512f")]
7971#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; changed to vbroadcast because of CI failures on Windows
7972#[rustc_args_required_const(1)]
7973pub unsafe fn _mm512_permutex_pd(a: __m512d, imm8: i32) -> __m512d {
7974 macro_rules! call {
7975 ($imm8:expr) => {
7976 vpermpd(a.as_f64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7977 };
7978 }
7979 let r = constify_imm8_sae!(imm8, call);
7980 transmute(r)
7981}
7982
7983/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
7984///
7985/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex_pd&expand=4212)
7986#[inline]
7987#[target_feature(enable = "avx512f")]
7988#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; changed to vbroadcast because of CI failures on Windows
7989#[rustc_args_required_const(3)]
7990pub unsafe fn _mm512_mask_permutex_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
7991 macro_rules! call {
7992 ($imm8:expr) => {
7993 vpermpd(a.as_f64x8(), _mm512_set1_epi64($imm8).as_i64x8())
7994 };
7995 }
7996 let permute = constify_imm8_sae!(imm8, call);
7997 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
7998}
7999
8000/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8001///
8002/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex_pd&expand=4213)
8003#[inline]
8004#[target_feature(enable = "avx512f")]
8005#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; changed to vbroadcast because of CI failures on Windows
8006#[rustc_args_required_const(2)]
8007pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
8008 macro_rules! call {
8009 ($imm8:expr) => {
8010 vpermpd(a.as_f64x8(), _mm512_set1_epi64($imm8).as_i64x8())
8011 };
8012 }
8013 let permute = constify_imm8_sae!(imm8, call);
8014 let zero = _mm512_setzero_pd().as_f64x8();
8015 transmute(simd_select_bitmask(k, permute, zero))
8016}
8017
8018/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
8019///
8020/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_epi32&expand=4182)
8021#[inline]
8022#[target_feature(enable = "avx512f")]
8023#[cfg_attr(test, assert_instr(vperm))] //should be vpermd, but the compiler generates vpermps (it does emit vpermd when a mask is used)
8024pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
8025 transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
8026}
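// A sketch of the full cross-lane shuffle (note the (idx, a) argument order);
// only the low four bits of each index element are used:
//
//     let idx = _mm512_set1_epi32(0);
//     let r = _mm512_permutevar_epi32(idx, a);
//     // every 32-bit lane of `r` is a copy of lane 0 of `a`.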
8027
8028/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
8029///
8030/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_epi32&expand=4181)
8031#[inline]
8032#[target_feature(enable = "avx512f")]
8033#[cfg_attr(test, assert_instr(vpermd))]
8034pub unsafe fn _mm512_mask_permutevar_epi32(
8035 src: __m512i,
8036 k: __mmask16,
8037 idx: __m512i,
8038 a: __m512i,
8039) -> __m512i {
8040 let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
8041 transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
8042}
8043
8044/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
8045///
8046/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_ps&expand=4200)
8047#[inline]
8048#[target_feature(enable = "avx512f")]
8049#[cfg_attr(test, assert_instr(vpermilps))]
8050pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
8051 transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
8052}
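// A sketch of the in-lane variable permute: the low two bits of each element
// of `b` select one of the four floats in the same 128-bit lane of `a`:
//
//     let b = _mm512_set1_epi32(3);
//     let r = _mm512_permutevar_ps(a, b);
//     // each 128-bit lane of `r` holds four copies of that lane's element 3.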
8053
8054/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8055///
8056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_ps&expand=4198)
8057#[inline]
8058#[target_feature(enable = "avx512f")]
8059#[cfg_attr(test, assert_instr(vpermilps))]
8060pub unsafe fn _mm512_mask_permutevar_ps(
8061 src: __m512,
8062 k: __mmask16,
8063 a: __m512,
8064 b: __m512i,
8065) -> __m512 {
8066 let permute = _mm512_permutevar_ps(a, b).as_f32x16();
8067 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
8068}
8069
8070/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8071///
8072/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutevar_ps&expand=4199)
8073#[inline]
8074#[target_feature(enable = "avx512f")]
8075#[cfg_attr(test, assert_instr(vpermilps))]
8076pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
8077 let permute = _mm512_permutevar_ps(a, b).as_f32x16();
8078 let zero = _mm512_setzero_ps().as_f32x16();
8079 transmute(simd_select_bitmask(k, permute, zero))
8080}
8081
8082/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
8083///
8084/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_pd&expand=4191)
8085#[inline]
8086#[target_feature(enable = "avx512f")]
8087#[cfg_attr(test, assert_instr(vpermilpd))]
8088pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
8089 transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
8090}
8091
8092/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8093///
8094/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_pd&expand=4189)
8095#[inline]
8096#[target_feature(enable = "avx512f")]
8097#[cfg_attr(test, assert_instr(vpermilpd))]
8098pub unsafe fn _mm512_mask_permutevar_pd(
8099 src: __m512d,
8100 k: __mmask8,
8101 a: __m512d,
8102 b: __m512i,
8103) -> __m512d {
8104 let permute = _mm512_permutevar_pd(a, b).as_f64x8();
8105 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
8106}
8107
8108/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8109///
8110/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutevar_pd&expand=4190)
8111#[inline]
8112#[target_feature(enable = "avx512f")]
8113#[cfg_attr(test, assert_instr(vpermilpd))]
8114pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
8115 let permute = _mm512_permutevar_pd(a, b).as_f64x8();
8116 let zero = _mm512_setzero_pd().as_f64x8();
8117 transmute(simd_select_bitmask(k, permute, zero))
8118}
8119
8120/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8121///
8122/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_epi32&expand=4301)
8123#[inline]
8124#[target_feature(enable = "avx512f")]
8125#[cfg_attr(test, assert_instr(vperm))] //should be vpermd, but the compiler generates vpermps (it does emit vpermd when a mask is used)
8126pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
8127 transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
8128}
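// A sketch of the 16-element cross-lane shuffle: result lane i is a's lane
// idx[i] & 0xF, so an index vector of all 15s broadcasts the highest lane:
//
//     let idx = _mm512_set1_epi32(15);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // every 32-bit lane of `r` is a copy of lane 15 of `a`.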
8129
8130/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8131///
8132/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_epi32&expand=4299)
8133#[inline]
8134#[target_feature(enable = "avx512f")]
8135#[cfg_attr(test, assert_instr(vpermd))]
8136pub unsafe fn _mm512_mask_permutexvar_epi32(
8137 src: __m512i,
8138 k: __mmask16,
8139 idx: __m512i,
8140 a: __m512i,
8141) -> __m512i {
8142 let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
8143 transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
8144}
8145
8146/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8147///
8148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_epi32&expand=4300)
8149#[inline]
8150#[target_feature(enable = "avx512f")]
8151#[cfg_attr(test, assert_instr(vpermd))]
8152pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
8153 let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
8154 let zero = _mm512_setzero_si512().as_i32x16();
8155 transmute(simd_select_bitmask(k, permute, zero))
8156}
8157
8158/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
8159///
8160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_epi64&expand=4307)
8161#[inline]
8162#[target_feature(enable = "avx512f")]
8163#[cfg_attr(test, assert_instr(vperm))] //should be vpermq, but the compiler generates vpermpd (it does emit vpermq when a mask is used)
8164pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
8165 transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
8166}
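// The 64-bit counterpart indexes an 8-element table; only the low three bits
// of each index are used. A minimal sketch:
//
//     let idx = _mm512_set1_epi64(7);
//     let r = _mm512_permutexvar_epi64(idx, a);
//     // every 64-bit lane of `r` is a copy of lane 7 of `a`.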
8167
8168/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8169///
8170/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_epi64&expand=4305)
8171#[inline]
8172#[target_feature(enable = "avx512f")]
8173#[cfg_attr(test, assert_instr(vpermq))]
8174pub unsafe fn _mm512_mask_permutexvar_epi64(
8175 src: __m512i,
8176 k: __mmask8,
8177 idx: __m512i,
8178 a: __m512i,
8179) -> __m512i {
8180 let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
8181 transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
8182}
8183
8184/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8185///
8186/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_epi64&expand=4306)
8187#[inline]
8188#[target_feature(enable = "avx512f")]
8189#[cfg_attr(test, assert_instr(vpermq))]
8190pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
8191 let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
8192 let zero = _mm512_setzero_si512().as_i64x8();
8193 transmute(simd_select_bitmask(k, permute, zero))
8194}
8195
8196/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
8197///
8198/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_ps&expand=4200)
8199#[inline]
8200#[target_feature(enable = "avx512f")]
8201#[cfg_attr(test, assert_instr(vpermps))]
8202pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
8203 transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
8204}
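// A sketch of the floating-point cross-lane shuffle; the indices are carried
// in an integer vector even though the data are floats:
//
//     let idx = _mm512_set1_epi32(0);
//     let r = _mm512_permutexvar_ps(idx, a);
//     // every float lane of `r` is a copy of lane 0 of `a`.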
8205
8206/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8207///
8208/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_ps&expand=4326)
8209#[inline]
8210#[target_feature(enable = "avx512f")]
8211#[cfg_attr(test, assert_instr(vpermps))]
8212pub unsafe fn _mm512_mask_permutexvar_ps(
8213 src: __m512,
8214 k: __mmask16,
8215 idx: __m512i,
8216 a: __m512,
8217) -> __m512 {
8218 let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
8219 transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
8220}
8221
8222/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8223///
8224/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_ps&expand=4327)
8225#[inline]
8226#[target_feature(enable = "avx512f")]
8227#[cfg_attr(test, assert_instr(vpermps))]
8228pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
8229 let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
8230 let zero = _mm512_setzero_ps().as_f32x16();
8231 transmute(simd_select_bitmask(k, permute, zero))
8232}
8233
8234/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
8235///
8236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_pd&expand=4322)
8237#[inline]
8238#[target_feature(enable = "avx512f")]
8239#[cfg_attr(test, assert_instr(vpermpd))]
8240pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
8241 transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
8242}
8243
8244/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8245///
8246/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_pd&expand=4320)
8247#[inline]
8248#[target_feature(enable = "avx512f")]
8249#[cfg_attr(test, assert_instr(vpermpd))]
8250pub unsafe fn _mm512_mask_permutexvar_pd(
8251 src: __m512d,
8252 k: __mmask8,
8253 idx: __m512i,
8254 a: __m512d,
8255) -> __m512d {
8256 let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
8257 transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
8258}
8259
8260/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8261///
8262/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_pd&expand=4321)
8263#[inline]
8264#[target_feature(enable = "avx512f")]
8265#[cfg_attr(test, assert_instr(vpermpd))]
8266pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
8267 let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
8268 let zero = _mm512_setzero_pd().as_f64x8();
8269 transmute(simd_select_bitmask(k, permute, zero))
8270}
8271
8272/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8273///
8274/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_epi32&expand=4238)
8275#[inline]
8276#[target_feature(enable = "avx512f")]
8277#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
8278pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
8279 transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
8280}
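// A sketch of the two-source shuffle: `a` and `b` form a 32-entry table and
// bit 4 of each index selects the source, so indices 0..=15 read from `a`
// while 16..=31 read from `b`:
//
//     let idx = _mm512_set1_epi32(16);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // every 32-bit lane of `r` is a copy of lane 0 of `b`.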
8281
8282/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8283///
8284/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_epi32&expand=4235)
8285#[inline]
8286#[target_feature(enable = "avx512f")]
8287#[cfg_attr(test, assert_instr(vpermt2d))]
8288pub unsafe fn _mm512_mask_permutex2var_epi32(
8289 a: __m512i,
8290 k: __mmask16,
8291 idx: __m512i,
8292 b: __m512i,
8293) -> __m512i {
8294 let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
8295 transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
8296}
8297
8298/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8299///
8300/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_epi32&expand=4237)
8301#[inline]
8302#[target_feature(enable = "avx512f")]
8303#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
8304pub unsafe fn _mm512_maskz_permutex2var_epi32(
8305 k: __mmask16,
8306 a: __m512i,
8307 idx: __m512i,
8308 b: __m512i,
8309) -> __m512i {
8310 let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
8311 let zero = _mm512_setzero_si512().as_i32x16();
8312 transmute(simd_select_bitmask(k, permute, zero))
8313}
8314
8315/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8316///
8317/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_epi32&expand=4236)
8318#[inline]
8319#[target_feature(enable = "avx512f")]
8320#[cfg_attr(test, assert_instr(vpermi2d))]
8321pub unsafe fn _mm512_mask2_permutex2var_epi32(
8322 a: __m512i,
8323 idx: __m512i,
8324 k: __mmask16,
8325 b: __m512i,
8326) -> __m512i {
8327 let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
8328 transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
8329}
8330
8331/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8332///
8333/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_epi64&expand=4250)
8334#[inline]
8335#[target_feature(enable = "avx512f")]
8336#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
8337pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
8338 transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
8339}
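// The 64-bit two-source shuffle indexes a 16-entry table; bit 3 of each index
// selects between `a` (0..=7) and `b` (8..=15). A minimal sketch:
//
//     let idx = _mm512_set1_epi64(8);
//     let r = _mm512_permutex2var_epi64(a, idx, b);
//     // every 64-bit lane of `r` is a copy of lane 0 of `b`.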
8340
8341/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8342///
8343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_epi64&expand=4247)
8344#[inline]
8345#[target_feature(enable = "avx512f")]
8346#[cfg_attr(test, assert_instr(vpermt2q))]
8347pub unsafe fn _mm512_mask_permutex2var_epi64(
8348 a: __m512i,
8349 k: __mmask8,
8350 idx: __m512i,
8351 b: __m512i,
8352) -> __m512i {
8353 let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
8354 transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
8355}
8356
8357/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8358///
8359/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_epi64&expand=4249)
8360#[inline]
8361#[target_feature(enable = "avx512f")]
8362#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
8363pub unsafe fn _mm512_maskz_permutex2var_epi64(
8364 k: __mmask8,
8365 a: __m512i,
8366 idx: __m512i,
8367 b: __m512i,
8368) -> __m512i {
8369 let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
8370 let zero = _mm512_setzero_si512().as_i64x8();
8371 transmute(simd_select_bitmask(k, permute, zero))
8372}
8373
8374/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8375///
8376/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_epi64&expand=4248)
8377#[inline]
8378#[target_feature(enable = "avx512f")]
8379#[cfg_attr(test, assert_instr(vpermi2q))]
8380pub unsafe fn _mm512_mask2_permutex2var_epi64(
8381 a: __m512i,
8382 idx: __m512i,
8383 k: __mmask8,
8384 b: __m512i,
8385) -> __m512i {
8386 let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
8387 transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
8388}
8389
8390/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8391///
8392/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_ps&expand=4286)
8393#[inline]
8394#[target_feature(enable = "avx512f")]
8395#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
8396pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
8397 transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
8398}
8399
8400/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8401///
8402/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_ps&expand=4283)
8403#[inline]
8404#[target_feature(enable = "avx512f")]
8405#[cfg_attr(test, assert_instr(vpermt2ps))]
8406pub unsafe fn _mm512_mask_permutex2var_ps(
8407 a: __m512,
8408 k: __mmask16,
8409 idx: __m512i,
8410 b: __m512,
8411) -> __m512 {
8412 let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
8413 transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
8414}
8415
8416/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8417///
8418/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_ps&expand=4285)
8419#[inline]
8420#[target_feature(enable = "avx512f")]
8421#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
8422pub unsafe fn _mm512_maskz_permutex2var_ps(
8423 k: __mmask16,
8424 a: __m512,
8425 idx: __m512i,
8426 b: __m512,
8427) -> __m512 {
8428 let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
8429 let zero = _mm512_setzero_ps().as_f32x16();
8430 transmute(simd_select_bitmask(k, permute, zero))
8431}
8432
8433/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
8434///
8435/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_ps&expand=4284)
8436#[inline]
8437#[target_feature(enable = "avx512f")]
8438#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but the compiler emits vpermt2ps
8439pub unsafe fn _mm512_mask2_permutex2var_ps(
8440 a: __m512,
8441 idx: __m512i,
8442 k: __mmask16,
8443 b: __m512,
8444) -> __m512 {
8445 let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
8446 let zero = _mm512_setzero_ps().as_f32x16();
8447 transmute(simd_select_bitmask(k, permute, zero))
8448}
8449
8450/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
8451///
8452/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_pd&expand=4274)
8453#[inline]
8454#[target_feature(enable = "avx512f")]
8455#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
8456pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
8457 transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
8458}
8459
8460/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
8461///
8462/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_pd&expand=4271)
8463#[inline]
8464#[target_feature(enable = "avx512f")]
8465#[cfg_attr(test, assert_instr(vpermt2pd))]
8466pub unsafe fn _mm512_mask_permutex2var_pd(
8467 a: __m512d,
8468 k: __mmask8,
8469 idx: __m512i,
8470 b: __m512d,
8471) -> __m512d {
8472 let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
8473 transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
8474}
8475
8476/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8477///
8478/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_pd&expand=4273)
8479#[inline]
8480#[target_feature(enable = "avx512f")]
8481#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
8482pub unsafe fn _mm512_maskz_permutex2var_pd(
8483 k: __mmask8,
8484 a: __m512d,
8485 idx: __m512i,
8486 b: __m512d,
8487) -> __m512d {
8488 let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
8489 let zero = _mm512_setzero_pd().as_f64x8();
8490 transmute(simd_select_bitmask(k, permute, zero))
8491}
8492
8493/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set)
8494///
8495/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_pd&expand=4272)
8496#[inline]
8497#[target_feature(enable = "avx512f")]
8498#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but the compiler emits vpermt2pd
8499pub unsafe fn _mm512_mask2_permutex2var_pd(
8500 a: __m512d,
8501 idx: __m512i,
8502 k: __mmask8,
8503 b: __m512d,
8504) -> __m512d {
8505 let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
8506 let zero = _mm512_setzero_pd().as_f64x8();
8507 transmute(simd_select_bitmask(k, permute, zero))
8508}
8509
8510/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
8511///
8512/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_epi32&expand=5150)
8513#[inline]
8514#[target_feature(enable = "avx512f")]
8515#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] //should be vpshufd, but the compiler generates vpermilps
8516#[rustc_args_required_const(1)]
8517pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
8518 let imm8 = (imm8 & 0xFF) as u8;
8519
8520 let a = a.as_i32x16();
8521 macro_rules! shuffle4 {
8522 (
8523 $a:expr,
8524 $b:expr,
8525 $c:expr,
8526 $d:expr,
8527 $e:expr,
8528 $f:expr,
8529 $g:expr,
8530 $h:expr,
8531 $i:expr,
8532 $j:expr,
8533 $k:expr,
8534 $l:expr,
8535 $m:expr,
8536 $n:expr,
8537 $o:expr,
8538 $p:expr
8539 ) => {
8540 simd_shuffle16(
8541 a,
8542 a,
8543 [
8544 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
8545 ],
8546 );
8547 };
8548 }
8549 macro_rules! shuffle3 {
8550 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
8551 match (imm8 >> 6) & 0x3 {
8552 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
8553 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
8554 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
8555 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
8556 }
8557 };
8558 }
8559 macro_rules! shuffle2 {
8560 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
8561 match (imm8 >> 4) & 0x3 {
8562 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
8563 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
8564 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
8565 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
8566 }
8567 };
8568 }
8569 macro_rules! shuffle1 {
8570 ($a:expr, $e:expr, $i: expr, $m: expr) => {
8571 match (imm8 >> 2) & 0x3 {
8572 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
8573 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
8574 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
8575 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
8576 }
8577 };
8578 }
8579 let r: i32x16 = match imm8 & 0x3 {
8580 0 => shuffle1!(0, 4, 8, 12),
8581 1 => shuffle1!(1, 5, 9, 13),
8582 2 => shuffle1!(2, 6, 10, 14),
8583 _ => shuffle1!(3, 7, 11, 15),
8584 };
8585 transmute(r)
8586}
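// A sketch of the per-lane integer shuffle. The control is an _MM_PERM_ENUM
// value: an 8-bit field of four 2-bit selectors, using the same encoding as
// _mm_shuffle_epi32; 0b00_01_10_11 reverses the four 32-bit elements inside
// every 128-bit lane:
//
//     let r = _mm512_shuffle_epi32(a, 0b00_01_10_11);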
8587
8588/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8589///
8590/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_epi32&expand=5148)
8591#[inline]
8592#[target_feature(enable = "avx512f")]
8593#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] //should be vpshufd; the unmasked form generates vpermilps
8594#[rustc_args_required_const(3)]
8595pub unsafe fn _mm512_mask_shuffle_epi32(
8596 src: __m512i,
8597 k: __mmask16,
8598 a: __m512i,
8599 imm8: _MM_PERM_ENUM,
8600) -> __m512i {
8601 let imm8 = (imm8 & 0xFF) as u8;
8602
8603 let a = a.as_i32x16();
8604 macro_rules! shuffle4 {
8605 (
8606 $a:expr,
8607 $b:expr,
8608 $c:expr,
8609 $d:expr,
8610 $e:expr,
8611 $f:expr,
8612 $g:expr,
8613 $h:expr,
8614 $i:expr,
8615 $j:expr,
8616 $k:expr,
8617 $l:expr,
8618 $m:expr,
8619 $n:expr,
8620 $o:expr,
8621 $p:expr
8622 ) => {
8623 simd_shuffle16(
8624 a,
8625 a,
8626 [
8627 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
8628 ],
8629 );
8630 };
8631 }
8632 macro_rules! shuffle3 {
8633 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
8634 match (imm8 >> 6) & 0x3 {
8635 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
8636 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
8637 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
8638 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
8639 }
8640 };
8641 }
8642 macro_rules! shuffle2 {
8643 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
8644 match (imm8 >> 4) & 0x3 {
8645 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
8646 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
8647 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
8648 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
8649 }
8650 };
8651 }
8652 macro_rules! shuffle1 {
8653 ($a:expr, $e:expr, $i: expr, $m: expr) => {
8654 match (imm8 >> 2) & 0x3 {
8655 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
8656 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
8657 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
8658 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
8659 }
8660 };
8661 }
8662 let shuffle: i32x16 = match imm8 & 0x3 {
8663 0 => shuffle1!(0, 4, 8, 12),
8664 1 => shuffle1!(1, 5, 9, 13),
8665 2 => shuffle1!(2, 6, 10, 14),
8666 _ => shuffle1!(3, 7, 11, 15),
8667 };
8668 transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
8669}
8670
8671/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8672///
8673/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_epi32&expand=5149)
8674#[inline]
8675#[target_feature(enable = "avx512f")]
8676#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))] //should be vpshufd; the unmasked form generates vpermilps
8677#[rustc_args_required_const(2)]
8678pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
8679 let imm8 = (imm8 & 0xFF) as u8;
8680
8681 let a = a.as_i32x16();
8682 macro_rules! shuffle4 {
8683 (
8684 $a:expr,
8685 $b:expr,
8686 $c:expr,
8687 $d:expr,
8688 $e:expr,
8689 $f:expr,
8690 $g:expr,
8691 $h:expr,
8692 $i:expr,
8693 $j:expr,
8694 $k:expr,
8695 $l:expr,
8696 $m:expr,
8697 $n:expr,
8698 $o:expr,
8699 $p:expr
8700 ) => {
8701 simd_shuffle16(
8702 a,
8703 a,
8704 [
8705 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
8706 ],
8707 );
8708 };
8709 }
8710 macro_rules! shuffle3 {
8711 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
8712 match (imm8 >> 6) & 0x3 {
8713 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
8714 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
8715 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
8716 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
8717 }
8718 };
8719 }
8720 macro_rules! shuffle2 {
8721 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
8722 match (imm8 >> 4) & 0x3 {
8723 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
8724 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
8725 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
8726 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
8727 }
8728 };
8729 }
8730 macro_rules! shuffle1 {
8731 ($a:expr, $e:expr, $i: expr, $m: expr) => {
8732 match (imm8 >> 2) & 0x3 {
8733 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
8734 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
8735 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
8736 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
8737 }
8738 };
8739 }
8740 let shuffle: i32x16 = match imm8 & 0x3 {
8741 0 => shuffle1!(0, 4, 8, 12),
8742 1 => shuffle1!(1, 5, 9, 13),
8743 2 => shuffle1!(2, 6, 10, 14),
8744 _ => shuffle1!(3, 7, 11, 15),
8745 };
8746 let zero = _mm512_setzero_si512().as_i32x16();
8747 transmute(simd_select_bitmask(k, shuffle, zero))
8748}
8749
8750/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
8751///
8752/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_ps&expand=5203)
8753#[inline]
8754#[target_feature(enable = "avx512f")]
8755#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
8756#[rustc_args_required_const(2)]
8757pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 {
8758 let imm8 = (imm8 & 0xFF) as u8;
8759 macro_rules! shuffle4 {
8760 (
8761 $a:expr,
8762 $b:expr,
8763 $c:expr,
8764 $d:expr,
8765 $e:expr,
8766 $f:expr,
8767 $g:expr,
8768 $h:expr,
8769 $i:expr,
8770 $j:expr,
8771 $k:expr,
8772 $l:expr,
8773 $m:expr,
8774 $n:expr,
8775 $o:expr,
8776 $p:expr
8777 ) => {
8778 simd_shuffle16(
8779 a,
8780 b,
8781 [
8782 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
8783 ],
8784 );
8785 };
8786 }
8787 macro_rules! shuffle3 {
8788 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
8789 match (imm8 >> 6) & 0x3 {
8790 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
8791 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
8792 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
8793 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
8794 }
8795 };
8796 }
8797 macro_rules! shuffle2 {
8798 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
8799 match (imm8 >> 4) & 0x3 {
8800 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
8801 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
8802 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
8803 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
8804 }
8805 };
8806 }
8807 macro_rules! shuffle1 {
8808 ($a:expr, $e:expr, $i: expr, $m: expr) => {
8809 match (imm8 >> 2) & 0x3 {
8810 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
8811 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
8812 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
8813 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
8814 }
8815 };
8816 }
8817 match imm8 & 0x3 {
8818 0 => shuffle1!(0, 4, 8, 12),
8819 1 => shuffle1!(1, 5, 9, 13),
8820 2 => shuffle1!(2, 6, 10, 14),
8821 _ => shuffle1!(3, 7, 11, 15),
8822 }
8823}
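// A sketch of the two-source float shuffle: in every 128-bit lane the two low
// result elements are taken from `a` and the two high ones from `b`, each
// chosen by a 2-bit field of imm8. With imm8 = 0b01_00_01_00 each lane of the
// result is [a0, a1, b0, b1] in that lane's local element numbering:
//
//     let r = _mm512_shuffle_ps(a, b, 0b01_00_01_00);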
8824
8825/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
8826///
8827/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_ps&expand=5201)
8828#[inline]
8829#[target_feature(enable = "avx512f")]
8830#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
8831#[rustc_args_required_const(4)]
8832pub unsafe fn _mm512_mask_shuffle_ps(
8833 src: __m512,
8834 k: __mmask16,
8835 a: __m512,
8836 b: __m512,
8837 imm8: i32,
8838) -> __m512 {
8839 let imm8 = (imm8 & 0xFF) as u8;
8840 macro_rules! shuffle4 {
8841 (
8842 $a:expr,
8843 $b:expr,
8844 $c:expr,
8845 $d:expr,
8846 $e:expr,
8847 $f:expr,
8848 $g:expr,
8849 $h:expr,
8850 $i:expr,
8851 $j:expr,
8852 $k:expr,
8853 $l:expr,
8854 $m:expr,
8855 $n:expr,
8856 $o:expr,
8857 $p:expr
8858 ) => {
8859 simd_shuffle16(
8860 a,
8861 b,
8862 [
8863 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
8864 ],
8865 );
8866 };
8867 }
8868 macro_rules! shuffle3 {
8869 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
8870 match (imm8 >> 6) & 0x3 {
8871 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
8872 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
8873 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
8874 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
8875 }
8876 };
8877 }
8878 macro_rules! shuffle2 {
8879 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
8880 match (imm8 >> 4) & 0x3 {
8881 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
8882 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
8883 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
8884 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
8885 }
8886 };
8887 }
8888 macro_rules! shuffle1 {
8889 ($a:expr, $e:expr, $i: expr, $m: expr) => {
8890 match (imm8 >> 2) & 0x3 {
8891 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
8892 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
8893 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
8894 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
8895 }
8896 };
8897 }
8898 let shuffle = match imm8 & 0x3 {
8899 0 => shuffle1!(0, 4, 8, 12),
8900 1 => shuffle1!(1, 5, 9, 13),
8901 2 => shuffle1!(2, 6, 10, 14),
8902 _ => shuffle1!(3, 7, 11, 15),
8903 };
8904
8905 transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
8906}
8907
8908/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
8909///
8910/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_ps&expand=5202)
8911#[inline]
8912#[target_feature(enable = "avx512f")]
8913#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
8914#[rustc_args_required_const(3)]
8915pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
8916 let imm8 = (imm8 & 0xFF) as u8;
8917 macro_rules! shuffle4 {
8918 (
8919 $a:expr,
8920 $b:expr,
8921 $c:expr,
8922 $d:expr,
8923 $e:expr,
8924 $f:expr,
8925 $g:expr,
8926 $h:expr,
8927 $i:expr,
8928 $j:expr,
8929 $k:expr,
8930 $l:expr,
8931 $m:expr,
8932 $n:expr,
8933 $o:expr,
8934 $p:expr
8935 ) => {
8936 simd_shuffle16(
8937 a,
8938 b,
8939 [
8940 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
8941 ],
8942 );
8943 };
8944 }
8945 macro_rules! shuffle3 {
8946 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
8947 match (imm8 >> 6) & 0x3 {
8948 0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
8949 1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
8950 2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
8951 _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
8952 }
8953 };
8954 }
8955 macro_rules! shuffle2 {
8956 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
8957 match (imm8 >> 4) & 0x3 {
8958 0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
8959 1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
8960 2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
8961 _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
8962 }
8963 };
8964 }
8965 macro_rules! shuffle1 {
8966 ($a:expr, $e:expr, $i: expr, $m: expr) => {
8967 match (imm8 >> 2) & 0x3 {
8968 0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
8969 1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
8970 2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
8971 _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
8972 }
8973 };
8974 }
8975 let shuffle = match imm8 & 0x3 {
8976 0 => shuffle1!(0, 4, 8, 12),
8977 1 => shuffle1!(1, 5, 9, 13),
8978 2 => shuffle1!(2, 6, 10, 14),
8979 _ => shuffle1!(3, 7, 11, 15),
8980 };
8981
8982 let zero = _mm512_setzero_ps().as_f32x16();
8983 transmute(simd_select_bitmask(k, shuffle, zero))
8984}
8985
8986/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
8987///
8988/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_pd&expand=5192)
8989#[inline]
8990#[target_feature(enable = "avx512f")]
8991#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
8992#[rustc_args_required_const(2)]
8993pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
8994 let imm8 = (imm8 & 0xFF) as u8;
8995 macro_rules! shuffle8 {
8996 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
8997 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
8998 };
8999 }
9000 macro_rules! shuffle7 {
9001 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
9002 match (imm8 >> 7) & 0x1 {
9003 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
9004 _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
9005 }
9006 };
9007 }
9008 macro_rules! shuffle6 {
9009 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
9010 match (imm8 >> 6) & 0x1 {
9011 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
9012 _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
9013 }
9014 };
9015 }
9016 macro_rules! shuffle5 {
9017 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
9018 match (imm8 >> 5) & 0x1 {
9019 0 => shuffle6!($a, $b, $c, $d, $e, 12),
9020 _ => shuffle6!($a, $b, $c, $d, $e, 13),
9021 }
9022 };
9023 }
9024 macro_rules! shuffle4 {
9025 ($a:expr, $b:expr, $c:expr, $d:expr) => {
9026 match (imm8 >> 4) & 0x1 {
9027 0 => shuffle5!($a, $b, $c, $d, 4),
9028 _ => shuffle5!($a, $b, $c, $d, 5),
9029 }
9030 };
9031 }
9032 macro_rules! shuffle3 {
9033 ($a:expr, $b:expr, $c:expr) => {
9034 match (imm8 >> 3) & 0x1 {
9035 0 => shuffle4!($a, $b, $c, 10),
9036 _ => shuffle4!($a, $b, $c, 11),
9037 }
9038 };
9039 }
9040 macro_rules! shuffle2 {
9041 ($a:expr, $b:expr) => {
9042 match (imm8 >> 2) & 0x1 {
9043 0 => shuffle3!($a, $b, 2),
9044 _ => shuffle3!($a, $b, 3),
9045 }
9046 };
9047 }
9048 macro_rules! shuffle1 {
9049 ($a:expr) => {
9050 match (imm8 >> 1) & 0x1 {
9051 0 => shuffle2!($a, 8),
9052 _ => shuffle2!($a, 9),
9053 }
9054 };
9055 }
9056 match imm8 & 0x1 {
9057 0 => shuffle1!(0),
9058 _ => shuffle1!(1),
9059 }
9060}
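// A sketch of the two-source double shuffle: within each 128-bit lane the
// even result element comes from `a` and the odd one from `b`, each picking
// that lane's low or high double according to the corresponding imm8 bit.
// imm8 = 0 therefore interleaves the low doubles:
//
//     let r = _mm512_shuffle_pd(a, b, 0b00000000);
//     // result: [a0, b0, a2, b2, a4, b4, a6, b6]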
9061
9062/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9063///
9064/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_pd&expand=5190)
9065#[inline]
9066#[target_feature(enable = "avx512f")]
9067#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
9068#[rustc_args_required_const(4)]
9069pub unsafe fn _mm512_mask_shuffle_pd(
9070 src: __m512d,
9071 k: __mmask8,
9072 a: __m512d,
9073 b: __m512d,
9074 imm8: i32,
9075) -> __m512d {
9076 let imm8 = (imm8 & 0xFF) as u8;
9077 macro_rules! shuffle8 {
9078 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
9079 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
9080 };
9081 }
9082 macro_rules! shuffle7 {
9083 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
9084 match (imm8 >> 7) & 0x1 {
9085 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
9086 _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
9087 }
9088 };
9089 }
9090 macro_rules! shuffle6 {
9091 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
9092 match (imm8 >> 6) & 0x1 {
9093 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
9094 _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
9095 }
9096 };
9097 }
9098 macro_rules! shuffle5 {
9099 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
9100 match (imm8 >> 5) & 0x1 {
9101 0 => shuffle6!($a, $b, $c, $d, $e, 12),
9102 _ => shuffle6!($a, $b, $c, $d, $e, 13),
9103 }
9104 };
9105 }
9106 macro_rules! shuffle4 {
9107 ($a:expr, $b:expr, $c:expr, $d:expr) => {
9108 match (imm8 >> 4) & 0x1 {
9109 0 => shuffle5!($a, $b, $c, $d, 4),
9110 _ => shuffle5!($a, $b, $c, $d, 5),
9111 }
9112 };
9113 }
9114 macro_rules! shuffle3 {
9115 ($a:expr, $b:expr, $c:expr) => {
9116 match (imm8 >> 3) & 0x1 {
9117 0 => shuffle4!($a, $b, $c, 10),
9118 _ => shuffle4!($a, $b, $c, 11),
9119 }
9120 };
9121 }
9122 macro_rules! shuffle2 {
9123 ($a:expr, $b:expr) => {
9124 match (imm8 >> 2) & 0x1 {
9125 0 => shuffle3!($a, $b, 2),
9126 _ => shuffle3!($a, $b, 3),
9127 }
9128 };
9129 }
9130 macro_rules! shuffle1 {
9131 ($a:expr) => {
9132 match (imm8 >> 1) & 0x1 {
9133 0 => shuffle2!($a, 8),
9134 _ => shuffle2!($a, 9),
9135 }
9136 };
9137 }
9138 let shuffle = match imm8 & 0x1 {
9139 0 => shuffle1!(0),
9140 _ => shuffle1!(1),
9141 };
9142
9143 transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
9144}
9145
9146/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9147///
9148/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_pd&expand=5191)
9149#[inline]
9150#[target_feature(enable = "avx512f")]
9151#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
9152#[rustc_args_required_const(3)]
9153pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d {
9154 let imm8 = (imm8 & 0xFF) as u8;
9155 macro_rules! shuffle8 {
9156 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
9157 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
9158 };
9159 }
9160 macro_rules! shuffle7 {
9161 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
9162 match (imm8 >> 7) & 0x1 {
9163 0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
9164 _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
9165 }
9166 };
9167 }
9168 macro_rules! shuffle6 {
9169 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
9170 match (imm8 >> 6) & 0x1 {
9171 0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
9172 _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
9173 }
9174 };
9175 }
9176 macro_rules! shuffle5 {
9177 ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
9178 match (imm8 >> 5) & 0x1 {
9179 0 => shuffle6!($a, $b, $c, $d, $e, 12),
9180 _ => shuffle6!($a, $b, $c, $d, $e, 13),
9181 }
9182 };
9183 }
9184 macro_rules! shuffle4 {
9185 ($a:expr, $b:expr, $c:expr, $d:expr) => {
9186 match (imm8 >> 4) & 0x1 {
9187 0 => shuffle5!($a, $b, $c, $d, 4),
9188 _ => shuffle5!($a, $b, $c, $d, 5),
9189 }
9190 };
9191 }
9192 macro_rules! shuffle3 {
9193 ($a:expr, $b:expr, $c:expr) => {
9194 match (imm8 >> 3) & 0x1 {
9195 0 => shuffle4!($a, $b, $c, 10),
9196 _ => shuffle4!($a, $b, $c, 11),
9197 }
9198 };
9199 }
9200 macro_rules! shuffle2 {
9201 ($a:expr, $b:expr) => {
9202 match (imm8 >> 2) & 0x1 {
9203 0 => shuffle3!($a, $b, 2),
9204 _ => shuffle3!($a, $b, 3),
9205 }
9206 };
9207 }
9208 macro_rules! shuffle1 {
9209 ($a:expr) => {
9210 match (imm8 >> 1) & 0x1 {
9211 0 => shuffle2!($a, 8),
9212 _ => shuffle2!($a, 9),
9213 }
9214 };
9215 }
9216 let shuffle = match imm8 & 0x1 {
9217 0 => shuffle1!(0),
9218 _ => shuffle1!(1),
9219 };
9220
9221 let zero = _mm512_setzero_pd().as_f64x8();
9222 transmute(simd_select_bitmask(k, shuffle, zero))
9223}
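
// A minimal scalar sketch, not used above, of the writemask/zeromask selection that
// `simd_select_bitmask` performs in the `_mm512_mask_*` and `_mm512_maskz_*`
// wrappers: bit i of `k` chooses between the computed element and a fallback (the
// corresponding `src` element for a writemask, zero for a zeromask). The helper
// name is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn mask_blend_scalar_model(k: u8, computed: [f64; 8], fallback: [f64; 8]) -> [f64; 8] {
    let mut dst = [0.0f64; 8];
    for i in 0..8 {
        // Mask bit set: keep the computed element; otherwise fall back.
        dst[i] = if (k >> i) & 1 == 1 {
            computed[i]
        } else {
            fallback[i]
        };
    }
    dst
}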
9224
9225/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
9226///
9227/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_i32&expand=5177)
9228#[inline]
9229#[target_feature(enable = "avx512f")]
9230#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] //should be vshufi32x4, but generate vshufi64x2
9231#[rustc_args_required_const(2)]
9232pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
9233 let imm8 = (imm8 & 0xFF) as u8;
9234
9235 let a = a.as_i32x16();
9236 let b = b.as_i32x16();
9237 macro_rules! shuffle4 {
9238 (
9239 $a:expr,
9240 $b:expr,
9241 $c:expr,
9242 $d:expr,
9243 $e:expr,
9244 $f:expr,
9245 $g:expr,
9246 $h:expr,
9247 $i:expr,
9248 $j:expr,
9249 $k:expr,
9250 $l:expr,
9251 $m:expr,
9252 $n:expr,
9253 $o:expr,
9254 $p:expr
9255 ) => {
9256 simd_shuffle16(
9257 a,
9258 b,
9259 [
9260 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
9261 ],
9262 );
9263 };
9264 }
9265 macro_rules! shuffle3 {
9266 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
9267 match (imm8 >> 6) & 0x3 {
9268 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
9269 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
9270 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
9271 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
9272 }
9273 };
9274 }
9275 macro_rules! shuffle2 {
9276 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
9277 match (imm8 >> 4) & 0x3 {
9278 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
9279 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
9280 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
9281 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
9282 }
9283 };
9284 }
9285 macro_rules! shuffle1 {
9286 ($a:expr, $e:expr, $i: expr, $m: expr) => {
9287 match (imm8 >> 2) & 0x3 {
9288 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
9289 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
9290 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
9291 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
9292 }
9293 };
9294 }
9295 let r: i32x16 = match imm8 & 0x3 {
9296 0 => shuffle1!(0, 1, 2, 3),
9297 1 => shuffle1!(4, 5, 6, 7),
9298 2 => shuffle1!(8, 9, 10, 11),
9299 _ => shuffle1!(12, 13, 14, 15),
9300 };
9301
9302 transmute(r)
9303}
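
// A minimal scalar sketch, not used above, of the lane selection performed by
// `_mm512_shuffle_i32x4`; the same 2-bits-per-lane decoding applies to the
// `_i64x2`, `_f32x4` and `_f64x2` variants below. Destination lanes 0 and 1 are
// taken from `a`, lanes 2 and 3 from `b`, and each 2-bit field of `imm8` picks
// which of the four 128-bit source lanes is copied. The helper name is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn shuffle_x4_scalar_model(a: [i32; 16], b: [i32; 16], imm8: u8) -> [i32; 16] {
    let mut dst = [0i32; 16];
    for lane in 0..4 {
        // The low two destination lanes read from `a`, the high two from `b`.
        let src = if lane < 2 { &a } else { &b };
        // Two control bits per destination lane select the source lane.
        let sel = ((imm8 >> (2 * lane)) & 0x3) as usize;
        for i in 0..4 {
            dst[lane * 4 + i] = src[sel * 4 + i];
        }
    }
    dst
}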
9304
9305/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9306///
9307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_i32x&expand=5175)
9308#[inline]
9309#[target_feature(enable = "avx512f")]
9310#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
9311#[rustc_args_required_const(4)]
9312pub unsafe fn _mm512_mask_shuffle_i32x4(
9313 src: __m512i,
9314 k: __mmask16,
9315 a: __m512i,
9316 b: __m512i,
9317 imm8: i32,
9318) -> __m512i {
9319 let imm8 = (imm8 & 0xFF) as u8;
9320
9321 let a = a.as_i32x16();
9322 let b = b.as_i32x16();
9323 macro_rules! shuffle4 {
9324 (
9325 $a:expr,
9326 $b:expr,
9327 $c:expr,
9328 $d:expr,
9329 $e:expr,
9330 $f:expr,
9331 $g:expr,
9332 $h:expr,
9333 $i:expr,
9334 $j:expr,
9335 $k:expr,
9336 $l:expr,
9337 $m:expr,
9338 $n:expr,
9339 $o:expr,
9340 $p:expr
9341 ) => {
9342 simd_shuffle16(
9343 a,
9344 b,
9345 [
9346 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
9347 ],
9348 );
9349 };
9350 }
9351 macro_rules! shuffle3 {
9352 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
9353 match (imm8 >> 6) & 0x3 {
9354 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
9355 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
9356 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
9357 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
9358 }
9359 };
9360 }
9361 macro_rules! shuffle2 {
9362 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
9363 match (imm8 >> 4) & 0x3 {
9364 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
9365 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
9366 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
9367 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
9368 }
9369 };
9370 }
9371 macro_rules! shuffle1 {
9372 ($a:expr, $e:expr, $i: expr, $m: expr) => {
9373 match (imm8 >> 2) & 0x3 {
9374 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
9375 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
9376 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
9377 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
9378 }
9379 };
9380 }
9381 let shuffle = match imm8 & 0x3 {
9382 0 => shuffle1!(0, 1, 2, 3),
9383 1 => shuffle1!(4, 5, 6, 7),
9384 2 => shuffle1!(8, 9, 10, 11),
9385 _ => shuffle1!(12, 13, 14, 15),
9386 };
9387
9388 transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
9389}
9390
9391/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9392///
9393/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_i32&expand=5176)
9394#[inline]
9395#[target_feature(enable = "avx512f")]
9396#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
9397#[rustc_args_required_const(3)]
9398pub unsafe fn _mm512_maskz_shuffle_i32x4(
9399 k: __mmask16,
9400 a: __m512i,
9401 b: __m512i,
9402 imm8: i32,
9403) -> __m512i {
9404 let imm8 = (imm8 & 0xFF) as u8;
9405
9406 let a = a.as_i32x16();
9407 let b = b.as_i32x16();
9408 macro_rules! shuffle4 {
9409 (
9410 $a:expr,
9411 $b:expr,
9412 $c:expr,
9413 $d:expr,
9414 $e:expr,
9415 $f:expr,
9416 $g:expr,
9417 $h:expr,
9418 $i:expr,
9419 $j:expr,
9420 $k:expr,
9421 $l:expr,
9422 $m:expr,
9423 $n:expr,
9424 $o:expr,
9425 $p:expr
9426 ) => {
9427 simd_shuffle16(
9428 a,
9429 b,
9430 [
9431 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
9432 ],
9433 );
9434 };
9435 }
9436 macro_rules! shuffle3 {
9437 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
9438 match (imm8 >> 6) & 0x3 {
9439 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
9440 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
9441 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
9442 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
9443 }
9444 };
9445 }
9446 macro_rules! shuffle2 {
9447 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
9448 match (imm8 >> 4) & 0x3 {
9449 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
9450 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
9451 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
9452 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
9453 }
9454 };
9455 }
9456 macro_rules! shuffle1 {
9457 ($a:expr, $e:expr, $i: expr, $m: expr) => {
9458 match (imm8 >> 2) & 0x3 {
9459 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
9460 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
9461 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
9462 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
9463 }
9464 };
9465 }
9466 let shuffle = match imm8 & 0x3 {
9467 0 => shuffle1!(0, 1, 2, 3),
9468 1 => shuffle1!(4, 5, 6, 7),
9469 2 => shuffle1!(8, 9, 10, 11),
9470 _ => shuffle1!(12, 13, 14, 15),
9471 };
9472
9473 let zero = _mm512_setzero_si512().as_i32x16();
9474 transmute(simd_select_bitmask(k, shuffle, zero))
9475}
9476
9477/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
9478///
9479/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_i64x2&expand=5183)
9480#[inline]
9481#[target_feature(enable = "avx512f")]
9482#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
9483#[rustc_args_required_const(2)]
9484pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
9485 let imm8 = (imm8 & 0xFF) as u8;
9486 macro_rules! shuffle4 {
9487 (
9488 $a:expr,
9489 $b:expr,
9490 $c:expr,
9491 $d:expr,
9492 $e:expr,
9493 $f:expr,
9494 $g:expr,
9495 $h:expr
9496 ) => {
9497 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
9498 };
9499 }
9500 macro_rules! shuffle3 {
9501 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
9502 match (imm8 >> 6) & 0x3 {
9503 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
9504 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
9505 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
9506 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
9507 }
9508 };
9509 }
9510 macro_rules! shuffle2 {
9511 ($a:expr, $b:expr, $e:expr, $f:expr) => {
9512 match (imm8 >> 4) & 0x3 {
9513 0 => shuffle3!($a, $b, $e, $f, 8, 9),
9514 1 => shuffle3!($a, $b, $e, $f, 10, 11),
9515 2 => shuffle3!($a, $b, $e, $f, 12, 13),
9516 _ => shuffle3!($a, $b, $e, $f, 14, 15),
9517 }
9518 };
9519 }
9520 macro_rules! shuffle1 {
9521 ($a:expr, $e:expr) => {
9522 match (imm8 >> 2) & 0x3 {
9523 0 => shuffle2!($a, $e, 0, 1),
9524 1 => shuffle2!($a, $e, 2, 3),
9525 2 => shuffle2!($a, $e, 4, 5),
9526 _ => shuffle2!($a, $e, 6, 7),
9527 }
9528 };
9529 }
9530 match imm8 & 0x3 {
9531 0 => shuffle1!(0, 1),
9532 1 => shuffle1!(2, 3),
9533 2 => shuffle1!(4, 5),
9534 _ => shuffle1!(6, 7),
9535 }
9536}
9537
9538/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9539///
9540/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_i64x&expand=5181)
9541#[inline]
9542#[target_feature(enable = "avx512f")]
9543#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
9544#[rustc_args_required_const(4)]
9545pub unsafe fn _mm512_mask_shuffle_i64x2(
9546 src: __m512i,
9547 k: __mmask8,
9548 a: __m512i,
9549 b: __m512i,
9550 imm8: i32,
9551) -> __m512i {
9552 let imm8 = (imm8 & 0xFF) as u8;
9553 macro_rules! shuffle4 {
9554 (
9555 $a:expr,
9556 $b:expr,
9557 $c:expr,
9558 $d:expr,
9559 $e:expr,
9560 $f:expr,
9561 $g:expr,
9562 $h:expr
9563 ) => {
9564 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
9565 };
9566 }
9567 macro_rules! shuffle3 {
9568 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
9569 match (imm8 >> 6) & 0x3 {
9570 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
9571 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
9572 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
9573 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
9574 }
9575 };
9576 }
9577 macro_rules! shuffle2 {
9578 ($a:expr, $b:expr, $e:expr, $f:expr) => {
9579 match (imm8 >> 4) & 0x3 {
9580 0 => shuffle3!($a, $b, $e, $f, 8, 9),
9581 1 => shuffle3!($a, $b, $e, $f, 10, 11),
9582 2 => shuffle3!($a, $b, $e, $f, 12, 13),
9583 _ => shuffle3!($a, $b, $e, $f, 14, 15),
9584 }
9585 };
9586 }
9587 macro_rules! shuffle1 {
9588 ($a:expr, $e:expr) => {
9589 match (imm8 >> 2) & 0x3 {
9590 0 => shuffle2!($a, $e, 0, 1),
9591 1 => shuffle2!($a, $e, 2, 3),
9592 2 => shuffle2!($a, $e, 4, 5),
9593 _ => shuffle2!($a, $e, 6, 7),
9594 }
9595 };
9596 }
9597 let shuffle = match imm8 & 0x3 {
9598 0 => shuffle1!(0, 1),
9599 1 => shuffle1!(2, 3),
9600 2 => shuffle1!(4, 5),
9601 _ => shuffle1!(6, 7),
9602 };
9603
9604 transmute(simd_select_bitmask(k, shuffle, src.as_i64x8()))
9605}
9606
9607/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9608///
9609/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_i64&expand=5182)
9610#[inline]
9611#[target_feature(enable = "avx512f")]
9612#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
9613#[rustc_args_required_const(3)]
9614pub unsafe fn _mm512_maskz_shuffle_i64x2(
9615 k: __mmask8,
9616 a: __m512i,
9617 b: __m512i,
9618 imm8: i32,
9619) -> __m512i {
9620 let imm8 = (imm8 & 0xFF) as u8;
9621 macro_rules! shuffle4 {
9622 (
9623 $a:expr,
9624 $b:expr,
9625 $c:expr,
9626 $d:expr,
9627 $e:expr,
9628 $f:expr,
9629 $g:expr,
9630 $h:expr
9631 ) => {
9632 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
9633 };
9634 }
9635 macro_rules! shuffle3 {
9636 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
9637 match (imm8 >> 6) & 0x3 {
9638 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
9639 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
9640 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
9641 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
9642 }
9643 };
9644 }
9645 macro_rules! shuffle2 {
9646 ($a:expr, $b:expr, $e:expr, $f:expr) => {
9647 match (imm8 >> 4) & 0x3 {
9648 0 => shuffle3!($a, $b, $e, $f, 8, 9),
9649 1 => shuffle3!($a, $b, $e, $f, 10, 11),
9650 2 => shuffle3!($a, $b, $e, $f, 12, 13),
9651 _ => shuffle3!($a, $b, $e, $f, 14, 15),
9652 }
9653 };
9654 }
9655 macro_rules! shuffle1 {
9656 ($a:expr, $e:expr) => {
9657 match (imm8 >> 2) & 0x3 {
9658 0 => shuffle2!($a, $e, 0, 1),
9659 1 => shuffle2!($a, $e, 2, 3),
9660 2 => shuffle2!($a, $e, 4, 5),
9661 _ => shuffle2!($a, $e, 6, 7),
9662 }
9663 };
9664 }
9665 let shuffle = match imm8 & 0x3 {
9666 0 => shuffle1!(0, 1),
9667 1 => shuffle1!(2, 3),
9668 2 => shuffle1!(4, 5),
9669 _ => shuffle1!(6, 7),
9670 };
9671
9672 let zero = _mm512_setzero_si512().as_i64x8();
9673 transmute(simd_select_bitmask(k, shuffle, zero))
9674}
9675
9676/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
9677///
9678/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_f32x4&expand=5165)
9679#[inline]
9680#[target_feature(enable = "avx512f")]
9681#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] //should be vshuff32x4, but generate vshuff64x2
9682#[rustc_args_required_const(2)]
9683pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 {
9684 let imm8 = (imm8 & 0xFF) as u8;
9685 macro_rules! shuffle4 {
9686 (
9687 $a:expr,
9688 $b:expr,
9689 $c:expr,
9690 $d:expr,
9691 $e:expr,
9692 $f:expr,
9693 $g:expr,
9694 $h:expr,
9695 $i:expr,
9696 $j:expr,
9697 $k:expr,
9698 $l:expr,
9699 $m:expr,
9700 $n:expr,
9701 $o:expr,
9702 $p:expr
9703 ) => {
9704 simd_shuffle16(
9705 a,
9706 b,
9707 [
9708 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
9709 ],
9710 );
9711 };
9712 }
9713 macro_rules! shuffle3 {
9714 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
9715 match (imm8 >> 6) & 0x3 {
9716 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
9717 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
9718 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
9719 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
9720 }
9721 };
9722 }
9723 macro_rules! shuffle2 {
9724 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
9725 match (imm8 >> 4) & 0x3 {
9726 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
9727 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
9728 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
9729 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
9730 }
9731 };
9732 }
9733 macro_rules! shuffle1 {
9734 ($a:expr, $e:expr, $i: expr, $m: expr) => {
9735 match (imm8 >> 2) & 0x3 {
9736 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
9737 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
9738 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
9739 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
9740 }
9741 };
9742 }
9743 match imm8 & 0x3 {
9744 0 => shuffle1!(0, 1, 2, 3),
9745 1 => shuffle1!(4, 5, 6, 7),
9746 2 => shuffle1!(8, 9, 10, 11),
9747 _ => shuffle1!(12, 13, 14, 15),
9748 }
9749}
9750
9751/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9752///
9753/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_f32&expand=5163)
9754#[inline]
9755#[target_feature(enable = "avx512f")]
9756#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
9757#[rustc_args_required_const(4)]
9758pub unsafe fn _mm512_mask_shuffle_f32x4(
9759 src: __m512,
9760 k: __mmask16,
9761 a: __m512,
9762 b: __m512,
9763 imm8: i32,
9764) -> __m512 {
9765 let imm8 = (imm8 & 0xFF) as u8;
9766 macro_rules! shuffle4 {
9767 (
9768 $a:expr,
9769 $b:expr,
9770 $c:expr,
9771 $d:expr,
9772 $e:expr,
9773 $f:expr,
9774 $g:expr,
9775 $h:expr,
9776 $i:expr,
9777 $j:expr,
9778 $k:expr,
9779 $l:expr,
9780 $m:expr,
9781 $n:expr,
9782 $o:expr,
9783 $p:expr
9784 ) => {
9785 simd_shuffle16(
9786 a,
9787 b,
9788 [
9789 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
9790 ],
9791 );
9792 };
9793 }
9794 macro_rules! shuffle3 {
9795 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
9796 match (imm8 >> 6) & 0x3 {
9797 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
9798 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
9799 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
9800 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
9801 }
9802 };
9803 }
9804 macro_rules! shuffle2 {
9805 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
9806 match (imm8 >> 4) & 0x3 {
9807 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
9808 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
9809 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
9810 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
9811 }
9812 };
9813 }
9814 macro_rules! shuffle1 {
9815 ($a:expr, $e:expr, $i: expr, $m: expr) => {
9816 match (imm8 >> 2) & 0x3 {
9817 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
9818 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
9819 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
9820 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
9821 }
9822 };
9823 }
9824 let shuffle = match imm8 & 0x3 {
9825 0 => shuffle1!(0, 1, 2, 3),
9826 1 => shuffle1!(4, 5, 6, 7),
9827 2 => shuffle1!(8, 9, 10, 11),
9828 _ => shuffle1!(12, 13, 14, 15),
9829 };
9830
9831 transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
9832}
9833
9834/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
9835///
9836/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_f32&expand=5164)
9837#[inline]
9838#[target_feature(enable = "avx512f")]
9839#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
9840#[rustc_args_required_const(3)]
9841pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
9842 let imm8 = (imm8 & 0xFF) as u8;
9843 macro_rules! shuffle4 {
9844 (
9845 $a:expr,
9846 $b:expr,
9847 $c:expr,
9848 $d:expr,
9849 $e:expr,
9850 $f:expr,
9851 $g:expr,
9852 $h:expr,
9853 $i:expr,
9854 $j:expr,
9855 $k:expr,
9856 $l:expr,
9857 $m:expr,
9858 $n:expr,
9859 $o:expr,
9860 $p:expr
9861 ) => {
9862 simd_shuffle16(
9863 a,
9864 b,
9865 [
9866 $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
9867 ],
9868 );
9869 };
9870 }
9871 macro_rules! shuffle3 {
9872 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
9873 match (imm8 >> 6) & 0x3 {
9874 0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
9875 1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
9876 2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
9877 _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
9878 }
9879 };
9880 }
9881 macro_rules! shuffle2 {
9882 ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
9883 match (imm8 >> 4) & 0x3 {
9884 0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
9885 1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
9886 2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
9887 _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
9888 }
9889 };
9890 }
9891 macro_rules! shuffle1 {
9892 ($a:expr, $e:expr, $i: expr, $m: expr) => {
9893 match (imm8 >> 2) & 0x3 {
9894 0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
9895 1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
9896 2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
9897 _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
9898 }
9899 };
9900 }
9901 let shuffle = match imm8 & 0x3 {
9902 0 => shuffle1!(0, 1, 2, 3),
9903 1 => shuffle1!(4, 5, 6, 7),
9904 2 => shuffle1!(8, 9, 10, 11),
9905 _ => shuffle1!(12, 13, 14, 15),
9906 };
9907
9908 let zero = _mm512_setzero_ps().as_f32x16();
9909 transmute(simd_select_bitmask(k, shuffle, zero))
9910}
9911
9912/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
9913///
9914/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_f64x2&expand=5171)
9915#[inline]
9916#[target_feature(enable = "avx512f")]
9917#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
9918#[rustc_args_required_const(2)]
9919pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
9920 let imm8 = (imm8 & 0xFF) as u8;
9921 macro_rules! shuffle4 {
9922 (
9923 $a:expr,
9924 $b:expr,
9925 $c:expr,
9926 $d:expr,
9927 $e:expr,
9928 $f:expr,
9929 $g:expr,
9930 $h:expr
9931 ) => {
9932 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
9933 };
9934 }
9935 macro_rules! shuffle3 {
9936 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
9937 match (imm8 >> 6) & 0x3 {
9938 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
9939 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
9940 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
9941 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
9942 }
9943 };
9944 }
9945 macro_rules! shuffle2 {
9946 ($a:expr, $b:expr, $e:expr, $f:expr) => {
9947 match (imm8 >> 4) & 0x3 {
9948 0 => shuffle3!($a, $b, $e, $f, 8, 9),
9949 1 => shuffle3!($a, $b, $e, $f, 10, 11),
9950 2 => shuffle3!($a, $b, $e, $f, 12, 13),
9951 _ => shuffle3!($a, $b, $e, $f, 14, 15),
9952 }
9953 };
9954 }
9955 macro_rules! shuffle1 {
9956 ($a:expr, $e:expr) => {
9957 match (imm8 >> 2) & 0x3 {
9958 0 => shuffle2!($a, $e, 0, 1),
9959 1 => shuffle2!($a, $e, 2, 3),
9960 2 => shuffle2!($a, $e, 4, 5),
9961 _ => shuffle2!($a, $e, 6, 7),
9962 }
9963 };
9964 }
9965 match imm8 & 0x3 {
9966 0 => shuffle1!(0, 1),
9967 1 => shuffle1!(2, 3),
9968 2 => shuffle1!(4, 5),
9969 _ => shuffle1!(6, 7),
9970 }
9971}
9972
9973/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
9974///
9975/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_f64x2&expand=5169)
9976#[inline]
9977#[target_feature(enable = "avx512f")]
9978#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
9979#[rustc_args_required_const(4)]
9980pub unsafe fn _mm512_mask_shuffle_f64x2(
9981 src: __m512d,
9982 k: __mmask8,
9983 a: __m512d,
9984 b: __m512d,
9985 imm8: i32,
9986) -> __m512d {
9987 let imm8 = (imm8 & 0xFF) as u8;
9988 macro_rules! shuffle4 {
9989 (
9990 $a:expr,
9991 $b:expr,
9992 $c:expr,
9993 $d:expr,
9994 $e:expr,
9995 $f:expr,
9996 $g:expr,
9997 $h:expr
9998 ) => {
9999 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
10000 };
10001 }
10002 macro_rules! shuffle3 {
10003 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
10004 match (imm8 >> 6) & 0x3 {
10005 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
10006 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
10007 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
10008 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
10009 }
10010 };
10011 }
10012 macro_rules! shuffle2 {
10013 ($a:expr, $b:expr, $e:expr, $f:expr) => {
10014 match (imm8 >> 4) & 0x3 {
10015 0 => shuffle3!($a, $b, $e, $f, 8, 9),
10016 1 => shuffle3!($a, $b, $e, $f, 10, 11),
10017 2 => shuffle3!($a, $b, $e, $f, 12, 13),
10018 _ => shuffle3!($a, $b, $e, $f, 14, 15),
10019 }
10020 };
10021 }
10022 macro_rules! shuffle1 {
10023 ($a:expr, $e:expr) => {
10024 match (imm8 >> 2) & 0x3 {
10025 0 => shuffle2!($a, $e, 0, 1),
10026 1 => shuffle2!($a, $e, 2, 3),
10027 2 => shuffle2!($a, $e, 4, 5),
10028 _ => shuffle2!($a, $e, 6, 7),
10029 }
10030 };
10031 }
10032 let shuffle = match imm8 & 0x3 {
10033 0 => shuffle1!(0, 1),
10034 1 => shuffle1!(2, 3),
10035 2 => shuffle1!(4, 5),
10036 _ => shuffle1!(6, 7),
10037 };
10038
10039 transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
10040}
10041
10042/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10043///
10044/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_f64x2&expand=5170)
10045#[inline]
10046#[target_feature(enable = "avx512f")]
10047#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
10048#[rustc_args_required_const(3)]
10049pub unsafe fn _mm512_maskz_shuffle_f64x2(
10050 k: __mmask8,
10051 a: __m512d,
10052 b: __m512d,
10053 imm8: i32,
10054) -> __m512d {
10055 let imm8 = (imm8 & 0xFF) as u8;
10056 macro_rules! shuffle4 {
10057 (
10058 $a:expr,
10059 $b:expr,
10060 $c:expr,
10061 $d:expr,
10062 $e:expr,
10063 $f:expr,
10064 $g:expr,
10065 $h:expr
10066 ) => {
10067 simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]);
10068 };
10069 }
10070 macro_rules! shuffle3 {
10071 ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
10072 match (imm8 >> 6) & 0x3 {
10073 0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
10074 1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
10075 2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
10076 _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
10077 }
10078 };
10079 }
10080 macro_rules! shuffle2 {
10081 ($a:expr, $b:expr, $e:expr, $f:expr) => {
10082 match (imm8 >> 4) & 0x3 {
10083 0 => shuffle3!($a, $b, $e, $f, 8, 9),
10084 1 => shuffle3!($a, $b, $e, $f, 10, 11),
10085 2 => shuffle3!($a, $b, $e, $f, 12, 13),
10086 _ => shuffle3!($a, $b, $e, $f, 14, 15),
10087 }
10088 };
10089 }
10090 macro_rules! shuffle1 {
10091 ($a:expr, $e:expr) => {
10092 match (imm8 >> 2) & 0x3 {
10093 0 => shuffle2!($a, $e, 0, 1),
10094 1 => shuffle2!($a, $e, 2, 3),
10095 2 => shuffle2!($a, $e, 4, 5),
10096 _ => shuffle2!($a, $e, 6, 7),
10097 }
10098 };
10099 }
10100 let shuffle = match imm8 & 0x3 {
10101 0 => shuffle1!(0, 1),
10102 1 => shuffle1!(2, 3),
10103 2 => shuffle1!(4, 5),
10104 _ => shuffle1!(6, 7),
10105 };
10106
10107 let zero = _mm512_setzero_pd().as_f64x8();
10108 transmute(simd_select_bitmask(k, shuffle, zero))
10109}
10110
10111/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
10112///
10113/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extractf32x4_ps&expand=2442)
10114#[inline]
10115#[target_feature(enable = "avx512f")]
10116#[cfg_attr(
10117 all(test, not(target_os = "windows")),
10118 assert_instr(vextractf32x4, imm8 = 3)
10119)]
10120#[rustc_args_required_const(1)]
10121pub unsafe fn _mm512_extractf32x4_ps(a: __m512, imm8: i32) -> __m128 {
10122 match imm8 & 0x3 {
10123 0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
10124 1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
10125 2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
10126 _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
10127 }
10128}
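
// A minimal scalar sketch, not used above, of what `_mm512_extractf32x4_ps`
// computes: the two low bits of `imm8` select which 128-bit (four-element) lane of
// `a` is returned. The helper name is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn extractf32x4_scalar_model(a: [f32; 16], imm8: i32) -> [f32; 4] {
    // Only the two low bits of the immediate are significant.
    let base = ((imm8 & 0x3) as usize) * 4;
    [a[base], a[base + 1], a[base + 2], a[base + 3]]
}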
10129
10130/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
10131///
10132/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_moveldup_ps&expand=3862)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[cfg_attr(test, assert_instr(vmovsldup))]
10136pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
10137 let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
10138 transmute(r)
10139}
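
// A minimal scalar sketch, not used above, of the duplication pattern behind
// `_mm512_moveldup_ps`: every even-indexed element is copied into the following
// odd slot. `_mm512_movehdup_ps` does the same with odd-indexed elements, and
// `_mm512_movedup_pd` with even-indexed f64 elements. The helper name is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn moveldup_scalar_model(a: [f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in (0..16).step_by(2) {
        // Duplicate the even-indexed element into both slots of the pair.
        dst[i] = a[i];
        dst[i + 1] = a[i];
    }
    dst
}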
10140
10141/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10142///
10143/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_moveldup_ps&expand=3860)
10144#[inline]
10145#[target_feature(enable = "avx512f")]
10146#[cfg_attr(test, assert_instr(vmovsldup))]
10147pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10148 let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
10149 transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
10150}
10151
10152/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10153///
10154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_moveldup_ps&expand=3861)
10155#[inline]
10156#[target_feature(enable = "avx512f")]
10157#[cfg_attr(test, assert_instr(vmovsldup))]
10158pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
10159 let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
10160 let zero = _mm512_setzero_ps().as_f32x16();
10161 transmute(simd_select_bitmask(k, mov, zero))
10162}
10163
10164/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
10165///
10166/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_movehdup_ps&expand=3852)
10167#[inline]
10168#[target_feature(enable = "avx512f")]
10169#[cfg_attr(test, assert_instr(vmovshdup))]
10170pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
10171 let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
10172 transmute(r)
10173}
10174
10175/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10176///
10177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_movehdup&expand=3850)
10178#[inline]
10179#[target_feature(enable = "avx512f")]
10180#[cfg_attr(test, assert_instr(vmovshdup))]
10181pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10182 let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
10183 transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
10184}
10185
10186/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10187///
10188/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_moveh&expand=3851)
10189#[inline]
10190#[target_feature(enable = "avx512f")]
10191#[cfg_attr(test, assert_instr(vmovshdup))]
10192pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
10193 let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
10194 let zero = _mm512_setzero_ps().as_f32x16();
10195 transmute(simd_select_bitmask(k, mov, zero))
10196}
10197
10198/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
10199///
10200/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_movedup_pd&expand=3843)
10201#[inline]
10202#[target_feature(enable = "avx512f")]
10203#[cfg_attr(test, assert_instr(vmovddup))]
10204pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
10205 let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
10206 transmute(r)
10207}
10208
10209/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10210///
10211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_movedup_pd&expand=3841)
10212#[inline]
10213#[target_feature(enable = "avx512f")]
10214#[cfg_attr(test, assert_instr(vmovddup))]
10215pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
10216 let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
10217 transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
10218}
10219
10220/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10221///
10222/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_movedup_pd&expand=3842)
10223#[inline]
10224#[target_feature(enable = "avx512f")]
10225#[cfg_attr(test, assert_instr(vmovddup))]
10226pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
10227 let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
10228 let zero = _mm512_setzero_pd().as_f64x8();
10229 transmute(simd_select_bitmask(k, mov, zero))
10230}
10231
10232/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
10235///
10236/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
10237#[inline]
10238#[target_feature(enable = "avx512f")]
10239#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
10240pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
10241 transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
10242}
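
// The assertion comment above notes that LLVM may emit vpandq rather than vpandd.
// A brief sketch of why that is harmless: a bitwise AND (and likewise OR and XOR)
// yields the same 512 bits whether the register is viewed as sixteen 32-bit or
// eight 64-bit lanes. The hypothetical helper below demonstrates this on plain
// integers and is not part of the crate's API.
#[cfg(test)]
#[allow(dead_code)]
fn bitwise_and_is_lane_width_independent(a: u64, b: u64) -> bool {
    // AND the value as a single 64-bit lane...
    let wide = a & b;
    // ...and as two independent 32-bit lanes, then reassemble.
    let lo = (a as u32) & (b as u32);
    let hi = ((a >> 32) as u32) & ((b >> 32) as u32);
    let recombined = ((hi as u64) << 32) | (lo as u64);
    wide == recombined
}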
10243
10244/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10245///
10246/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273)
10247#[inline]
10248#[target_feature(enable = "avx512f")]
10249#[cfg_attr(test, assert_instr(vpandd))]
10250pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
10251 let and = _mm512_and_epi32(a, b).as_i32x16();
10252 transmute(simd_select_bitmask(k, and, src.as_i32x16()))
10253}
10254
10255/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10256///
10257/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274)
10258#[inline]
10259#[target_feature(enable = "avx512f")]
10260#[cfg_attr(test, assert_instr(vpandd))]
10261pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
10262 let and = _mm512_and_epi32(a, b).as_i32x16();
10263 let zero = _mm512_setzero_si512().as_i32x16();
10264 transmute(simd_select_bitmask(k, and, zero))
10265}
10266
10267/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
10268///
10269/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279)
10270#[inline]
10271#[target_feature(enable = "avx512f")]
10272#[cfg_attr(test, assert_instr(vpandq))]
10273pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
10274 transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
10275}
10276
10277/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10278///
10279/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280)
10280#[inline]
10281#[target_feature(enable = "avx512f")]
10282#[cfg_attr(test, assert_instr(vpandq))]
10283pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
10284 let and = _mm512_and_epi64(a, b).as_i64x8();
10285 transmute(simd_select_bitmask(k, and, src.as_i64x8()))
10286}
10287
10288/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10289///
10290/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=274)
10291#[inline]
10292#[target_feature(enable = "avx512f")]
10293#[cfg_attr(test, assert_instr(vpandq))]
10294pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
10295 let and = _mm512_and_epi64(a, b).as_i64x8();
10296 let zero = _mm512_setzero_si512().as_i64x8();
10297 transmute(simd_select_bitmask(k, and, zero))
10298}
10299
10300/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
10301///
10302/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
10303#[inline]
10304#[target_feature(enable = "avx512f")]
10305#[cfg_attr(test, assert_instr(vpandq))]
10306pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
10307 transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
10308}
10309
10310/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
10311///
10312/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
10313#[inline]
10314#[target_feature(enable = "avx512f")]
10315#[cfg_attr(test, assert_instr(vporq))]
10316pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
10317 transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
10318}
10319
10320/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10321///
10322/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040)
10323#[inline]
10324#[target_feature(enable = "avx512f")]
10325#[cfg_attr(test, assert_instr(vpord))]
10326pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
10327 let or = _mm512_or_epi32(a, b).as_i32x16();
10328 transmute(simd_select_bitmask(k, or, src.as_i32x16()))
10329}
10330
10331/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10332///
10333/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041)
10334#[inline]
10335#[target_feature(enable = "avx512f")]
10336#[cfg_attr(test, assert_instr(vpord))]
10337pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
10338 let or = _mm512_or_epi32(a, b).as_i32x16();
10339 let zero = _mm512_setzero_si512().as_i32x16();
10340 transmute(simd_select_bitmask(k, or, zero))
10341}
10342
10343/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
10344///
10345/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051)
10346#[inline]
10347#[target_feature(enable = "avx512f")]
10348#[cfg_attr(test, assert_instr(vporq))]
10349pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
10350 transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
10351}
10352
10353/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10354///
10355/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
10356#[inline]
10357#[target_feature(enable = "avx512f")]
10358#[cfg_attr(test, assert_instr(vporq))]
10359pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
10360 let or = _mm512_or_epi64(a, b).as_i64x8();
10361 transmute(simd_select_bitmask(k, or, src.as_i64x8()))
10362}
10363
10364/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10365///
10366/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
10367#[inline]
10368#[target_feature(enable = "avx512f")]
10369#[cfg_attr(test, assert_instr(vporq))]
10370pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
10371 let or = _mm512_or_epi64(a, b).as_i64x8();
10372 let zero = _mm512_setzero_si512().as_i64x8();
10373 transmute(simd_select_bitmask(k, or, zero))
10374}
10375
10376/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
10377///
10378/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
10379#[inline]
10380#[target_feature(enable = "avx512f")]
10381#[cfg_attr(test, assert_instr(vporq))]
10382pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
10383 transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
10384}
10385
10386/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
10387///
10388/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
10389#[inline]
10390#[target_feature(enable = "avx512f")]
10391#[cfg_attr(test, assert_instr(vpxorq))]
10392pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
10393 transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
10394}
10395
10396/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10397///
10398/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
10399#[inline]
10400#[target_feature(enable = "avx512f")]
10401#[cfg_attr(test, assert_instr(vpxord))]
10402pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
10403 let xor = _mm512_xor_epi32(a, b).as_i32x16();
10404 transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
10405}
10406
10407/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10408///
10409/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
10410#[inline]
10411#[target_feature(enable = "avx512f")]
10412#[cfg_attr(test, assert_instr(vpxord))]
10413pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
10414 let xor = _mm512_xor_epi32(a, b).as_i32x16();
10415 let zero = _mm512_setzero_si512().as_i32x16();
10416 transmute(simd_select_bitmask(k, xor, zero))
10417}
10418
10419/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
10420///
10421/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
10422#[inline]
10423#[target_feature(enable = "avx512f")]
10424#[cfg_attr(test, assert_instr(vpxorq))]
10425pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
10426 transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
10427}
10428
10429/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10430///
10431/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
10432#[inline]
10433#[target_feature(enable = "avx512f")]
10434#[cfg_attr(test, assert_instr(vpxorq))]
10435pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
10436 let xor = _mm512_xor_epi64(a, b).as_i64x8();
10437 transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
10438}
10439
10440/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10441///
10442/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
10443#[inline]
10444#[target_feature(enable = "avx512f")]
10445#[cfg_attr(test, assert_instr(vpxorq))]
10446pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
10447 let xor = _mm512_xor_epi64(a, b).as_i64x8();
10448 let zero = _mm512_setzero_si512().as_i64x8();
10449 transmute(simd_select_bitmask(k, xor, zero))
10450}
10451
10452/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
10453///
10454/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
10455#[inline]
10456#[target_feature(enable = "avx512f")]
10457#[cfg_attr(test, assert_instr(vpxorq))]
10458pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
10459 transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
10460}
10461
10462/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
10463///
10464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
10465#[inline]
10466#[target_feature(enable = "avx512f")]
10467#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
10468pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
10469 transmute(a & b)
10470}
10471
10472/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
10473///
10474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
10475#[inline]
10476#[target_feature(enable = "avx512f")]
10477#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
10478pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
10479 transmute(a & b)
10480}
10481
10482/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
10483///
10484/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
10485#[inline]
10486#[target_feature(enable = "avx512f")]
10487#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
10488pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
10489 transmute(a | b)
10490}
10491
10492/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
10493///
10494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
10495#[inline]
10496#[target_feature(enable = "avx512f")]
10497#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
10498pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
10499 transmute(a | b)
10500}
10501
10502/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
10503///
10504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
10505#[inline]
10506#[target_feature(enable = "avx512f")]
10507#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
10508pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
10509 transmute(a ^ b)
10510}
10511
10512/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
10513///
10514/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
10518pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
10519 transmute(a ^ b)
10520}
10521
10522/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
10523///
10524/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=knot_mask16&expand=3233)
10525#[inline]
10526#[target_feature(enable = "avx512f")]
10527pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
10528 transmute(a ^ 0b11111111_11111111)
10529}
10530
10531/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
10532///
10533/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_knot&expand=3231)
10534#[inline]
10535#[target_feature(enable = "avx512f")]
10536pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
10537 transmute(a ^ 0b11111111_11111111)
10538}
10539
10540/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
10541///
10542/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kandn_mask16&expand=3218)
10543#[inline]
10544#[target_feature(enable = "avx512f")]
10545#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
10546pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
10547 _mm512_kand(_mm512_knot(a), b)
10548}
10549
10550/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
10551///
10552/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kandn&expand=3216)
10553#[inline]
10554#[target_feature(enable = "avx512f")]
10555#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
10556pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
10557 _mm512_kand(_mm512_knot(a), b)
10558}
10559
10560/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
10561///
10562/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxnor_mask16&expand=3285)
10563#[inline]
10564#[target_feature(enable = "avx512f")]
10565#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
10566pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
10567 _mm512_knot(_mm512_kxor(a, b))
10568}
10569
10570/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
10571///
10572/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxnor&expand=3283)
10573#[inline]
10574#[target_feature(enable = "avx512f")]
10575#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
10576pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
10577 _mm512_knot(_mm512_kxor(a, b))
10578}
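
// Illustrative sketch (not upstream code): how the 16-bit mask helpers above
// compose. Each one is a plain bitwise operation on a `__mmask16`, i.e. a
// `u16`; the values below are arbitrary.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask16_ops_sketch() {
    let a: __mmask16 = 0b00000000_11111111;
    let b: __mmask16 = 0b00001111_00001111;
    assert_eq!(_mm512_kxor(a, b), 0b00001111_11110000);
    assert_eq!(_mm512_knot(a), 0b11111111_00000000);
    assert_eq!(_mm512_kandn(a, b), 0b00001111_00000000); // (!a) & b
    assert_eq!(_mm512_kxnor(a, b), 0b11110000_00001111); // !(a ^ b)
}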
10579
10580/// Copy 16-bit mask a to k.
10581///
10582/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_kmov&expand=3228)
10583#[inline]
10584#[target_feature(enable = "avx512f")]
10585#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
10586pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
10587 let r: u16 = a;
10588 transmute(r)
10589}
10590
10591/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
10592///
10593/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
10594#[inline]
10595#[target_feature(enable = "avx512f")]
10596pub unsafe fn _mm512_set_ps(
10597 e0: f32,
10598 e1: f32,
10599 e2: f32,
10600 e3: f32,
10601 e4: f32,
10602 e5: f32,
10603 e6: f32,
10604 e7: f32,
10605 e8: f32,
10606 e9: f32,
10607 e10: f32,
10608 e11: f32,
10609 e12: f32,
10610 e13: f32,
10611 e14: f32,
10612 e15: f32,
10613) -> __m512 {
10614 _mm512_setr_ps(
10615 e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
10616 )
10617}
10618
10619/// Sets packed single-precision (32-bit) floating-point elements in `dst` with
10620/// the supplied values in reverse order.
10621///
10622/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
10623#[inline]
10624#[target_feature(enable = "avx512f")]
10625pub unsafe fn _mm512_setr_ps(
10626 e0: f32,
10627 e1: f32,
10628 e2: f32,
10629 e3: f32,
10630 e4: f32,
10631 e5: f32,
10632 e6: f32,
10633 e7: f32,
10634 e8: f32,
10635 e9: f32,
10636 e10: f32,
10637 e11: f32,
10638 e12: f32,
10639 e13: f32,
10640 e14: f32,
10641 e15: f32,
10642) -> __m512 {
10643 let r = f32x16::new(
10644 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
10645 );
10646 transmute(r)
10647}
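
// Illustrative sketch (not upstream code): `_mm512_set_ps` takes its arguments
// highest-lane first, while `_mm512_setr_ps` takes them in lowest-lane-first
// ("reversed", i.e. memory) order, so the two calls below build the same vector.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn set_ps_order_sketch() {
    let a = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    let b = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // All sixteen lanes compare equal.
    assert_eq!(_mm512_cmpeq_ps_mask(a, b), 0xFFFF);
}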
10648
10649/// Broadcast 64-bit float `a` to all elements of `dst`.
10650#[inline]
10651#[target_feature(enable = "avx512f")]
10652pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
10653 transmute(f64x8::splat(a))
10654}
10655
10656/// Broadcast 32-bit float `a` to all elements of `dst`.
10657#[inline]
10658#[target_feature(enable = "avx512f")]
10659pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
10660 transmute(f32x16::splat(a))
10661}
10662
10663/// Sets packed 32-bit integers in `dst` with the supplied values.
10664#[inline]
10665#[target_feature(enable = "avx512f")]
10666pub unsafe fn _mm512_set_epi32(
10667 e15: i32,
10668 e14: i32,
10669 e13: i32,
10670 e12: i32,
10671 e11: i32,
10672 e10: i32,
10673 e9: i32,
10674 e8: i32,
10675 e7: i32,
10676 e6: i32,
10677 e5: i32,
10678 e4: i32,
10679 e3: i32,
10680 e2: i32,
10681 e1: i32,
10682 e0: i32,
10683) -> __m512i {
10684 _mm512_setr_epi32(
10685 e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
10686 )
10687}
10688
10689/// Broadcast 32-bit integer `a` to all elements of `dst`.
10690#[inline]
10691#[target_feature(enable = "avx512f")]
10692pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
10693 transmute(i32x16::splat(a))
10694}
10695
10696/// Broadcast 64-bit integer `a` to all elements of `dst`.
10697#[inline]
10698#[target_feature(enable = "avx512f")]
10699pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
10700 transmute(i64x8::splat(a))
10701}
10702
10703/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
10704///
10705/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
10706#[inline]
10707#[target_feature(enable = "avx512f")]
10708#[cfg_attr(test, assert_instr(vcmp))]
10709pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10710 _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
10711}
10712
10713/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
10714/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10715///
10716/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
10717#[inline]
10718#[target_feature(enable = "avx512f")]
10719#[cfg_attr(test, assert_instr(vcmp))]
10720pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10721 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
10722}
10723
10724/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
10725///
10726/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
10727#[inline]
10728#[target_feature(enable = "avx512f")]
10729#[cfg_attr(test, assert_instr(vcmp))]
10730pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10731 _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
10732}
10733
10734/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
10735/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10736///
10737/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
10738#[inline]
10739#[target_feature(enable = "avx512f")]
10740#[cfg_attr(test, assert_instr(vcmp))]
10741pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10742 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
10743}
10744
10745/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
10746///
10747/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
10748#[inline]
10749#[target_feature(enable = "avx512f")]
10750#[cfg_attr(test, assert_instr(vcmp))]
10751pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10752 _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
10753}
10754
10755/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
10756/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10757///
10758/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
10759#[inline]
10760#[target_feature(enable = "avx512f")]
10761#[cfg_attr(test, assert_instr(vcmp))]
10762pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10763 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
10764}
10765
10766/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
10767///
10768/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
10769#[inline]
10770#[target_feature(enable = "avx512f")]
10771#[cfg_attr(test, assert_instr(vcmp))]
10772pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10773 _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
10774}
10775
10776/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
10777/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10778///
10779/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
10780#[inline]
10781#[target_feature(enable = "avx512f")]
10782#[cfg_attr(test, assert_instr(vcmp))]
10783pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10784 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
10785}
10786
10787/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
10788///
10789/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
10790#[inline]
10791#[target_feature(enable = "avx512f")]
10792#[cfg_attr(test, assert_instr(vcmp))]
10793pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10794 _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
10795}
10796
10797/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
10798/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10799///
10800/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
10801#[inline]
10802#[target_feature(enable = "avx512f")]
10803#[cfg_attr(test, assert_instr(vcmp))]
10804pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10805 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
10806}
10807
10808/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
10809///
10810/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
10811#[inline]
10812#[target_feature(enable = "avx512f")]
10813#[cfg_attr(test, assert_instr(vcmp))]
10814pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10815 _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
10816}
10817
10818/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
10819/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10820///
10821/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
10822#[inline]
10823#[target_feature(enable = "avx512f")]
10824#[cfg_attr(test, assert_instr(vcmp))]
10825pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10826 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
10827}
10828
10829/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
10830///
10831/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
10832#[inline]
10833#[target_feature(enable = "avx512f")]
10834#[rustc_args_required_const(2)]
10835#[cfg_attr(test, assert_instr(vcmp, op = 0))]
10836pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
10837 let neg_one = -1;
10838 macro_rules! call {
10839 ($imm5:expr) => {
10840 vcmpps(
10841 a.as_f32x16(),
10842 b.as_f32x16(),
10843 $imm5,
10844 neg_one,
10845 _MM_FROUND_CUR_DIRECTION,
10846 )
10847 };
10848 }
10849 let r = constify_imm5!(op, call);
10850 transmute(r)
10851}
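
// Illustrative sketch (not upstream code): `op` must be a compile-time constant
// predicate such as `_CMP_LT_OS`; the named wrappers like `_mm512_cmplt_ps_mask`
// above are exactly this call.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_ps_mask_sketch() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Every lane of `a` is less than the corresponding lane of `b`.
    assert_eq!(_mm512_cmp_ps_mask(a, b, _CMP_LT_OS), 0xFFFF);
}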
10852
10853/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
10854/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10855///
10856/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
10857#[inline]
10858#[target_feature(enable = "avx512f")]
10859#[rustc_args_required_const(3)]
10860#[cfg_attr(test, assert_instr(vcmp, op = 0))]
10861pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
10862 macro_rules! call {
10863 ($imm5:expr) => {
10864 vcmpps(
10865 a.as_f32x16(),
10866 b.as_f32x16(),
10867 $imm5,
10868 m as i16,
10869 _MM_FROUND_CUR_DIRECTION,
10870 )
10871 };
10872 }
10873 let r = constify_imm5!(op, call);
10874 transmute(r)
10875}
10876
10877/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
10878///
10879/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
10880#[inline]
10881#[target_feature(enable = "avx512f")]
10882#[rustc_args_required_const(2, 3)]
10883#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
10884pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
10885 let neg_one = -1;
10886 macro_rules! call {
10887 ($imm5:expr, $imm4:expr) => {
10888 vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
10889 };
10890 }
10891 let r = constify_imm5_sae!(op, sae, call);
10892 transmute(r)
10893}
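
// Illustrative sketch (not upstream code): the extra `sae` argument must also be
// a constant; with `_MM_FROUND_CUR_DIRECTION` this behaves like
// `_mm512_cmp_ps_mask`. The unordered predicate reports true for NaN lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_round_ps_mask_sketch() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(f32::NAN);
    let m = _mm512_cmp_round_ps_mask(a, b, _CMP_UNORD_Q, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(m, 0xFFFF);
}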
10894
10895/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
10896/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10897///
10898/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
10899#[inline]
10900#[target_feature(enable = "avx512f")]
10901#[rustc_args_required_const(3, 4)]
10902#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
10903pub unsafe fn _mm512_mask_cmp_round_ps_mask(
10904 m: __mmask16,
10905 a: __m512,
10906 b: __m512,
10907 op: i32,
10908 sae: i32,
10909) -> __mmask16 {
10910 macro_rules! call {
10911 ($imm5:expr, $imm4:expr) => {
10912 vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
10913 };
10914 }
10915 let r = constify_imm5_sae!(op, sae, call);
10916 transmute(r)
10917}
10918
10919/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
10920///
10921/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
10922#[inline]
10923#[target_feature(enable = "avx512f")]
10924#[cfg_attr(test, assert_instr(vcmp, op = 0))]
10925pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10926 _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
10927}
10928
10929/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
10930///
10931/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
10932#[inline]
10933#[target_feature(enable = "avx512f")]
10934#[cfg_attr(test, assert_instr(vcmp, op = 0))]
10935pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10936 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
10937}
10938
10939/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
10940///
10941/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
10942#[inline]
10943#[target_feature(enable = "avx512f")]
10944#[cfg_attr(test, assert_instr(vcmp, op = 0))]
10945pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
10946 _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
10947}
10948
10949/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
10950///
10951/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
10952#[inline]
10953#[target_feature(enable = "avx512f")]
10954#[cfg_attr(test, assert_instr(vcmp, op = 0))]
10955pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
10956 _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q)
10957}
10958
10959/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
10960///
10961/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd)
10962#[inline]
10963#[target_feature(enable = "avx512f")]
10964#[cfg_attr(test, assert_instr(vcmp))]
10965pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
10966 _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
10967}
10968
10969/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
10970/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10971///
10972/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd)
10973#[inline]
10974#[target_feature(enable = "avx512f")]
10975#[cfg_attr(test, assert_instr(vcmp))]
10976pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
10977 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS)
10978}
10979
10980/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
10981///
10982/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd)
10983#[inline]
10984#[target_feature(enable = "avx512f")]
10985#[cfg_attr(test, assert_instr(vcmp))]
10986pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
10987 _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
10988}
10989
10990/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k
10991/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
10992///
10993/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd)
10994#[inline]
10995#[target_feature(enable = "avx512f")]
10996#[cfg_attr(test, assert_instr(vcmp))]
10997pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
10998 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
10999}
11000
11001/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
11002///
11003/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd)
11004#[inline]
11005#[target_feature(enable = "avx512f")]
11006#[cfg_attr(test, assert_instr(vcmp))]
11007pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
11008 _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
11009}
11010
11011/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
11012/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11013///
11014/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
11015#[inline]
11016#[target_feature(enable = "avx512f")]
11017#[cfg_attr(test, assert_instr(vcmp))]
11018pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
11019 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
11020}
11021
11022/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
11023///
11024/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
11025#[inline]
11026#[target_feature(enable = "avx512f")]
11027#[cfg_attr(test, assert_instr(vcmp))]
11028pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
11029 _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
11030}
11031
11032/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k
11033/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11034///
11035/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
11036#[inline]
11037#[target_feature(enable = "avx512f")]
11038#[cfg_attr(test, assert_instr(vcmp))]
11039pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
11040 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
11041}
11042
11043/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
11044///
11045/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
11046#[inline]
11047#[target_feature(enable = "avx512f")]
11048#[cfg_attr(test, assert_instr(vcmp))]
11049pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
11050 _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
11051}
11052
11053/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
11054/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[cfg_attr(test, assert_instr(vcmp))]
11060pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
11061 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
11062}
11063
11064/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
11065///
11066/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
11067#[inline]
11068#[target_feature(enable = "avx512f")]
11069#[cfg_attr(test, assert_instr(vcmp))]
11070pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
11071 _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
11072}
11073
11074/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
11075/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11076///
11077/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
11078#[inline]
11079#[target_feature(enable = "avx512f")]
11080#[cfg_attr(test, assert_instr(vcmp))]
11081pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
11082 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
11083}
11084
11085/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
11086///
11087/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
11088#[inline]
11089#[target_feature(enable = "avx512f")]
11090#[rustc_args_required_const(2)]
11091#[cfg_attr(test, assert_instr(vcmp, op = 0))]
11092pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
11093 let neg_one = -1;
11094 macro_rules! call {
11095 ($imm5:expr) => {
11096 vcmppd(
11097 a.as_f64x8(),
11098 b.as_f64x8(),
11099 $imm5,
11100 neg_one,
11101 _MM_FROUND_CUR_DIRECTION,
11102 )
11103 };
11104 }
11105 let r = constify_imm5!(op, call);
11106 transmute(r)
11107}
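
// Illustrative sketch (not upstream code): the double-precision form works on
// eight lanes, so the result is an 8-bit mask.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_pd_mask_sketch() {
    let a = _mm512_set1_pd(1.0);
    let b = _mm512_set1_pd(2.0);
    assert_eq!(_mm512_cmp_pd_mask(a, b, _CMP_LE_OS), 0xFF);
}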
11108
11109/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
11110/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11111///
11112/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
11113#[inline]
11114#[target_feature(enable = "avx512f")]
11115#[rustc_args_required_const(3)]
11116#[cfg_attr(test, assert_instr(vcmp, op = 0))]
11117pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
11118 macro_rules! call {
11119 ($imm5:expr) => {
11120 vcmppd(
11121 a.as_f64x8(),
11122 b.as_f64x8(),
11123 $imm5,
11124 m as i8,
11125 _MM_FROUND_CUR_DIRECTION,
11126 )
11127 };
11128 }
11129 let r = constify_imm5!(op, call);
11130 transmute(r)
11131}
11132
11133/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
11134///
11135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
11136#[inline]
11137#[target_feature(enable = "avx512f")]
11138#[rustc_args_required_const(2, 3)]
11139#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11140pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
11141 let neg_one = -1;
11142 macro_rules! call {
11143 ($imm5:expr, $imm4:expr) => {
11144 vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
11145 };
11146 }
11147 let r = constify_imm5_sae!(op, sae, call);
11148 transmute(r)
11149}
11150
11151/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
11152/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11153///
11154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
11155#[inline]
11156#[target_feature(enable = "avx512f")]
11157#[rustc_args_required_const(3, 4)]
11158#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11159pub unsafe fn _mm512_mask_cmp_round_pd_mask(
11160 m: __mmask8,
11161 a: __m512d,
11162 b: __m512d,
11163 op: i32,
11164 sae: i32,
11165) -> __mmask8 {
11166 macro_rules! call {
11167 ($imm5:expr, $imm4:expr) => {
11168 vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
11169 };
11170 }
11171 let r = constify_imm5_sae!(op, sae, call);
11172 transmute(r)
11173}
11174
11175/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
11176///
11177/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
11178#[inline]
11179#[target_feature(enable = "avx512f")]
11180#[cfg_attr(test, assert_instr(vcmp, op = 0))]
11181pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
11182 _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
11183}
11184
11185/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
11186///
11187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask)
11188#[inline]
11189#[target_feature(enable = "avx512f")]
11190#[cfg_attr(test, assert_instr(vcmp, op = 0))]
11191pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
11192 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q)
11193}
11194
11195/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
11196///
11197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
11198#[inline]
11199#[target_feature(enable = "avx512f")]
11200#[cfg_attr(test, assert_instr(vcmp, op = 0))]
11201pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
11202 _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
11203}
11204
11205/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
11206///
11207/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
11208#[inline]
11209#[target_feature(enable = "avx512f")]
11210#[cfg_attr(test, assert_instr(vcmp, op = 0))]
11211pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
11212 _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
11213}
11214
11215/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
11216///
11217/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
11218#[inline]
11219#[target_feature(enable = "avx512f")]
11220#[rustc_args_required_const(2)]
11221#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11222pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
11223 let neg_one = -1;
11224 macro_rules! call {
11225 ($imm5:expr) => {
11226 vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
11227 };
11228 }
11229 let r = constify_imm5!(op, call);
11230 transmute(r)
11231}
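
// Illustrative sketch (not upstream code): only lane 0 is compared, so the
// returned mask is either 0 or 1.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_ss_mask_sketch() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    assert_eq!(_mm_cmp_ss_mask(a, b, _CMP_LT_OS), 1);
}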
11232
11233/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
11234///
11235/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
11236#[inline]
11237#[target_feature(enable = "avx512f")]
11238#[rustc_args_required_const(3)]
11239#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11240pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
11241 macro_rules! call {
11242 ($imm5:expr) => {
11243 vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
11244 };
11245 }
11246 let r = constify_imm5!(op, call);
11247 transmute(r)
11248}
11249
11250/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
11251///
11252/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
11253#[inline]
11254#[target_feature(enable = "avx512f")]
11255#[rustc_args_required_const(2, 3)]
11256#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11257pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
11258 let neg_one = -1;
11259 macro_rules! call {
11260 ($imm5:expr, $imm4:expr) => {
11261 vcmpss(a, b, $imm5, neg_one, $imm4)
11262 };
11263 }
11264 let r = constify_imm5_sae!(op, sae, call);
11265 transmute(r)
11266}
11267
11268/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
11269///
11270/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
11271#[inline]
11272#[target_feature(enable = "avx512f")]
11273#[rustc_args_required_const(3, 4)]
11274#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11275pub unsafe fn _mm_mask_cmp_round_ss_mask(
11276 m: __mmask8,
11277 a: __m128,
11278 b: __m128,
11279 op: i32,
11280 sae: i32,
11281) -> __mmask8 {
11282 macro_rules! call {
11283 ($imm5:expr, $imm4:expr) => {
11284 vcmpss(a, b, $imm5, m as i8, $imm4)
11285 };
11286 }
11287 let r = constify_imm5_sae!(op, sae, call);
11288 transmute(r)
11289}
11290
11291/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
11292///
11293/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
11294#[inline]
11295#[target_feature(enable = "avx512f")]
11296#[rustc_args_required_const(2)]
11297#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11298pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
11299 let neg_one = -1;
11300 macro_rules! call {
11301 ($imm5:expr) => {
11302 vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
11303 };
11304 }
11305 let r = constify_imm5!(op, call);
11306 transmute(r)
11307}
11308
11309/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
11310///
11311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
11312#[inline]
11313#[target_feature(enable = "avx512f")]
11314#[rustc_args_required_const(3)]
11315#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11316pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
11317 macro_rules! call {
11318 ($imm5:expr) => {
11319 vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
11320 };
11321 }
11322 let r = constify_imm5!(op, call);
11323 transmute(r)
11324}
11325
11326/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
11327///
11328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
11329#[inline]
11330#[target_feature(enable = "avx512f")]
11331#[rustc_args_required_const(2, 3)]
11332#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11333pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
11334 let neg_one = -1;
11335 macro_rules! call {
11336 ($imm5:expr, $imm4:expr) => {
11337 vcmpsd(a, b, $imm5, neg_one, $imm4)
11338 };
11339 }
11340 let r = constify_imm5_sae!(op, sae, call);
11341 transmute(r)
11342}
11343
11344/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
11345///
11346/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
11347#[inline]
11348#[target_feature(enable = "avx512f")]
11349#[rustc_args_required_const(3, 4)]
11350#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
11351pub unsafe fn _mm_mask_cmp_round_sd_mask(
11352 m: __mmask8,
11353 a: __m128d,
11354 b: __m128d,
11355 op: i32,
11356 sae: i32,
11357) -> __mmask8 {
11358 macro_rules! call {
11359 ($imm5:expr, $imm4:expr) => {
11360 vcmpsd(a, b, $imm5, m as i8, $imm4)
11361 };
11362 }
11363 let r = constify_imm5_sae!(op, sae, call);
11364 transmute(r)
11365}
11366
11367/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
11368///
11369/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
11370#[inline]
11371#[target_feature(enable = "avx512f")]
11372#[cfg_attr(test, assert_instr(vpcmp))]
11373pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11374 simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
11375}
11376
11377/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
11378/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11379///
11380/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
11381#[inline]
11382#[target_feature(enable = "avx512f")]
11383#[cfg_attr(test, assert_instr(vpcmp))]
11384pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11385 _mm512_cmplt_epu32_mask(a, b) & m
11386}
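
// Illustrative sketch (not upstream code): the zeromask form can only keep bits
// that are already set in the incoming mask `m`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_cmplt_epu32_sketch() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    let m: __mmask16 = 0b00000000_11111111;
    // Every lane satisfies a < b, but only the lanes enabled in `m` survive.
    assert_eq!(_mm512_mask_cmplt_epu32_mask(m, a, b), m);
}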
11387
11388/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector.
11389///
11390/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32)
11391#[inline]
11392#[target_feature(enable = "avx512f")]
11393#[cfg_attr(test, assert_instr(vpcmp))]
11394pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11395 simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
11396}
11397
11398/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k
11399/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11400///
11401/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32)
11402#[inline]
11403#[target_feature(enable = "avx512f")]
11404#[cfg_attr(test, assert_instr(vpcmp))]
11405pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11406 _mm512_cmpgt_epu32_mask(a, b) & m
11407}
11408
11409/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
11410///
11411/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32)
11412#[inline]
11413#[target_feature(enable = "avx512f")]
11414#[cfg_attr(test, assert_instr(vpcmp))]
11415pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11416 simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
11417}
11418
11419/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
11420/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11421///
11422/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32)
11423#[inline]
11424#[target_feature(enable = "avx512f")]
11425#[cfg_attr(test, assert_instr(vpcmp))]
11426pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11427 _mm512_cmple_epu32_mask(a, b) & m
11428}
11429
11430/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
11431///
11432/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32)
11433#[inline]
11434#[target_feature(enable = "avx512f")]
11435#[cfg_attr(test, assert_instr(vpcmp))]
11436pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11437 simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
11438}
11439
11440/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
11441/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11442///
11443/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32)
11444#[inline]
11445#[target_feature(enable = "avx512f")]
11446#[cfg_attr(test, assert_instr(vpcmp))]
11447pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11448 _mm512_cmpge_epu32_mask(a, b) & m
11449}
11450
11451/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
11452///
11453/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32)
11454#[inline]
11455#[target_feature(enable = "avx512f")]
11456#[cfg_attr(test, assert_instr(vpcmp))]
11457pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11458 simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
11459}
11460
11461/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k
11462/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11463///
11464/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32)
11465#[inline]
11466#[target_feature(enable = "avx512f")]
11467#[cfg_attr(test, assert_instr(vpcmp))]
11468pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11469 _mm512_cmpeq_epu32_mask(a, b) & m
11470}
11471
11472/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector.
11473///
11474/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32)
11475#[inline]
11476#[target_feature(enable = "avx512f")]
11477#[cfg_attr(test, assert_instr(vpcmp))]
11478pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11479 simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
11480}
11481
11482/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k
11483/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11484///
11485/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask)
11486#[inline]
11487#[target_feature(enable = "avx512f")]
11488#[cfg_attr(test, assert_instr(vpcmp))]
11489pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11490 _mm512_cmpneq_epu32_mask(a, b) & m
11491}
11492
11493/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op.
11494///
11495/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
11496#[inline]
11497#[target_feature(enable = "avx512f")]
11498#[rustc_args_required_const(2)]
11499#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
11500pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
11501 let neg_one = -1;
11502 macro_rules! call {
11503 ($imm3:expr) => {
11504 vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
11505 };
11506 }
11507 let r = constify_imm3!(op, call);
11508 transmute(r)
11509}
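
// Illustrative sketch (not upstream code), assuming the `_MM_CMPINT_LT` predicate
// constant defined alongside `_MM_CMPINT_ENUM`; this call is equivalent to
// `_mm512_cmplt_epu32_mask(a, b)`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn cmp_epu32_mask_sketch() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    assert_eq!(_mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT), 0xFFFF);
}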
11510
11511/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op,
11512/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11513///
11514/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[rustc_args_required_const(3)]
11518#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
11519pub unsafe fn _mm512_mask_cmp_epu32_mask(
11520 m: __mmask16,
11521 a: __m512i,
11522 b: __m512i,
11523 op: _MM_CMPINT_ENUM,
11524) -> __mmask16 {
11525 macro_rules! call {
11526 ($imm3:expr) => {
11527 vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
11528 };
11529 }
11530 let r = constify_imm3!(op, call);
11531 transmute(r)
11532}
11533
11534/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector.
11535///
11536/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
11537#[inline]
11538#[target_feature(enable = "avx512f")]
11539#[cfg_attr(test, assert_instr(vpcmp))]
11540pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11541 simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
11542}
11543
11544/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k
11545/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11546///
11547/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
11548#[inline]
11549#[target_feature(enable = "avx512f")]
11550#[cfg_attr(test, assert_instr(vpcmp))]
11551pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11552 _mm512_cmplt_epi32_mask(a, b) & m
11553}
11554
11555/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector.
11556///
11557/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32)
11558#[inline]
11559#[target_feature(enable = "avx512f")]
11560#[cfg_attr(test, assert_instr(vpcmp))]
11561pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11562 simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
11563}
11564
11565/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k
11566/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11567///
11568/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32)
11569#[inline]
11570#[target_feature(enable = "avx512f")]
11571#[cfg_attr(test, assert_instr(vpcmp))]
11572pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11573 _mm512_cmpgt_epi32_mask(a, b) & m
11574}
11575
11576/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
11577///
11578/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32)
11579#[inline]
11580#[target_feature(enable = "avx512f")]
11581#[cfg_attr(test, assert_instr(vpcmp))]
11582pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11583 simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
11584}
11585
11586/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
11587/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11588///
11589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
11590#[inline]
11591#[target_feature(enable = "avx512f")]
11592#[cfg_attr(test, assert_instr(vpcmp))]
11593pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11594 _mm512_cmple_epi32_mask(a, b) & m
11595}
11596
11597/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
11598///
11599/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
11600#[inline]
11601#[target_feature(enable = "avx512f")]
11602#[cfg_attr(test, assert_instr(vpcmp))]
11603pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11604 simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
11605}
11606
11607/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
11608/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11609///
11610/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
11611#[inline]
11612#[target_feature(enable = "avx512f")]
11613#[cfg_attr(test, assert_instr(vpcmp))]
11614pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11615 _mm512_cmpge_epi32_mask(a, b) & m
11616}
11617
11618/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
11619///
11620/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
11621#[inline]
11622#[target_feature(enable = "avx512f")]
11623#[cfg_attr(test, assert_instr(vpcmp))]
11624pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11625 simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
11626}
11627
11628/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
11629/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11630///
11631/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
11632#[inline]
11633#[target_feature(enable = "avx512f")]
11634#[cfg_attr(test, assert_instr(vpcmp))]
11635pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11636 _mm512_cmpeq_epi32_mask(a, b) & m
11637}
11638
11639/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
11640///
11641/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[cfg_attr(test, assert_instr(vpcmp))]
11645pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
11646 simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
11647}
11648
11649/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
11650/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11651///
11652/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
11653#[inline]
11654#[target_feature(enable = "avx512f")]
11655#[cfg_attr(test, assert_instr(vpcmp))]
11656pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
11657 _mm512_cmpneq_epi32_mask(a, b) & m
11658}
11659
11660/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
11661///
11662/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
11663#[inline]
11664#[target_feature(enable = "avx512f")]
11665#[rustc_args_required_const(2)]
11666#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
11667pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
11668 let neg_one = -1;
11669 macro_rules! call {
11670 ($imm3:expr) => {
11671 vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
11672 };
11673 }
11674 let r = constify_imm3!(op, call);
11675 transmute(r)
11676}
11677
11678/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
11679/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11680///
11681/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
11682#[inline]
11683#[target_feature(enable = "avx512f")]
11684#[rustc_args_required_const(3)]
11685#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
11686pub unsafe fn _mm512_mask_cmp_epi32_mask(
11687 m: __mmask16,
11688 a: __m512i,
11689 b: __m512i,
11690 op: _MM_CMPINT_ENUM,
11691) -> __mmask16 {
11692 macro_rules! call {
11693 ($imm3:expr) => {
11694 vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
11695 };
11696 }
11697 let r = constify_imm3!(op, call);
11698 transmute(r)
11699}
11700
11701/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
11702///
11703/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
11704#[inline]
11705#[target_feature(enable = "avx512f")]
11706#[cfg_attr(test, assert_instr(vpcmp))]
11707pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11708 simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
11709}
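
// Illustrative sketch (not upstream code): the comparison is unsigned, so a lane
// holding -1 is treated as u64::MAX and is not less than 0.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn cmplt_epu64_sketch() {
    let a = _mm512_set1_epi64(-1); // u64::MAX when viewed as unsigned
    let b = _mm512_set1_epi64(0);
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0);
    assert_eq!(_mm512_cmplt_epu64_mask(b, a), 0xFF);
}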
11710
11711/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
11712/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11713///
11714/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
11715#[inline]
11716#[target_feature(enable = "avx512f")]
11717#[cfg_attr(test, assert_instr(vpcmp))]
11718pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11719 _mm512_cmplt_epu64_mask(a, b) & m
11720}
11721
11722/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector.
11723///
11724/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
11725#[inline]
11726#[target_feature(enable = "avx512f")]
11727#[cfg_attr(test, assert_instr(vpcmp))]
11728pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11729 simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
11730}
11731
11732/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k
11733/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11734///
11735/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
11736#[inline]
11737#[target_feature(enable = "avx512f")]
11738#[cfg_attr(test, assert_instr(vpcmp))]
11739pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11740 _mm512_cmpgt_epu64_mask(a, b) & m
11741}
11742
11743/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
11744///
11745/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64)
11746#[inline]
11747#[target_feature(enable = "avx512f")]
11748#[cfg_attr(test, assert_instr(vpcmp))]
11749pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11750 simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
11751}
11752
11753/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
11754/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11755///
11756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64)
11757#[inline]
11758#[target_feature(enable = "avx512f")]
11759#[cfg_attr(test, assert_instr(vpcmp))]
11760pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11761 _mm512_cmple_epu64_mask(a, b) & m
11762}
11763
11764/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
11765///
11766/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
11767#[inline]
11768#[target_feature(enable = "avx512f")]
11769#[cfg_attr(test, assert_instr(vpcmp))]
11770pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11771 simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
11772}
11773
11774/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
11775/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11776///
11777/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
11778#[inline]
11779#[target_feature(enable = "avx512f")]
11780#[cfg_attr(test, assert_instr(vpcmp))]
11781pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11782 _mm512_cmpge_epu64_mask(a, b) & m
11783}
11784
11785/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
11786///
11787/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
11788#[inline]
11789#[target_feature(enable = "avx512f")]
11790#[cfg_attr(test, assert_instr(vpcmp))]
11791pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11792 simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
11793}
11794
11795/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
11796/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11797///
11798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
11799#[inline]
11800#[target_feature(enable = "avx512f")]
11801#[cfg_attr(test, assert_instr(vpcmp))]
11802pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11803 _mm512_cmpeq_epu64_mask(a, b) & m
11804}
11805
11806/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
11807///
11808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
11809#[inline]
11810#[target_feature(enable = "avx512f")]
11811#[cfg_attr(test, assert_instr(vpcmp))]
11812pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11813 simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
11814}
11815
11816/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
11817/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11818///
11819/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
11820#[inline]
11821#[target_feature(enable = "avx512f")]
11822#[cfg_attr(test, assert_instr(vpcmp))]
11823pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11824 _mm512_cmpneq_epu64_mask(a, b) & m
11825}
11826
11827/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
11828///
11829/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
11830#[inline]
11831#[target_feature(enable = "avx512f")]
11832#[rustc_args_required_const(2)]
11833#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
11834pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
11835 let neg_one = -1;
11836 macro_rules! call {
11837 ($imm3:expr) => {
11838 vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
11839 };
11840 }
11841 let r = constify_imm3!(op, call);
11842 transmute(r)
11843}
11844
11845/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
11846/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11847///
11848/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
11849#[inline]
11850#[target_feature(enable = "avx512f")]
11851#[rustc_args_required_const(3)]
11852#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
11853pub unsafe fn _mm512_mask_cmp_epu64_mask(
11854 m: __mmask8,
11855 a: __m512i,
11856 b: __m512i,
11857 op: _MM_CMPINT_ENUM,
11858) -> __mmask8 {
11859 macro_rules! call {
11860 ($imm3:expr) => {
11861 vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
11862 };
11863 }
11864 let r = constify_imm3!(op, call);
11865 transmute(r)
11866}
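
// Illustrative sketch (not upstream code; the helper name is ours): with the immediate
// predicate, `m` acts as a writemask over the produced bits, so `_MM_CMPINT_TRUE` yields
// exactly `m`, `_MM_CMPINT_FALSE` yields 0, and `_MM_CMPINT_NLT` ("not less-than") is the
// greater-than-or-equal predicate.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_cmp_epu64_mask() {
    let a = _mm512_set1_epi32(7);
    let m: __mmask8 = 0b1010_1010;
    assert_eq!(_mm512_mask_cmp_epu64_mask(m, a, a, _MM_CMPINT_TRUE), m);
    assert_eq!(_mm512_mask_cmp_epu64_mask(m, a, a, _MM_CMPINT_FALSE), 0);
    assert_eq!(_mm512_mask_cmp_epu64_mask(m, a, a, _MM_CMPINT_NLT), m); // a >= a in every lane
}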
11867
11868/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
11869///
11870/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
11871#[inline]
11872#[target_feature(enable = "avx512f")]
11873#[cfg_attr(test, assert_instr(vpcmp))]
11874pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11875 simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
11876}
11877
11878/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
11879/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11880///
11881/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
11882#[inline]
11883#[target_feature(enable = "avx512f")]
11884#[cfg_attr(test, assert_instr(vpcmp))]
11885pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11886 _mm512_cmplt_epi64_mask(a, b) & m
11887}
11888
11889/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector.
11890///
11891/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64)
11892#[inline]
11893#[target_feature(enable = "avx512f")]
11894#[cfg_attr(test, assert_instr(vpcmp))]
11895pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11896 simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
11897}
11898
11899/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k
11900/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11901///
11902/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64)
11903#[inline]
11904#[target_feature(enable = "avx512f")]
11905#[cfg_attr(test, assert_instr(vpcmp))]
11906pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11907 _mm512_cmpgt_epi64_mask(a, b) & m
11908}
11909
11910/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
11911///
11912/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64)
11913#[inline]
11914#[target_feature(enable = "avx512f")]
11915#[cfg_attr(test, assert_instr(vpcmp))]
11916pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11917 simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
11918}
11919
11920/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
11921/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11922///
11923/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64)
11924#[inline]
11925#[target_feature(enable = "avx512f")]
11926#[cfg_attr(test, assert_instr(vpcmp))]
11927pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11928 _mm512_cmple_epi64_mask(a, b) & m
11929}
11930
11931/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
11932///
11933/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
11934#[inline]
11935#[target_feature(enable = "avx512f")]
11936#[cfg_attr(test, assert_instr(vpcmp))]
11937pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11938 simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
11939}
11940
11941/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
11942/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11943///
11944/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
11945#[inline]
11946#[target_feature(enable = "avx512f")]
11947#[cfg_attr(test, assert_instr(vpcmp))]
11948pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11949 _mm512_cmpge_epi64_mask(a, b) & m
11950}
11951
11952/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
11953///
11954/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
11955#[inline]
11956#[target_feature(enable = "avx512f")]
11957#[cfg_attr(test, assert_instr(vpcmp))]
11958pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11959 simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
11960}
11961
11962/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
11963/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11964///
11965/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
11966#[inline]
11967#[target_feature(enable = "avx512f")]
11968#[cfg_attr(test, assert_instr(vpcmp))]
11969pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11970 _mm512_cmpeq_epi64_mask(a, b) & m
11971}
11972
11973/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
11974///
11975/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
11976#[inline]
11977#[target_feature(enable = "avx512f")]
11978#[cfg_attr(test, assert_instr(vpcmp))]
11979pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
11980 simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
11981}
11982
11983/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
11984/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
11985///
11986/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
11987#[inline]
11988#[target_feature(enable = "avx512f")]
11989#[cfg_attr(test, assert_instr(vpcmp))]
11990pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
11991 _mm512_cmpneq_epi64_mask(a, b) & m
11992}
11993
11994/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
11995///
11996/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
11997#[inline]
11998#[target_feature(enable = "avx512f")]
11999#[rustc_args_required_const(2)]
12000#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
12001pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
12002 let neg_one = -1;
12003 macro_rules! call {
12004 ($imm3:expr) => {
12005 vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
12006 };
12007 }
12008 let r = constify_imm3!(op, call);
12009 transmute(r)
12010}
12011
12012/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
12013/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
12014///
12015/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
12016#[inline]
12017#[target_feature(enable = "avx512f")]
12018#[rustc_args_required_const(3)]
12019#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
12020pub unsafe fn _mm512_mask_cmp_epi64_mask(
12021 m: __mmask8,
12022 a: __m512i,
12023 b: __m512i,
12024 op: _MM_CMPINT_ENUM,
12025) -> __mmask8 {
12026 macro_rules! call {
12027 ($imm3:expr) => {
12028 vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
12029 };
12030 }
12031 let r = constify_imm3!(op, call);
12032 transmute(r)
12033}
12034
12035/// Returns vector of type `__m512d` with undefined elements.
12036///
12037/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
12038#[inline]
12039#[target_feature(enable = "avx512f")]
12040// This intrinsic has no corresponding instruction.
12041pub unsafe fn _mm512_undefined_pd() -> __m512d {
12042 _mm512_set1_pd(0.0)
12043}
12044
12045/// Returns vector of type `__m512` with undefined elements.
12046///
12047/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
12048#[inline]
12049#[target_feature(enable = "avx512f")]
12050// This intrinsic has no corresponding instruction.
12051pub unsafe fn _mm512_undefined_ps() -> __m512 {
12052 _mm512_set1_ps(0.0)
12053}
12054
12055/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
12056/// floating-point elements) from memory into result.
12057/// `mem_addr` does not need to be aligned on any particular boundary.
12058///
12059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
12060#[inline]
12061#[target_feature(enable = "avx512f")]
12062#[cfg_attr(test, assert_instr(vmovups))]
12063pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
12064 ptr::read_unaligned(mem_addr as *const __m512d)
12065}
12066
12067/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
12068/// floating-point elements) from `a` into memory.
12069/// `mem_addr` does not need to be aligned on any particular boundary.
12070///
12071/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
12072#[inline]
12073#[target_feature(enable = "avx512f")]
12074#[cfg_attr(test, assert_instr(vmovups))]
12075pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
12076 ptr::write_unaligned(mem_addr as *mut __m512d, a);
12077}
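
// Illustrative sketch (not upstream code; the helper name is ours): since these are the
// unaligned forms, an ordinary `[f64; 8]` is a valid source and destination; no 64-byte
// alignment is required.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_loadu_storeu_pd() {
    let src = [1.0f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
    let v = _mm512_loadu_pd(src.as_ptr());
    let mut dst = [0.0f64; 8];
    _mm512_storeu_pd(dst.as_mut_ptr(), v);
    assert_eq!(dst, src);
}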
12078
12079/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
12080/// floating-point elements) from memory into result.
12081/// `mem_addr` does not need to be aligned on any particular boundary.
12082///
12083/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
12084#[inline]
12085#[target_feature(enable = "avx512f")]
12086#[cfg_attr(test, assert_instr(vmovups))]
12087pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
12088 ptr::read_unaligned(mem_addr as *const __m512)
12089}
12090
12091/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
12092/// floating-point elements) from `a` into memory.
12093/// `mem_addr` does not need to be aligned on any particular boundary.
12094///
12095/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
12096#[inline]
12097#[target_feature(enable = "avx512f")]
12098#[cfg_attr(test, assert_instr(vmovups))]
12100pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
12101 ptr::write_unaligned(mem_addr as *mut __m512, a);
12102}
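
// Illustrative sketch (not upstream code; the helper name is ours): reading a computation's
// result back into ordinary Rust data is just an unaligned store into a 16-element array.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_storeu_ps_readback() {
    let v = _mm512_add_ps(_mm512_set1_ps(1.5), _mm512_set1_ps(0.5));
    let mut out = [0.0f32; 16];
    _mm512_storeu_ps(out.as_mut_ptr(), v);
    assert!(out.iter().all(|&x| x == 2.0));
}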
12103
12104/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the
12105/// supplied values in reverse order.
12106///
12107/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
12108#[inline]
12109#[target_feature(enable = "avx512f")]
12110pub unsafe fn _mm512_setr_pd(
12111 e0: f64,
12112 e1: f64,
12113 e2: f64,
12114 e3: f64,
12115 e4: f64,
12116 e5: f64,
12117 e6: f64,
12118 e7: f64,
12119) -> __m512d {
12120 let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
12121 transmute(r)
12122}
12123
12124/// Sets packed double-precision (64-bit) floating-point elements in `dst` with the supplied values.
12125///
12126/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
12127#[inline]
12128#[target_feature(enable = "avx512f")]
12129pub unsafe fn _mm512_set_pd(
12130 e0: f64,
12131 e1: f64,
12132 e2: f64,
12133 e3: f64,
12134 e4: f64,
12135 e5: f64,
12136 e6: f64,
12137 e7: f64,
12138) -> __m512d {
12139 _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
12140}
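
// Illustrative sketch (not upstream code; the helper name is ours): `_mm512_set_pd` takes its
// arguments from the highest lane down to lane 0, while `_mm512_setr_pd` takes them in memory
// order, so the two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_vs_setr_pd() {
    let hi_to_lo = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
    let lo_to_hi = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let mut a = [0.0f64; 8];
    let mut b = [0.0f64; 8];
    _mm512_storeu_pd(a.as_mut_ptr(), hi_to_lo);
    _mm512_storeu_pd(b.as_mut_ptr(), lo_to_hi);
    assert_eq!(a, b);
    assert_eq!(a[0], 0.0); // lane 0 holds the *last* argument of `_mm512_set_pd`
}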
12141
12142/// Equal
12143pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
12144/// Less-than
12145pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
12146/// Less-than-or-equal
12147pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
12148/// False
12149pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
12150/// Not-equal
12151pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
12152/// Not less-than
12153pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
12154/// Not less-than-or-equal
12155pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
12156/// True
12157pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
12158
12159/// interval [1, 2)
12160pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
12161/// interval [0.5, 2)
12162pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
12163/// interval [0.5, 1)
12164pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
12165/// interval [0.75, 1.5)
12166pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
12167
12168/// sign = sign(SRC)
12169pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
12170/// sign = 0
12171pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
12172/// DEST = NaN if sign(SRC) = 1
12173pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
12174
12175pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
12176pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
12177pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
12178pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
12179pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
12180pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
12181pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
12182pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
12183pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
12184pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
12185pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
12186pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
12187pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
12188pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
12189pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
12190pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
12191pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
12192pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
12193pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
12194pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
12195pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
12196pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
12197pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
12198pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
12199pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
12200pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
12201pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
12202pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
12203pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
12204pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
12205pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
12206pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
12207pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
12208pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
12209pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
12210pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
12211pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
12212pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
12213pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
12214pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
12215pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
12216pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
12217pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
12218pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
12219pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
12220pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
12221pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
12222pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
12223pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
12224pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
12225pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
12226pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
12227pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
12228pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
12229pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
12230pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
12231pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
12232pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
12233pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
12234pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
12235pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
12236pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
12237pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
12238pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
12239pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
12240pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
12241pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
12242pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
12243pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
12244pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
12245pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
12246pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
12247pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
12248pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
12249pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
12250pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
12251pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
12252pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
12253pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
12254pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
12255pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
12256pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
12257pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
12258pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
12259pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
12260pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
12261pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
12262pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
12263pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
12264pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
12265pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
12266pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
12267pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
12268pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
12269pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
12270pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
12271pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
12272pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
12273pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
12274pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
12275pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
12276pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
12277pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
12278pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
12279pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
12280pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
12281pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
12282pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
12283pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
12284pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
12285pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
12286pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
12287pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
12288pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
12289pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
12290pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
12291pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
12292pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
12293pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
12294pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
12295pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
12296pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
12297pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
12298pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
12299pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
12300pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
12301pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
12302pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
12303pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
12304pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
12305pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
12306pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
12307pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
12308pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
12309pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
12310pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
12311pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
12312pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
12313pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
12314pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
12315pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
12316pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
12317pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
12318pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
12319pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
12320pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
12321pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
12322pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
12323pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
12324pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
12325pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
12326pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
12327pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
12328pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
12329pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
12330pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
12331pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
12332pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
12333pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
12334pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
12335pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
12336pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
12337pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
12338pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
12339pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
12340pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
12341pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
12342pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
12343pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
12344pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
12345pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
12346pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
12347pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
12348pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
12349pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
12350pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
12351pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
12352pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
12353pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
12354pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
12355pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
12356pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
12357pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
12358pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
12359pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
12360pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
12361pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
12362pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
12363pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
12364pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
12365pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
12366pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
12367pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
12368pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
12369pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
12370pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
12371pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
12372pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
12373pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
12374pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
12375pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
12376pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
12377pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
12378pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
12379pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
12380pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
12381pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
12382pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
12383pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
12384pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
12385pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
12386pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
12387pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
12388pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
12389pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
12390pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
12391pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
12392pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
12393pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
12394pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
12395pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
12396pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
12397pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
12398pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
12399pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
12400pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
12401pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
12402pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
12403pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
12404pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
12405pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
12406pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
12407pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
12408pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
12409pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
12410pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
12411pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
12412pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
12413pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
12414pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
12415pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
12416pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
12417pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
12418pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
12419pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
12420pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
12421pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
12422pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
12423pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
12424pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
12425pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
12426pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
12427pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
12428pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
12429pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
12430pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
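
// Illustrative note (not upstream code; the helper name is ours): each `_MM_PERM_*` name packs
// four 2-bit selectors (A = 0, B = 1, C = 2, D = 3), with the first letter occupying the
// highest bit pair, so the hex values above follow directly from the spelling.
#[cfg(test)]
#[allow(dead_code)]
fn example_mm_perm_encoding() {
    assert_eq!(_MM_PERM_ABCD, 0b00_01_10_11); // 0x1B
    assert_eq!(_MM_PERM_DCBA, 0b11_10_01_00); // 0xE4
}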
12431
12432#[allow(improper_ctypes)]
12433extern "C" {
12434 #[link_name = "llvm.x86.avx512.pmul.dq.512"]
12435 fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
12436 #[link_name = "llvm.x86.avx512.pmulu.dq.512"]
12437 fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
12438
12439 #[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
12440 fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
12441 #[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
12442 fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
12443 #[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
12444 fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
12445 #[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
12446 fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
12447
12448 #[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
12449 fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
12450 #[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
12451 fn vpmaxuq(a: u64x8, b: u64x8) -> i64x8;
12452 #[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
12453 fn vpminud(a: u32x16, b: u32x16) -> u32x16;
12454 #[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
12455 fn vpminuq(a: u64x8, b: u64x8) -> i64x8;
12456
12457 #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
12458 fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
12459 #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
12460 fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
12461
12462 #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
12463 fn vfmadd132ps(a: f32x16, b: f32x16, c: f32x16, rounding: i32) -> f32x16;
12464 #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
12465 fn vfmadd132pd(a: f64x8, b: f64x8, c: f64x8, rounding: i32) -> f64x8;
12466
12467 #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
12468 fn vfmaddsub213ps(a: f32x16, b: f32x16, c: f32x16, d: i32) -> f32x16; //from clang
12469 #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
12470 fn vfmaddsub213pd(a: f64x8, b: f64x8, c: f64x8, d: i32) -> f64x8; //from clang
12471
12472 #[link_name = "llvm.x86.avx512.add.ps.512"]
12473 fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
12474 #[link_name = "llvm.x86.avx512.add.pd.512"]
12475 fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
12476 #[link_name = "llvm.x86.avx512.sub.ps.512"]
12477 fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
12478 #[link_name = "llvm.x86.avx512.sub.pd.512"]
12479 fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
12480 #[link_name = "llvm.x86.avx512.mul.ps.512"]
12481 fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
12482 #[link_name = "llvm.x86.avx512.mul.pd.512"]
12483 fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
12484 #[link_name = "llvm.x86.avx512.div.ps.512"]
12485 fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
12486 #[link_name = "llvm.x86.avx512.div.pd.512"]
12487 fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
12488
12489 #[link_name = "llvm.x86.avx512.max.ps.512"]
12490 fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
12491 #[link_name = "llvm.x86.avx512.max.pd.512"]
12492 fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
12493 #[link_name = "llvm.x86.avx512.min.ps.512"]
12494 fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
12495 #[link_name = "llvm.x86.avx512.min.pd.512"]
12496 fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
12497
12498 #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
12499 fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
12500 #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
12501 fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
12502
12503 #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
12504 fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
12505 #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
12506 fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
12507
12508 #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
12509 fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
12510 #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
12511 fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
12512 #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
12513 fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
12514 #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
12515 fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
12516
12517 #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
12518 fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
12519 #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
12520 fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
12521 #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
12522 fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
12523
12524 #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
12525 fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
12526 #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
12527 fn vcvttps2udq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> u32x16;
12528 #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
12529 fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
12530 #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
12531 fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
12532
12533 #[link_name = "llvm.x86.avx512.gather.dpd.512"]
12534 fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
12535 #[link_name = "llvm.x86.avx512.gather.dps.512"]
12536 fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
12537 #[link_name = "llvm.x86.avx512.gather.qpd.512"]
12538 fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
12539 #[link_name = "llvm.x86.avx512.gather.qps.512"]
12540 fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
12541 #[link_name = "llvm.x86.avx512.gather.dpq.512"]
12542 fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
12543 #[link_name = "llvm.x86.avx512.gather.dpi.512"]
12544 fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
12545 #[link_name = "llvm.x86.avx512.gather.qpq.512"]
12546 fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
12547 #[link_name = "llvm.x86.avx512.gather.qpi.512"]
12548 fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
12549
12550 #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
12551 fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
12552 #[link_name = "llvm.x86.avx512.scatter.dps.512"]
12553 fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
12554 #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
12555 fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
12556 #[link_name = "llvm.x86.avx512.scatter.qps.512"]
12557 fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
12558 #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
12559 fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
12560 #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
12561 fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
12562 #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
12563 fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
12564 #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
12565 fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
12566
12567 #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
12568 fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
12569 #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
12570 fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
12571 #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
12572 fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
12573 #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
12574 fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
12575 #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
12576 fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
12577 #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
12578 fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
12579 #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
12580 fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
12581 #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
12582 fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
12583
12584 #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
12585 fn vprold(a: i32x16, imm8: i32) -> i32x16;
12586 #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
12587 fn vprord(a: i32x16, imm8: i32) -> i32x16;
12588 #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
12589 fn vprolq(a: i64x8, imm8: i32) -> i64x8;
12590 #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
12591 fn vprorq(a: i64x8, imm8: i32) -> i64x8;
12592
12593 #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
12594 fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
12595 #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
12596 fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
12597 #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
12598 fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
12599 #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
12600 fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
12601
12602 #[link_name = "llvm.x86.avx512.psllv.d.512"]
12603 fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
12604 #[link_name = "llvm.x86.avx512.psrlv.d.512"]
12605 fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
12606 #[link_name = "llvm.x86.avx512.psllv.q.512"]
12607 fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
12608 #[link_name = "llvm.x86.avx512.psrlv.q.512"]
12609 fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
12610
12611 #[link_name = "llvm.x86.avx512.pslli.d.512"]
12612 fn vpsllid(a: i32x16, imm8: u32) -> i32x16;
12613 #[link_name = "llvm.x86.avx512.psrli.d.512"]
12614 fn vpsrlid(a: i32x16, imm8: u32) -> i32x16;
12615 #[link_name = "llvm.x86.avx512.pslli.q.512"]
12616 fn vpslliq(a: i64x8, imm8: u32) -> i64x8;
12617 #[link_name = "llvm.x86.avx512.psrli.q.512"]
12618 fn vpsrliq(a: i64x8, imm8: u32) -> i64x8;
12619
12620 #[link_name = "llvm.x86.avx512.psll.d.512"]
12621 fn vpslld(a: i32x16, count: i32x4) -> i32x16;
12622 #[link_name = "llvm.x86.avx512.psrl.d.512"]
12623 fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
12624 #[link_name = "llvm.x86.avx512.psll.q.512"]
12625 fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
12626 #[link_name = "llvm.x86.avx512.psrl.q.512"]
12627 fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
12628
12629 #[link_name = "llvm.x86.avx512.psra.d.512"]
12630 fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
12631 #[link_name = "llvm.x86.avx512.psra.q.512"]
12632 fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
12633
12634 #[link_name = "llvm.x86.avx512.psrai.d.512"]
12635 fn vpsraid(a: i32x16, imm8: u32) -> i32x16;
12636 #[link_name = "llvm.x86.avx512.psrai.q.512"]
12637 fn vpsraiq(a: i64x8, imm8: u32) -> i64x8;
12638
12639 #[link_name = "llvm.x86.avx512.psrav.d.512"]
12640 fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
12641 #[link_name = "llvm.x86.avx512.psrav.q.512"]
12642 fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
12643
12644 #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
12645 fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
12646 #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
12647 fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
12648
12649 #[link_name = "llvm.x86.avx512.permvar.si.512"]
12650 fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
12651 #[link_name = "llvm.x86.avx512.permvar.di.512"]
12652 fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
12653 #[link_name = "llvm.x86.avx512.permvar.sf.512"]
12654 fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
12655 #[link_name = "llvm.x86.avx512.permvar.df.512"]
12656 fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
12657
12658 #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
12659 fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
12660 #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
12661 fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
12662 #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
12663 fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
12664 #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
12665 fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
12666}
12667
12668#[cfg(test)]
12669mod tests {
12670 use std;
12671 use stdarch_test::simd_test;
12672
12673 use crate::core_arch::x86::*;
12674 use crate::hint::black_box;
12675
12676 #[simd_test(enable = "avx512f")]
12677 unsafe fn test_mm512_abs_epi32() {
12678 #[rustfmt::skip]
12679 let a = _mm512_setr_epi32(
12680 0, 1, -1, i32::MAX,
12681 i32::MIN, 100, -100, -32,
12682 0, 1, -1, i32::MAX,
12683 i32::MIN, 100, -100, -32,
12684 );
12685 let r = _mm512_abs_epi32(a);
12686 let e = _mm512_setr_epi32(
12687 0,
12688 1,
12689 1,
12690 i32::MAX,
12691 i32::MAX.wrapping_add(1),
12692 100,
12693 100,
12694 32,
12695 0,
12696 1,
12697 1,
12698 i32::MAX,
12699 i32::MAX.wrapping_add(1),
12700 100,
12701 100,
12702 32,
12703 );
12704 assert_eq_m512i(r, e);
12705 }
12706
12707 #[simd_test(enable = "avx512f")]
12708 unsafe fn test_mm512_mask_abs_epi32() {
12709 #[rustfmt::skip]
12710 let a = _mm512_setr_epi32(
12711 0, 1, -1, i32::MAX,
12712 i32::MIN, 100, -100, -32,
12713 0, 1, -1, i32::MAX,
12714 i32::MIN, 100, -100, -32,
12715 );
12716 let r = _mm512_mask_abs_epi32(a, 0, a);
12717 assert_eq_m512i(r, a);
12718 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
12719 let e = _mm512_setr_epi32(
12720 0,
12721 1,
12722 1,
12723 i32::MAX,
12724 i32::MAX.wrapping_add(1),
12725 100,
12726 100,
12727 32,
12728 0,
12729 1,
12730 -1,
12731 i32::MAX,
12732 i32::MIN,
12733 100,
12734 -100,
12735 -32,
12736 );
12737 assert_eq_m512i(r, e);
12738 }
12739
12740 #[simd_test(enable = "avx512f")]
12741 unsafe fn test_mm512_maskz_abs_epi32() {
12742 #[rustfmt::skip]
12743 let a = _mm512_setr_epi32(
12744 0, 1, -1, i32::MAX,
12745 i32::MIN, 100, -100, -32,
12746 0, 1, -1, i32::MAX,
12747 i32::MIN, 100, -100, -32,
12748 );
12749 let r = _mm512_maskz_abs_epi32(0, a);
12750 assert_eq_m512i(r, _mm512_setzero_si512());
12751 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
12752 let e = _mm512_setr_epi32(
12753 0,
12754 1,
12755 1,
12756 i32::MAX,
12757 i32::MAX.wrapping_add(1),
12758 100,
12759 100,
12760 32,
12761 0,
12762 0,
12763 0,
12764 0,
12765 0,
12766 0,
12767 0,
12768 0,
12769 );
12770 assert_eq_m512i(r, e);
12771 }
12772
12773 #[simd_test(enable = "avx512f")]
12774 unsafe fn test_mm512_abs_ps() {
12775 #[rustfmt::skip]
12776 let a = _mm512_setr_ps(
12777 0., 1., -1., f32::MAX,
12778 f32::MIN, 100., -100., -32.,
12779 0., 1., -1., f32::MAX,
12780 f32::MIN, 100., -100., -32.,
12781 );
12782 let r = _mm512_abs_ps(a);
12783 let e = _mm512_setr_ps(
12784 0.,
12785 1.,
12786 1.,
12787 f32::MAX,
12788 f32::MAX,
12789 100.,
12790 100.,
12791 32.,
12792 0.,
12793 1.,
12794 1.,
12795 f32::MAX,
12796 f32::MAX,
12797 100.,
12798 100.,
12799 32.,
12800 );
12801 assert_eq_m512(r, e);
12802 }
12803
12804 #[simd_test(enable = "avx512f")]
12805 unsafe fn test_mm512_mask_abs_ps() {
12806 let a = _mm512_setr_ps(
12807 0.,
12808 1.,
12809 -1.,
12810 f32::MAX,
12811 f32::MIN,
12812 100.,
12813 -100.,
12814 -32.,
12815 0.,
12816 1.,
12817 -1.,
12818 f32::MAX,
12819 f32::MIN,
12820 100.,
12821 -100.,
12822 -32.,
12823 );
12824 let r = _mm512_mask_abs_ps(a, 0, a);
12825 assert_eq_m512(r, a);
12826 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
12827 let e = _mm512_setr_ps(
12828 0.,
12829 1.,
12830 1.,
12831 f32::MAX,
12832 f32::MAX,
12833 100.,
12834 100.,
12835 32.,
12836 0.,
12837 1.,
12838 -1.,
12839 f32::MAX,
12840 f32::MIN,
12841 100.,
12842 -100.,
12843 -32.,
12844 );
12845 assert_eq_m512(r, e);
12846 }
12847
12848 #[simd_test(enable = "avx512f")]
12849 unsafe fn test_mm512_add_epi32() {
12850 let a = _mm512_setr_epi32(
12851 0,
12852 1,
12853 -1,
12854 i32::MAX,
12855 i32::MIN,
12856 100,
12857 -100,
12858 -32,
12859 0,
12860 1,
12861 -1,
12862 i32::MAX,
12863 i32::MIN,
12864 100,
12865 -100,
12866 -32,
12867 );
12868 let b = _mm512_set1_epi32(1);
12869 let r = _mm512_add_epi32(a, b);
12870 let e = _mm512_setr_epi32(
12871 1,
12872 2,
12873 0,
12874 i32::MIN,
12875 i32::MIN + 1,
12876 101,
12877 -99,
12878 -31,
12879 1,
12880 2,
12881 0,
12882 i32::MIN,
12883 i32::MIN + 1,
12884 101,
12885 -99,
12886 -31,
12887 );
12888 assert_eq_m512i(r, e);
12889 }
12890
12891 #[simd_test(enable = "avx512f")]
12892 unsafe fn test_mm512_mask_add_epi32() {
12893 #[rustfmt::skip]
12894 let a = _mm512_setr_epi32(
12895 0, 1, -1, i32::MAX,
12896 i32::MIN, 100, -100, -32,
12897 0, 1, -1, i32::MAX,
12898 i32::MIN, 100, -100, -32,
12899 );
12900 let b = _mm512_set1_epi32(1);
12901 let r = _mm512_mask_add_epi32(a, 0, a, b);
12902 assert_eq_m512i(r, a);
12903 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
12904 let e = _mm512_setr_epi32(
12905 1,
12906 2,
12907 0,
12908 i32::MIN,
12909 i32::MIN + 1,
12910 101,
12911 -99,
12912 -31,
12913 0,
12914 1,
12915 -1,
12916 i32::MAX,
12917 i32::MIN,
12918 100,
12919 -100,
12920 -32,
12921 );
12922 assert_eq_m512i(r, e);
12923 }
12924
12925 #[simd_test(enable = "avx512f")]
12926 unsafe fn test_mm512_maskz_add_epi32() {
12927 #[rustfmt::skip]
12928 let a = _mm512_setr_epi32(
12929 0, 1, -1, i32::MAX,
12930 i32::MIN, 100, -100, -32,
12931 0, 1, -1, i32::MAX,
12932 i32::MIN, 100, -100, -32,
12933 );
12934 let b = _mm512_set1_epi32(1);
12935 let r = _mm512_maskz_add_epi32(0, a, b);
12936 assert_eq_m512i(r, _mm512_setzero_si512());
12937 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
12938 let e = _mm512_setr_epi32(
12939 1,
12940 2,
12941 0,
12942 i32::MIN,
12943 i32::MIN + 1,
12944 101,
12945 -99,
12946 -31,
12947 0,
12948 0,
12949 0,
12950 0,
12951 0,
12952 0,
12953 0,
12954 0,
12955 );
12956 assert_eq_m512i(r, e);
12957 }
12958
12959 #[simd_test(enable = "avx512f")]
12960 unsafe fn test_mm512_add_ps() {
12961 let a = _mm512_setr_ps(
12962 0.,
12963 1.,
12964 -1.,
12965 f32::MAX,
12966 f32::MIN,
12967 100.,
12968 -100.,
12969 -32.,
12970 0.,
12971 1.,
12972 -1.,
12973 f32::MAX,
12974 f32::MIN,
12975 100.,
12976 -100.,
12977 -32.,
12978 );
12979 let b = _mm512_set1_ps(1.);
12980 let r = _mm512_add_ps(a, b);
12981 let e = _mm512_setr_ps(
12982 1.,
12983 2.,
12984 0.,
12985 f32::MAX,
12986 f32::MIN + 1.,
12987 101.,
12988 -99.,
12989 -31.,
12990 1.,
12991 2.,
12992 0.,
12993 f32::MAX,
12994 f32::MIN + 1.,
12995 101.,
12996 -99.,
12997 -31.,
12998 );
12999 assert_eq_m512(r, e);
13000 }
13001
13002 #[simd_test(enable = "avx512f")]
13003 unsafe fn test_mm512_mask_add_ps() {
13004 let a = _mm512_setr_ps(
13005 0.,
13006 1.,
13007 -1.,
13008 f32::MAX,
13009 f32::MIN,
13010 100.,
13011 -100.,
13012 -32.,
13013 0.,
13014 1.,
13015 -1.,
13016 f32::MAX,
13017 f32::MIN,
13018 100.,
13019 -100.,
13020 -32.,
13021 );
13022 let b = _mm512_set1_ps(1.);
13023 let r = _mm512_mask_add_ps(a, 0, a, b);
13024 assert_eq_m512(r, a);
13025 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
13026 let e = _mm512_setr_ps(
13027 1.,
13028 2.,
13029 0.,
13030 f32::MAX,
13031 f32::MIN + 1.,
13032 101.,
13033 -99.,
13034 -31.,
13035 0.,
13036 1.,
13037 -1.,
13038 f32::MAX,
13039 f32::MIN,
13040 100.,
13041 -100.,
13042 -32.,
13043 );
13044 assert_eq_m512(r, e);
13045 }
13046
13047 #[simd_test(enable = "avx512f")]
13048 unsafe fn test_mm512_maskz_add_ps() {
13049 let a = _mm512_setr_ps(
13050 0.,
13051 1.,
13052 -1.,
13053 f32::MAX,
13054 f32::MIN,
13055 100.,
13056 -100.,
13057 -32.,
13058 0.,
13059 1.,
13060 -1.,
13061 f32::MAX,
13062 f32::MIN,
13063 100.,
13064 -100.,
13065 -32.,
13066 );
13067 let b = _mm512_set1_ps(1.);
13068 let r = _mm512_maskz_add_ps(0, a, b);
13069 assert_eq_m512(r, _mm512_setzero_ps());
13070 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
13071 let e = _mm512_setr_ps(
13072 1.,
13073 2.,
13074 0.,
13075 f32::MAX,
13076 f32::MIN + 1.,
13077 101.,
13078 -99.,
13079 -31.,
13080 0.,
13081 0.,
13082 0.,
13083 0.,
13084 0.,
13085 0.,
13086 0.,
13087 0.,
13088 );
13089 assert_eq_m512(r, e);
13090 }
13091
13092 #[simd_test(enable = "avx512f")]
13093 unsafe fn test_mm512_sub_epi32() {
13094 let a = _mm512_setr_epi32(
13095 0,
13096 1,
13097 -1,
13098 i32::MAX,
13099 i32::MIN,
13100 100,
13101 -100,
13102 -32,
13103 0,
13104 1,
13105 -1,
13106 i32::MAX,
13107 i32::MIN,
13108 100,
13109 -100,
13110 -32,
13111 );
13112 let b = _mm512_set1_epi32(1);
13113 let r = _mm512_sub_epi32(a, b);
13114 let e = _mm512_setr_epi32(
13115 -1,
13116 0,
13117 -2,
13118 i32::MAX - 1,
13119 i32::MAX,
13120 99,
13121 -101,
13122 -33,
13123 -1,
13124 0,
13125 -2,
13126 i32::MAX - 1,
13127 i32::MAX,
13128 99,
13129 -101,
13130 -33,
13131 );
13132 assert_eq_m512i(r, e);
13133 }
13134
13135 #[simd_test(enable = "avx512f")]
13136 unsafe fn test_mm512_mask_sub_epi32() {
13137 let a = _mm512_setr_epi32(
13138 0,
13139 1,
13140 -1,
13141 i32::MAX,
13142 i32::MIN,
13143 100,
13144 -100,
13145 -32,
13146 0,
13147 1,
13148 -1,
13149 i32::MAX,
13150 i32::MIN,
13151 100,
13152 -100,
13153 -32,
13154 );
13155 let b = _mm512_set1_epi32(1);
13156 let r = _mm512_mask_sub_epi32(a, 0, a, b);
13157 assert_eq_m512i(r, a);
13158 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
13159 let e = _mm512_setr_epi32(
13160 -1,
13161 0,
13162 -2,
13163 i32::MAX - 1,
13164 i32::MAX,
13165 99,
13166 -101,
13167 -33,
13168 0,
13169 1,
13170 -1,
13171 i32::MAX,
13172 i32::MIN,
13173 100,
13174 -100,
13175 -32,
13176 );
13177 assert_eq_m512i(r, e);
13178 }
13179
13180 #[simd_test(enable = "avx512f")]
13181 unsafe fn test_mm512_maskz_sub_epi32() {
13182 let a = _mm512_setr_epi32(
13183 0,
13184 1,
13185 -1,
13186 i32::MAX,
13187 i32::MIN,
13188 100,
13189 -100,
13190 -32,
13191 0,
13192 1,
13193 -1,
13194 i32::MAX,
13195 i32::MIN,
13196 100,
13197 -100,
13198 -32,
13199 );
13200 let b = _mm512_set1_epi32(1);
13201 let r = _mm512_maskz_sub_epi32(0, a, b);
13202 assert_eq_m512i(r, _mm512_setzero_si512());
13203 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
13204 let e = _mm512_setr_epi32(
13205 -1,
13206 0,
13207 -2,
13208 i32::MAX - 1,
13209 i32::MAX,
13210 99,
13211 -101,
13212 -33,
13213 0,
13214 0,
13215 0,
13216 0,
13217 0,
13218 0,
13219 0,
13220 0,
13221 );
13222 assert_eq_m512i(r, e);
13223 }
13224
13225 #[simd_test(enable = "avx512f")]
13226 unsafe fn test_mm512_sub_ps() {
13227 let a = _mm512_setr_ps(
13228 0.,
13229 1.,
13230 -1.,
13231 f32::MAX,
13232 f32::MIN,
13233 100.,
13234 -100.,
13235 -32.,
13236 0.,
13237 1.,
13238 -1.,
13239 f32::MAX,
13240 f32::MIN,
13241 100.,
13242 -100.,
13243 -32.,
13244 );
13245 let b = _mm512_set1_ps(1.);
13246 let r = _mm512_sub_ps(a, b);
13247 let e = _mm512_setr_ps(
13248 -1.,
13249 0.,
13250 -2.,
13251 f32::MAX - 1.,
13252 f32::MIN,
13253 99.,
13254 -101.,
13255 -33.,
13256 -1.,
13257 0.,
13258 -2.,
13259 f32::MAX - 1.,
13260 f32::MIN,
13261 99.,
13262 -101.,
13263 -33.,
13264 );
13265 assert_eq_m512(r, e);
13266 }
13267
13268 #[simd_test(enable = "avx512f")]
13269 unsafe fn test_mm512_mask_sub_ps() {
13270 let a = _mm512_setr_ps(
13271 0.,
13272 1.,
13273 -1.,
13274 f32::MAX,
13275 f32::MIN,
13276 100.,
13277 -100.,
13278 -32.,
13279 0.,
13280 1.,
13281 -1.,
13282 f32::MAX,
13283 f32::MIN,
13284 100.,
13285 -100.,
13286 -32.,
13287 );
13288 let b = _mm512_set1_ps(1.);
13289 let r = _mm512_mask_sub_ps(a, 0, a, b);
13290 assert_eq_m512(r, a);
13291 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
13292 let e = _mm512_setr_ps(
13293 -1.,
13294 0.,
13295 -2.,
13296 f32::MAX - 1.,
13297 f32::MIN,
13298 99.,
13299 -101.,
13300 -33.,
13301 0.,
13302 1.,
13303 -1.,
13304 f32::MAX,
13305 f32::MIN,
13306 100.,
13307 -100.,
13308 -32.,
13309 );
13310 assert_eq_m512(r, e);
13311 }
13312
13313 #[simd_test(enable = "avx512f")]
13314 unsafe fn test_mm512_maskz_sub_ps() {
13315 let a = _mm512_setr_ps(
13316 0.,
13317 1.,
13318 -1.,
13319 f32::MAX,
13320 f32::MIN,
13321 100.,
13322 -100.,
13323 -32.,
13324 0.,
13325 1.,
13326 -1.,
13327 f32::MAX,
13328 f32::MIN,
13329 100.,
13330 -100.,
13331 -32.,
13332 );
13333 let b = _mm512_set1_ps(1.);
13334 let r = _mm512_maskz_sub_ps(0, a, b);
13335 assert_eq_m512(r, _mm512_setzero_ps());
13336 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
13337 let e = _mm512_setr_ps(
13338 -1.,
13339 0.,
13340 -2.,
13341 f32::MAX - 1.,
13342 f32::MIN,
13343 99.,
13344 -101.,
13345 -33.,
13346 0.,
13347 0.,
13348 0.,
13349 0.,
13350 0.,
13351 0.,
13352 0.,
13353 0.,
13354 );
13355 assert_eq_m512(r, e);
13356 }
13357
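// _mm512_mullo_epi32 keeps only the low 32 bits of each product, so the expected
// values below wrap: i32::MAX * 2 wraps to -2 and i32::MIN * 2 wraps to 0.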
13358 #[simd_test(enable = "avx512f")]
13359 unsafe fn test_mm512_mullo_epi32() {
13360 let a = _mm512_setr_epi32(
13361 0,
13362 1,
13363 -1,
13364 i32::MAX,
13365 i32::MIN,
13366 100,
13367 -100,
13368 -32,
13369 0,
13370 1,
13371 -1,
13372 i32::MAX,
13373 i32::MIN,
13374 100,
13375 -100,
13376 -32,
13377 );
13378 let b = _mm512_set1_epi32(2);
13379 let r = _mm512_mullo_epi32(a, b);
13380 let e = _mm512_setr_epi32(
13381 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
13382 );
13383 assert_eq_m512i(r, e);
13384 }
13385
13386 #[simd_test(enable = "avx512f")]
13387 unsafe fn test_mm512_mask_mullo_epi32() {
13388 let a = _mm512_setr_epi32(
13389 0,
13390 1,
13391 -1,
13392 i32::MAX,
13393 i32::MIN,
13394 100,
13395 -100,
13396 -32,
13397 0,
13398 1,
13399 -1,
13400 i32::MAX,
13401 i32::MIN,
13402 100,
13403 -100,
13404 -32,
13405 );
13406 let b = _mm512_set1_epi32(2);
13407 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
13408 assert_eq_m512i(r, a);
13409 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
13410 let e = _mm512_setr_epi32(
13411 0,
13412 2,
13413 -2,
13414 -2,
13415 0,
13416 200,
13417 -200,
13418 -64,
13419 0,
13420 1,
13421 -1,
13422 i32::MAX,
13423 i32::MIN,
13424 100,
13425 -100,
13426 -32,
13427 );
13428 assert_eq_m512i(r, e);
13429 }
13430
13431 #[simd_test(enable = "avx512f")]
13432 unsafe fn test_mm512_maskz_mullo_epi32() {
13433 let a = _mm512_setr_epi32(
13434 0,
13435 1,
13436 -1,
13437 i32::MAX,
13438 i32::MIN,
13439 100,
13440 -100,
13441 -32,
13442 0,
13443 1,
13444 -1,
13445 i32::MAX,
13446 i32::MIN,
13447 100,
13448 -100,
13449 -32,
13450 );
13451 let b = _mm512_set1_epi32(2);
13452 let r = _mm512_maskz_mullo_epi32(0, a, b);
13453 assert_eq_m512i(r, _mm512_setzero_si512());
13454 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
13455 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
13456 assert_eq_m512i(r, e);
13457 }
13458
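// f32::MAX * 2. and f32::MIN * 2. overflow the f32 range, so those lanes are
// expected to be +infinity and -infinity per IEEE 754.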
13459 #[simd_test(enable = "avx512f")]
13460 unsafe fn test_mm512_mul_ps() {
13461 let a = _mm512_setr_ps(
13462 0.,
13463 1.,
13464 -1.,
13465 f32::MAX,
13466 f32::MIN,
13467 100.,
13468 -100.,
13469 -32.,
13470 0.,
13471 1.,
13472 -1.,
13473 f32::MAX,
13474 f32::MIN,
13475 100.,
13476 -100.,
13477 -32.,
13478 );
13479 let b = _mm512_set1_ps(2.);
13480 let r = _mm512_mul_ps(a, b);
13481 let e = _mm512_setr_ps(
13482 0.,
13483 2.,
13484 -2.,
13485 f32::INFINITY,
13486 f32::NEG_INFINITY,
13487 200.,
13488 -200.,
13489 -64.,
13490 0.,
13491 2.,
13492 -2.,
13493 f32::INFINITY,
13494 f32::NEG_INFINITY,
13495 200.,
13496 -200.,
13497 -64.,
13498 );
13499 assert_eq_m512(r, e);
13500 }
13501
13502 #[simd_test(enable = "avx512f")]
13503 unsafe fn test_mm512_mask_mul_ps() {
13504 let a = _mm512_setr_ps(
13505 0.,
13506 1.,
13507 -1.,
13508 f32::MAX,
13509 f32::MIN,
13510 100.,
13511 -100.,
13512 -32.,
13513 0.,
13514 1.,
13515 -1.,
13516 f32::MAX,
13517 f32::MIN,
13518 100.,
13519 -100.,
13520 -32.,
13521 );
13522 let b = _mm512_set1_ps(2.);
13523 let r = _mm512_mask_mul_ps(a, 0, a, b);
13524 assert_eq_m512(r, a);
13525 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
13526 let e = _mm512_setr_ps(
13527 0.,
13528 2.,
13529 -2.,
13530 f32::INFINITY,
13531 f32::NEG_INFINITY,
13532 200.,
13533 -200.,
13534 -64.,
13535 0.,
13536 1.,
13537 -1.,
13538 f32::MAX,
13539 f32::MIN,
13540 100.,
13541 -100.,
13542 -32.,
13543 );
13544 assert_eq_m512(r, e);
13545 }
13546
13547 #[simd_test(enable = "avx512f")]
13548 unsafe fn test_mm512_maskz_mul_ps() {
13549 let a = _mm512_setr_ps(
13550 0.,
13551 1.,
13552 -1.,
13553 f32::MAX,
13554 f32::MIN,
13555 100.,
13556 -100.,
13557 -32.,
13558 0.,
13559 1.,
13560 -1.,
13561 f32::MAX,
13562 f32::MIN,
13563 100.,
13564 -100.,
13565 -32.,
13566 );
13567 let b = _mm512_set1_ps(2.);
13568 let r = _mm512_maskz_mul_ps(0, a, b);
13569 assert_eq_m512(r, _mm512_setzero_ps());
13570 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
13571 let e = _mm512_setr_ps(
13572 0.,
13573 2.,
13574 -2.,
13575 f32::INFINITY,
13576 f32::NEG_INFINITY,
13577 200.,
13578 -200.,
13579 -64.,
13580 0.,
13581 0.,
13582 0.,
13583 0.,
13584 0.,
13585 0.,
13586 0.,
13587 0.,
13588 );
13589 assert_eq_m512(r, e);
13590 }
13591
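// The b lanes set to 0. below exercise division by zero: dividing a nonzero
// finite value by 0. yields a signed infinity (+inf for 100/0, -inf for -131/0).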
13592 #[simd_test(enable = "avx512f")]
13593 unsafe fn test_mm512_div_ps() {
13594 let a = _mm512_setr_ps(
13595 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
13596 );
13597 let b = _mm512_setr_ps(
13598 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
13599 );
13600 let r = _mm512_div_ps(a, b);
13601 let e = _mm512_setr_ps(
13602 0.,
13603 0.5,
13604 -0.5,
13605 -1.,
13606 50.,
13607 f32::INFINITY,
13608 -50.,
13609 -16.,
13610 0.,
13611 0.5,
13612 -0.5,
13613 500.,
13614 f32::NEG_INFINITY,
13615 50.,
13616 -50.,
13617 -16.,
13618 );
13619 assert_eq_m512(r, e); // 0/0 (which would be NaN) is deliberately not among the inputs
13620 }
13621
13622 #[simd_test(enable = "avx512f")]
13623 unsafe fn test_mm512_mask_div_ps() {
13624 let a = _mm512_setr_ps(
13625 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
13626 );
13627 let b = _mm512_setr_ps(
13628 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
13629 );
13630 let r = _mm512_mask_div_ps(a, 0, a, b);
13631 assert_eq_m512(r, a);
13632 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
13633 let e = _mm512_setr_ps(
13634 0.,
13635 0.5,
13636 -0.5,
13637 -1.,
13638 50.,
13639 f32::INFINITY,
13640 -50.,
13641 -16.,
13642 0.,
13643 1.,
13644 -1.,
13645 1000.,
13646 -131.,
13647 100.,
13648 -100.,
13649 -32.,
13650 );
13651 assert_eq_m512(r, e);
13652 }
13653
13654 #[simd_test(enable = "avx512f")]
13655 unsafe fn test_mm512_maskz_div_ps() {
13656 let a = _mm512_setr_ps(
13657 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
13658 );
13659 let b = _mm512_setr_ps(
13660 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
13661 );
13662 let r = _mm512_maskz_div_ps(0, a, b);
13663 assert_eq_m512(r, _mm512_setzero_ps());
13664 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
13665 let e = _mm512_setr_ps(
13666 0.,
13667 0.5,
13668 -0.5,
13669 -1.,
13670 50.,
13671 f32::INFINITY,
13672 -50.,
13673 -16.,
13674 0.,
13675 0.,
13676 0.,
13677 0.,
13678 0.,
13679 0.,
13680 0.,
13681 0.,
13682 );
13683 assert_eq_m512(r, e);
13684 }
13685
13686 #[simd_test(enable = "avx512f")]
13687 unsafe fn test_mm512_max_epi32() {
13688 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13689 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13690 let r = _mm512_max_epi32(a, b);
13691 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13692 assert_eq_m512i(r, e);
13693 }
13694
13695 #[simd_test(enable = "avx512f")]
13696 unsafe fn test_mm512_mask_max_epi32() {
13697 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13698 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13699 let r = _mm512_mask_max_epi32(a, 0, a, b);
13700 assert_eq_m512i(r, a);
13701 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
13702 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13703 assert_eq_m512i(r, e);
13704 }
13705
13706 #[simd_test(enable = "avx512f")]
13707 unsafe fn test_mm512_maskz_max_epi32() {
13708 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13709 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13710 let r = _mm512_maskz_max_epi32(0, a, b);
13711 assert_eq_m512i(r, _mm512_setzero_si512());
13712 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
13713 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
13714 assert_eq_m512i(r, e);
13715 }
13716
13717 #[simd_test(enable = "avx512f")]
13718 unsafe fn test_mm512_max_ps() {
13719 let a = _mm512_setr_ps(
13720 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13721 );
13722 let b = _mm512_setr_ps(
13723 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
13724 );
13725 let r = _mm512_max_ps(a, b);
13726 let e = _mm512_setr_ps(
13727 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
13728 );
13729 assert_eq_m512(r, e);
13730 }
13731
13732 #[simd_test(enable = "avx512f")]
13733 unsafe fn test_mm512_mask_max_ps() {
13734 let a = _mm512_setr_ps(
13735 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13736 );
13737 let b = _mm512_setr_ps(
13738 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
13739 );
13740 let r = _mm512_mask_max_ps(a, 0, a, b);
13741 assert_eq_m512(r, a);
13742 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
13743 let e = _mm512_setr_ps(
13744 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
13745 );
13746 assert_eq_m512(r, e);
13747 }
13748
13749 #[simd_test(enable = "avx512f")]
13750 unsafe fn test_mm512_maskz_max_ps() {
13751 let a = _mm512_setr_ps(
13752 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13753 );
13754 let b = _mm512_setr_ps(
13755 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
13756 );
13757 let r = _mm512_maskz_max_ps(0, a, b);
13758 assert_eq_m512(r, _mm512_setzero_ps());
13759 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
13760 let e = _mm512_setr_ps(
13761 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
13762 );
13763 assert_eq_m512(r, e);
13764 }
13765
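// max_epu32/min_epu32 compare lanes as unsigned 32-bit integers, unlike the
// signed epi32 variants above; with these non-negative inputs both agree.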
13766 #[simd_test(enable = "avx512f")]
13767 unsafe fn test_mm512_max_epu32() {
13768 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13769 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13770 let r = _mm512_max_epu32(a, b);
13771 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13772 assert_eq_m512i(r, e);
13773 }
13774
13775 #[simd_test(enable = "avx512f")]
13776 unsafe fn test_mm512_mask_max_epu32() {
13777 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13778 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13779 let r = _mm512_mask_max_epu32(a, 0, a, b);
13780 assert_eq_m512i(r, a);
13781 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
13782 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
13783 assert_eq_m512i(r, e);
13784 }
13785
13786 #[simd_test(enable = "avx512f")]
13787 unsafe fn test_mm512_maskz_max_epu32() {
13788 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13789 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13790 let r = _mm512_maskz_max_epu32(0, a, b);
13791 assert_eq_m512i(r, _mm512_setzero_si512());
13792 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
13793 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
13794 assert_eq_m512i(r, e);
13795 }
13796
13797 #[simd_test(enable = "avx512f")]
13798 unsafe fn test_mm512_min_epi32() {
13799 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13800 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13801 let r = _mm512_min_epi32(a, b);
13802 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13803 assert_eq_m512i(r, e);
13804 }
13805
13806 #[simd_test(enable = "avx512f")]
13807 unsafe fn test_mm512_mask_min_epi32() {
13808 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13809 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13810 let r = _mm512_mask_min_epi32(a, 0, a, b);
13811 assert_eq_m512i(r, a);
13812 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
13813 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13814 assert_eq_m512i(r, e);
13815 }
13816
13817 #[simd_test(enable = "avx512f")]
13818 unsafe fn test_mm512_maskz_min_epi32() {
13819 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13820 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13821 let r = _mm512_maskz_min_epi32(0, a, b);
13822 assert_eq_m512i(r, _mm512_setzero_si512());
13823 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
13824 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
13825 assert_eq_m512i(r, e);
13826 }
13827
13828 #[simd_test(enable = "avx512f")]
13829 unsafe fn test_mm512_min_ps() {
13830 let a = _mm512_setr_ps(
13831 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13832 );
13833 let b = _mm512_setr_ps(
13834 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
13835 );
13836 let r = _mm512_min_ps(a, b);
13837 let e = _mm512_setr_ps(
13838 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
13839 );
13840 assert_eq_m512(r, e);
13841 }
13842
13843 #[simd_test(enable = "avx512f")]
13844 unsafe fn test_mm512_mask_min_ps() {
13845 let a = _mm512_setr_ps(
13846 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13847 );
13848 let b = _mm512_setr_ps(
13849 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
13850 );
13851 let r = _mm512_mask_min_ps(a, 0, a, b);
13852 assert_eq_m512(r, a);
13853 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
13854 let e = _mm512_setr_ps(
13855 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13856 );
13857 assert_eq_m512(r, e);
13858 }
13859
13860 #[simd_test(enable = "avx512f")]
13861 unsafe fn test_mm512_maskz_min_ps() {
13862 let a = _mm512_setr_ps(
13863 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13864 );
13865 let b = _mm512_setr_ps(
13866 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
13867 );
13868 let r = _mm512_maskz_min_ps(0, a, b);
13869 assert_eq_m512(r, _mm512_setzero_ps());
13870 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
13871 let e = _mm512_setr_ps(
13872 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
13873 );
13874 assert_eq_m512(r, e);
13875 }
13876
13877 #[simd_test(enable = "avx512f")]
13878 unsafe fn test_mm512_min_epu32() {
13879 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13880 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13881 let r = _mm512_min_epu32(a, b);
13882 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
13883 assert_eq_m512i(r, e);
13884 }
13885
13886 #[simd_test(enable = "avx512f")]
13887 unsafe fn test_mm512_mask_min_epu32() {
13888 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13889 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13890 let r = _mm512_mask_min_epu32(a, 0, a, b);
13891 assert_eq_m512i(r, a);
13892 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
13893 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13894 assert_eq_m512i(r, e);
13895 }
13896
13897 #[simd_test(enable = "avx512f")]
13898 unsafe fn test_mm512_maskz_min_epu32() {
13899 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
13900 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
13901 let r = _mm512_maskz_min_epu32(0, a, b);
13902 assert_eq_m512i(r, _mm512_setzero_si512());
13903 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
13904 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
13905 assert_eq_m512i(r, e);
13906 }
13907
13908 #[simd_test(enable = "avx512f")]
13909 unsafe fn test_mm512_sqrt_ps() {
13910 let a = _mm512_setr_ps(
13911 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
13912 );
13913 let r = _mm512_sqrt_ps(a);
13914 let e = _mm512_setr_ps(
13915 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13916 );
13917 assert_eq_m512(r, e);
13918 }
13919
13920 #[simd_test(enable = "avx512f")]
13921 unsafe fn test_mm512_mask_sqrt_ps() {
13922 let a = _mm512_setr_ps(
13923 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
13924 );
13925 let r = _mm512_mask_sqrt_ps(a, 0, a);
13926 assert_eq_m512(r, a);
13927 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
13928 let e = _mm512_setr_ps(
13929 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
13930 );
13931 assert_eq_m512(r, e);
13932 }
13933
13934 #[simd_test(enable = "avx512f")]
13935 unsafe fn test_mm512_maskz_sqrt_ps() {
13936 let a = _mm512_setr_ps(
13937 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
13938 );
13939 let r = _mm512_maskz_sqrt_ps(0, a);
13940 assert_eq_m512(r, _mm512_setzero_ps());
13941 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
13942 let e = _mm512_setr_ps(
13943 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
13944 );
13945 assert_eq_m512(r, e);
13946 }
13947
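// The fused multiply-add tests compute a * b + c per lane with a single rounding
// step; here a = 1 and c = 1, so the expected result is simply b + 1.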
13948 #[simd_test(enable = "avx512f")]
13949 unsafe fn test_mm512_fmadd_ps() {
13950 let a = _mm512_setr_ps(
13951 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
13952 );
13953 let b = _mm512_setr_ps(
13954 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13955 );
13956 let c = _mm512_setr_ps(
13957 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
13958 );
13959 let r = _mm512_fmadd_ps(a, b, c);
13960 let e = _mm512_setr_ps(
13961 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
13962 );
13963 assert_eq_m512(r, e);
13964 }
13965
13966 #[simd_test(enable = "avx512f")]
13967 unsafe fn test_mm512_mask_fmadd_ps() {
13968 let a = _mm512_setr_ps(
13969 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
13970 );
13971 let b = _mm512_setr_ps(
13972 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13973 );
13974 let c = _mm512_setr_ps(
13975 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
13976 );
13977 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
13978 assert_eq_m512(r, a);
13979 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
13980 let e = _mm512_setr_ps(
13981 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
13982 );
13983 assert_eq_m512(r, e);
13984 }
13985
13986 #[simd_test(enable = "avx512f")]
13987 unsafe fn test_mm512_maskz_fmadd_ps() {
13988 let a = _mm512_setr_ps(
13989 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
13990 );
13991 let b = _mm512_setr_ps(
13992 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
13993 );
13994 let c = _mm512_setr_ps(
13995 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
13996 );
13997 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
13998 assert_eq_m512(r, _mm512_setzero_ps());
13999 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
14000 let e = _mm512_setr_ps(
14001 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
14002 );
14003 assert_eq_m512(r, e);
14004 }
14005
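// The mask3 variants take the mask as the last argument and blend into c (the
// addend), so unselected lanes keep the value of c rather than a, as the
// all-zero-mask call asserting r == c shows.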
14006 #[simd_test(enable = "avx512f")]
14007 unsafe fn test_mm512_mask3_fmadd_ps() {
14008 let a = _mm512_setr_ps(
14009 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14010 );
14011 let b = _mm512_setr_ps(
14012 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14013 );
14014 let c = _mm512_setr_ps(
14015 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
14016 );
14017 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
14018 assert_eq_m512(r, c);
14019 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
14020 let e = _mm512_setr_ps(
14021 1., 2., 3., 4., 5., 6., 7., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
14022 );
14023 assert_eq_m512(r, e);
14024 }
14025
14026 #[simd_test(enable = "avx512f")]
14027 unsafe fn test_mm512_fmsub_ps() {
14028 let a = _mm512_setr_ps(
14029 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14030 );
14031 let b = _mm512_setr_ps(
14032 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14033 );
14034 let c = _mm512_setr_ps(
14035 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14036 );
14037 let r = _mm512_fmsub_ps(a, b, c);
14038 let e = _mm512_setr_ps(
14039 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
14040 );
14041 assert_eq_m512(r, e);
14042 }
14043
14044 #[simd_test(enable = "avx512f")]
14045 unsafe fn test_mm512_mask_fmsub_ps() {
14046 let a = _mm512_setr_ps(
14047 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14048 );
14049 let b = _mm512_setr_ps(
14050 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14051 );
14052 let c = _mm512_setr_ps(
14053 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14054 );
14055 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
14056 assert_eq_m512(r, a);
14057 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
14058 let e = _mm512_setr_ps(
14059 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
14060 );
14061 assert_eq_m512(r, e);
14062 }
14063
14064 #[simd_test(enable = "avx512f")]
14065 unsafe fn test_mm512_maskz_fmsub_ps() {
14066 let a = _mm512_setr_ps(
14067 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14068 );
14069 let b = _mm512_setr_ps(
14070 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14071 );
14072 let c = _mm512_setr_ps(
14073 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14074 );
14075 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
14076 assert_eq_m512(r, _mm512_setzero_ps());
14077 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
14078 let e = _mm512_setr_ps(
14079 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
14080 );
14081 assert_eq_m512(r, e);
14082 }
14083
14084 #[simd_test(enable = "avx512f")]
14085 unsafe fn test_mm512_mask3_fmsub_ps() {
14086 let a = _mm512_setr_ps(
14087 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14088 );
14089 let b = _mm512_setr_ps(
14090 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14091 );
14092 let c = _mm512_setr_ps(
14093 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
14094 );
14095 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
14096 assert_eq_m512(r, c);
14097 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
14098 let e = _mm512_setr_ps(
14099 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
14100 );
14101 assert_eq_m512(r, e);
14102 }
14103
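// fmaddsub alternates per lane: even lanes (in setr memory order) compute
// a * b - c and odd lanes compute a * b + c; fmsubadd below is the opposite.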
14104 #[simd_test(enable = "avx512f")]
14105 unsafe fn test_mm512_fmaddsub_ps() {
14106 let a = _mm512_setr_ps(
14107 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14108 );
14109 let b = _mm512_setr_ps(
14110 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14111 );
14112 let c = _mm512_setr_ps(
14113 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14114 );
14115 let r = _mm512_fmaddsub_ps(a, b, c);
14116 let e = _mm512_setr_ps(
14117 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
14118 );
14119 assert_eq_m512(r, e);
14120 }
14121
14122 #[simd_test(enable = "avx512f")]
14123 unsafe fn test_mm512_mask_fmaddsub_ps() {
14124 let a = _mm512_setr_ps(
14125 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14126 );
14127 let b = _mm512_setr_ps(
14128 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14129 );
14130 let c = _mm512_setr_ps(
14131 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14132 );
14133 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
14134 assert_eq_m512(r, a);
14135 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
14136 let e = _mm512_setr_ps(
14137 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
14138 );
14139 assert_eq_m512(r, e);
14140 }
14141
14142 #[simd_test(enable = "avx512f")]
14143 unsafe fn test_mm512_maskz_fmaddsub_ps() {
14144 let a = _mm512_setr_ps(
14145 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14146 );
14147 let b = _mm512_setr_ps(
14148 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14149 );
14150 let c = _mm512_setr_ps(
14151 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14152 );
14153 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
14154 assert_eq_m512(r, _mm512_setzero_ps());
14155 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
14156 let e = _mm512_setr_ps(
14157 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
14158 );
14159 assert_eq_m512(r, e);
14160 }
14161
14162 #[simd_test(enable = "avx512f")]
14163 unsafe fn test_mm512_mask3_fmaddsub_ps() {
14164 let a = _mm512_setr_ps(
14165 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14166 );
14167 let b = _mm512_setr_ps(
14168 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14169 );
14170 let c = _mm512_setr_ps(
14171 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
14172 );
14173 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
14174 assert_eq_m512(r, c);
14175 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
14176 let e = _mm512_setr_ps(
14177 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
14178 );
14179 assert_eq_m512(r, e);
14180 }
14181
14182 #[simd_test(enable = "avx512f")]
14183 unsafe fn test_mm512_fmsubadd_ps() {
14184 let a = _mm512_setr_ps(
14185 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14186 );
14187 let b = _mm512_setr_ps(
14188 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14189 );
14190 let c = _mm512_setr_ps(
14191 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14192 );
14193 let r = _mm512_fmsubadd_ps(a, b, c);
14194 let e = _mm512_setr_ps(
14195 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
14196 );
14197 assert_eq_m512(r, e);
14198 }
14199
14200 #[simd_test(enable = "avx512f")]
14201 unsafe fn test_mm512_mask_fmsubadd_ps() {
14202 let a = _mm512_setr_ps(
14203 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14204 );
14205 let b = _mm512_setr_ps(
14206 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14207 );
14208 let c = _mm512_setr_ps(
14209 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14210 );
14211 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
14212 assert_eq_m512(r, a);
14213 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
14214 let e = _mm512_setr_ps(
14215 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
14216 );
14217 assert_eq_m512(r, e);
14218 }
14219
14220 #[simd_test(enable = "avx512f")]
14221 unsafe fn test_mm512_maskz_fmsubadd_ps() {
14222 let a = _mm512_setr_ps(
14223 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14224 );
14225 let b = _mm512_setr_ps(
14226 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14227 );
14228 let c = _mm512_setr_ps(
14229 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14230 );
14231 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
14232 assert_eq_m512(r, _mm512_setzero_ps());
14233 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
14234 let e = _mm512_setr_ps(
14235 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
14236 );
14237 assert_eq_m512(r, e);
14238 }
14239
14240 #[simd_test(enable = "avx512f")]
14241 unsafe fn test_mm512_mask3_fmsubadd_ps() {
14242 let a = _mm512_setr_ps(
14243 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14244 );
14245 let b = _mm512_setr_ps(
14246 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14247 );
14248 let c = _mm512_setr_ps(
14249 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
14250 );
14251 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
14252 assert_eq_m512(r, c);
14253 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
14254 let e = _mm512_setr_ps(
14255 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
14256 );
14257 assert_eq_m512(r, e);
14258 }
14259
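// fnmadd negates the product: each lane is -(a * b) + c; fnmsub below computes
// -(a * b) - c.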
14260 #[simd_test(enable = "avx512f")]
14261 unsafe fn test_mm512_fnmadd_ps() {
14262 let a = _mm512_setr_ps(
14263 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14264 );
14265 let b = _mm512_setr_ps(
14266 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14267 );
14268 let c = _mm512_setr_ps(
14269 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14270 );
14271 let r = _mm512_fnmadd_ps(a, b, c);
14272 let e = _mm512_setr_ps(
14273 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
14274 );
14275 assert_eq_m512(r, e);
14276 }
14277
14278 #[simd_test(enable = "avx512f")]
14279 unsafe fn test_mm512_mask_fnmadd_ps() {
14280 let a = _mm512_setr_ps(
14281 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14282 );
14283 let b = _mm512_setr_ps(
14284 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14285 );
14286 let c = _mm512_setr_ps(
14287 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14288 );
14289 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
14290 assert_eq_m512(r, a);
14291 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
14292 let e = _mm512_setr_ps(
14293 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
14294 );
14295 assert_eq_m512(r, e);
14296 }
14297
14298 #[simd_test(enable = "avx512f")]
14299 unsafe fn test_mm512_maskz_fnmadd_ps() {
14300 let a = _mm512_setr_ps(
14301 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14302 );
14303 let b = _mm512_setr_ps(
14304 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14305 );
14306 let c = _mm512_setr_ps(
14307 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14308 );
14309 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
14310 assert_eq_m512(r, _mm512_setzero_ps());
14311 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
14312 let e = _mm512_setr_ps(
14313 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
14314 );
14315 assert_eq_m512(r, e);
14316 }
14317
14318 #[simd_test(enable = "avx512f")]
14319 unsafe fn test_mm512_mask3_fnmadd_ps() {
14320 let a = _mm512_setr_ps(
14321 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14322 );
14323 let b = _mm512_setr_ps(
14324 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14325 );
14326 let c = _mm512_setr_ps(
14327 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
14328 );
14329 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
14330 assert_eq_m512(r, c);
14331 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
14332 let e = _mm512_setr_ps(
14333 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
14334 );
14335 assert_eq_m512(r, e);
14336 }
14337
14338 #[simd_test(enable = "avx512f")]
14339 unsafe fn test_mm512_fnmsub_ps() {
14340 let a = _mm512_setr_ps(
14341 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14342 );
14343 let b = _mm512_setr_ps(
14344 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14345 );
14346 let c = _mm512_setr_ps(
14347 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14348 );
14349 let r = _mm512_fnmsub_ps(a, b, c);
14350 let e = _mm512_setr_ps(
14351 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
14352 );
14353 assert_eq_m512(r, e);
14354 }
14355
14356 #[simd_test(enable = "avx512f")]
14357 unsafe fn test_mm512_mask_fnmsub_ps() {
14358 let a = _mm512_setr_ps(
14359 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14360 );
14361 let b = _mm512_setr_ps(
14362 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14363 );
14364 let c = _mm512_setr_ps(
14365 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14366 );
14367 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
14368 assert_eq_m512(r, a);
14369 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
14370 let e = _mm512_setr_ps(
14371 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
14372 );
14373 assert_eq_m512(r, e);
14374 }
14375
14376 #[simd_test(enable = "avx512f")]
14377 unsafe fn test_mm512_maskz_fnmsub_ps() {
14378 let a = _mm512_setr_ps(
14379 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14380 );
14381 let b = _mm512_setr_ps(
14382 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14383 );
14384 let c = _mm512_setr_ps(
14385 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14386 );
14387 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
14388 assert_eq_m512(r, _mm512_setzero_ps());
14389 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
14390 let e = _mm512_setr_ps(
14391 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
14392 );
14393 assert_eq_m512(r, e);
14394 }
14395
14396 #[simd_test(enable = "avx512f")]
14397 unsafe fn test_mm512_mask3_fnmsub_ps() {
14398 let a = _mm512_setr_ps(
14399 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
14400 );
14401 let b = _mm512_setr_ps(
14402 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
14403 );
14404 let c = _mm512_setr_ps(
14405 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
14406 );
14407 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
14408 assert_eq_m512(r, c);
14409 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
14410 let e = _mm512_setr_ps(
14411 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
14412 );
14413 assert_eq_m512(r, e);
14414 }
14415
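// rcp14/rsqrt14 are approximation instructions (relative error on the order of
// 2^-14, hence the name), so the expected constants 0.33333206 and 0.5773392
// differ from the correctly rounded 1/3 and 1/sqrt(3).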
14416 #[simd_test(enable = "avx512f")]
14417 unsafe fn test_mm512_rcp14_ps() {
14418 let a = _mm512_set1_ps(3.);
14419 let r = _mm512_rcp14_ps(a);
14420 let e = _mm512_set1_ps(0.33333206);
14421 assert_eq_m512(r, e);
14422 }
14423
14424 #[simd_test(enable = "avx512f")]
14425 unsafe fn test_mm512_mask_rcp14_ps() {
14426 let a = _mm512_set1_ps(3.);
14427 let r = _mm512_mask_rcp14_ps(a, 0, a);
14428 assert_eq_m512(r, a);
14429 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
14430 let e = _mm512_setr_ps(
14431 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
14432 0.33333206, 0.33333206, 0.33333206, 0.33333206,
14433 );
14434 assert_eq_m512(r, e);
14435 }
14436
14437 #[simd_test(enable = "avx512f")]
14438 unsafe fn test_mm512_maskz_rcp14_ps() {
14439 let a = _mm512_set1_ps(3.);
14440 let r = _mm512_maskz_rcp14_ps(0, a);
14441 assert_eq_m512(r, _mm512_setzero_ps());
14442 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
14443 let e = _mm512_setr_ps(
14444 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
14445 0.33333206, 0.33333206, 0.33333206, 0.33333206,
14446 );
14447 assert_eq_m512(r, e);
14448 }
14449
14450 #[simd_test(enable = "avx512f")]
14451 unsafe fn test_mm512_rsqrt14_ps() {
14452 let a = _mm512_set1_ps(3.);
14453 let r = _mm512_rsqrt14_ps(a);
14454 let e = _mm512_set1_ps(0.5773392);
14455 assert_eq_m512(r, e);
14456 }
14457
14458 #[simd_test(enable = "avx512f")]
14459 unsafe fn test_mm512_mask_rsqrt14_ps() {
14460 let a = _mm512_set1_ps(3.);
14461 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
14462 assert_eq_m512(r, a);
14463 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
14464 let e = _mm512_setr_ps(
14465 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
14466 0.5773392, 0.5773392, 0.5773392,
14467 );
14468 assert_eq_m512(r, e);
14469 }
14470
14471 #[simd_test(enable = "avx512f")]
14472 unsafe fn test_mm512_maskz_rsqrt14_ps() {
14473 let a = _mm512_set1_ps(3.);
14474 let r = _mm512_maskz_rsqrt14_ps(0, a);
14475 assert_eq_m512(r, _mm512_setzero_ps());
14476 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
14477 let e = _mm512_setr_ps(
14478 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
14479 0.5773392, 0.5773392, 0.5773392,
14480 );
14481 assert_eq_m512(r, e);
14482 }
14483
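// getexp extracts the exponent of each lane as a float, roughly
// floor(log2(|a|)); for a = 3.0 that is 1.0.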
14484 #[simd_test(enable = "avx512f")]
14485 unsafe fn test_mm512_getexp_ps() {
14486 let a = _mm512_set1_ps(3.);
14487 let r = _mm512_getexp_ps(a);
14488 let e = _mm512_set1_ps(1.);
14489 assert_eq_m512(r, e);
14490 }
14491
14492 #[simd_test(enable = "avx512f")]
14493 unsafe fn test_mm512_mask_getexp_ps() {
14494 let a = _mm512_set1_ps(3.);
14495 let r = _mm512_mask_getexp_ps(a, 0, a);
14496 assert_eq_m512(r, a);
14497 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
14498 let e = _mm512_setr_ps(
14499 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
14500 );
14501 assert_eq_m512(r, e);
14502 }
14503
14504 #[simd_test(enable = "avx512f")]
14505 unsafe fn test_mm512_maskz_getexp_ps() {
14506 let a = _mm512_set1_ps(3.);
14507 let r = _mm512_maskz_getexp_ps(0, a);
14508 assert_eq_m512(r, _mm512_setzero_ps());
14509 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
14510 let e = _mm512_setr_ps(
14511 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
14512 );
14513 assert_eq_m512(r, e);
14514 }
14515
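// getmant returns the mantissa normalised into the requested interval: 10.0 is
// 1.25 * 2^3, and 1.25 lies both in [0.75, 1.5) (_MM_MANT_NORM_P75_1P5) and in
// [1, 2) (_MM_MANT_NORM_1_2), so every variant below expects 1.25.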
14516 #[simd_test(enable = "avx512f")]
14517 unsafe fn test_mm512_getmant_ps() {
14518 let a = _mm512_set1_ps(10.);
14519 let r = _mm512_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
14520 let e = _mm512_set1_ps(1.25);
14521 assert_eq_m512(r, e);
14522 }
14523
14524 #[simd_test(enable = "avx512f")]
14525 unsafe fn test_mm512_mask_getmant_ps() {
14526 let a = _mm512_set1_ps(10.);
14527 let r = _mm512_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
14528 assert_eq_m512(r, a);
14529 let r = _mm512_mask_getmant_ps(
14530 a,
14531 0b11111111_00000000,
14532 a,
14533 _MM_MANT_NORM_1_2,
14534 _MM_MANT_SIGN_SRC,
14535 );
14536 let e = _mm512_setr_ps(
14537 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
14538 );
14539 assert_eq_m512(r, e);
14540 }
14541
14542 #[simd_test(enable = "avx512f")]
14543 unsafe fn test_mm512_maskz_getmant_ps() {
14544 let a = _mm512_set1_ps(10.);
14545 let r = _mm512_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
14546 assert_eq_m512(r, _mm512_setzero_ps());
14547 let r =
14548 _mm512_maskz_getmant_ps(0b11111111_00000000, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
14549 let e = _mm512_setr_ps(
14550 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
14551 );
14552 assert_eq_m512(r, e);
14553 }
14554
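// The *_round_* tests pass an explicit rounding mode (with exceptions
// suppressed) instead of using MXCSR: 0.00000007 + -1. rounds to -0.99999994
// under round-to-nearest but to -0.9999999 under round-toward-zero.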
14555 #[simd_test(enable = "avx512f")]
14556 unsafe fn test_mm512_add_round_ps() {
14557 let a = _mm512_setr_ps(
14558 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
14559 );
14560 let b = _mm512_set1_ps(-1.);
14561 let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14562 let e = _mm512_setr_ps(
14563 -1.,
14564 0.5,
14565 1.,
14566 2.5,
14567 3.,
14568 4.5,
14569 5.,
14570 6.5,
14571 7.,
14572 8.5,
14573 9.,
14574 10.5,
14575 11.,
14576 12.5,
14577 13.,
14578 -0.99999994,
14579 );
14580 assert_eq_m512(r, e);
14581 let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
14582 let e = _mm512_setr_ps(
14583 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
14584 );
14585 assert_eq_m512(r, e);
14586 }
14587
14588 #[simd_test(enable = "avx512f")]
14589 unsafe fn test_mm512_mask_add_round_ps() {
14590 let a = _mm512_setr_ps(
14591 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
14592 );
14593 let b = _mm512_set1_ps(-1.);
14594 let r = _mm512_mask_add_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14595 assert_eq_m512(r, a);
14596 let r = _mm512_mask_add_round_ps(
14597 a,
14598 0b11111111_00000000,
14599 a,
14600 b,
14601 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14602 );
14603 let e = _mm512_setr_ps(
14604 0.,
14605 1.5,
14606 2.,
14607 3.5,
14608 4.,
14609 5.5,
14610 6.,
14611 7.5,
14612 7.,
14613 8.5,
14614 9.,
14615 10.5,
14616 11.,
14617 12.5,
14618 13.,
14619 -0.99999994,
14620 );
14621 assert_eq_m512(r, e);
14622 }
14623
14624 #[simd_test(enable = "avx512f")]
14625 unsafe fn test_mm512_maskz_add_round_ps() {
14626 let a = _mm512_setr_ps(
14627 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
14628 );
14629 let b = _mm512_set1_ps(-1.);
14630 let r = _mm512_maskz_add_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14631 assert_eq_m512(r, _mm512_setzero_ps());
14632 let r = _mm512_maskz_add_round_ps(
14633 0b11111111_00000000,
14634 a,
14635 b,
14636 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14637 );
14638 let e = _mm512_setr_ps(
14639 0.,
14640 0.,
14641 0.,
14642 0.,
14643 0.,
14644 0.,
14645 0.,
14646 0.,
14647 7.,
14648 8.5,
14649 9.,
14650 10.5,
14651 11.,
14652 12.5,
14653 13.,
14654 -0.99999994,
14655 );
14656 assert_eq_m512(r, e);
14657 }
14658
14659 #[simd_test(enable = "avx512f")]
14660 unsafe fn test_mm512_sub_round_ps() {
14661 let a = _mm512_setr_ps(
14662 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
14663 );
14664 let b = _mm512_set1_ps(1.);
14665 let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14666 let e = _mm512_setr_ps(
14667 -1.,
14668 0.5,
14669 1.,
14670 2.5,
14671 3.,
14672 4.5,
14673 5.,
14674 6.5,
14675 7.,
14676 8.5,
14677 9.,
14678 10.5,
14679 11.,
14680 12.5,
14681 13.,
14682 -0.99999994,
14683 );
14684 assert_eq_m512(r, e);
14685 let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
14686 let e = _mm512_setr_ps(
14687 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
14688 );
14689 assert_eq_m512(r, e);
14690 }
14691
14692 #[simd_test(enable = "avx512f")]
14693 unsafe fn test_mm512_mask_sub_round_ps() {
14694 let a = _mm512_setr_ps(
14695 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
14696 );
14697 let b = _mm512_set1_ps(1.);
14698 let r = _mm512_mask_sub_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14699 assert_eq_m512(r, a);
14700 let r = _mm512_mask_sub_round_ps(
14701 a,
14702 0b11111111_00000000,
14703 a,
14704 b,
14705 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14706 );
14707 let e = _mm512_setr_ps(
14708 0.,
14709 1.5,
14710 2.,
14711 3.5,
14712 4.,
14713 5.5,
14714 6.,
14715 7.5,
14716 7.,
14717 8.5,
14718 9.,
14719 10.5,
14720 11.,
14721 12.5,
14722 13.,
14723 -0.99999994,
14724 );
14725 assert_eq_m512(r, e);
14726 }
14727
14728 #[simd_test(enable = "avx512f")]
14729 unsafe fn test_mm512_maskz_sub_round_ps() {
14730 let a = _mm512_setr_ps(
14731 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
14732 );
14733 let b = _mm512_set1_ps(1.);
14734 let r = _mm512_maskz_sub_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14735 assert_eq_m512(r, _mm512_setzero_ps());
14736 let r = _mm512_maskz_sub_round_ps(
14737 0b11111111_00000000,
14738 a,
14739 b,
14740 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14741 );
14742 let e = _mm512_setr_ps(
14743 0.,
14744 0.,
14745 0.,
14746 0.,
14747 0.,
14748 0.,
14749 0.,
14750 0.,
14751 7.,
14752 8.5,
14753 9.,
14754 10.5,
14755 11.,
14756 12.5,
14757 13.,
14758 -0.99999994,
14759 );
14760 assert_eq_m512(r, e);
14761 }
14762
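// 0.1 has no exact f32 representation, so the products below differ in the last
// place depending on the rounding mode (e.g. 1.5 * 0.1 is 0.15 when rounding to
// nearest but 0.14999999 when rounding toward zero).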
14763 #[simd_test(enable = "avx512f")]
14764 unsafe fn test_mm512_mul_round_ps() {
14765 let a = _mm512_setr_ps(
14766 0.,
14767 1.5,
14768 2.,
14769 3.5,
14770 4.,
14771 5.5,
14772 6.,
14773 7.5,
14774 8.,
14775 9.5,
14776 10.,
14777 11.5,
14778 12.,
14779 13.5,
14780 14.,
14781 0.00000000000000000000007,
14782 );
14783 let b = _mm512_set1_ps(0.1);
14784 let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14785 let e = _mm512_setr_ps(
14786 0.,
14787 0.15,
14788 0.2,
14789 0.35,
14790 0.4,
14791 0.55,
14792 0.6,
14793 0.75,
14794 0.8,
14795 0.95,
14796 1.0,
14797 1.15,
14798 1.2,
14799 1.35,
14800 1.4,
14801 0.000000000000000000000007000001,
14802 );
14803 assert_eq_m512(r, e);
14804 let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
14805 let e = _mm512_setr_ps(
14806 0.,
14807 0.14999999,
14808 0.2,
14809 0.35,
14810 0.4,
14811 0.54999995,
14812 0.59999996,
14813 0.75,
14814 0.8,
14815 0.95,
14816 1.0,
14817 1.15,
14818 1.1999999,
14819 1.3499999,
14820 1.4,
14821 0.000000000000000000000007,
14822 );
14823 assert_eq_m512(r, e);
14824 }
14825
14826 #[simd_test(enable = "avx512f")]
14827 unsafe fn test_mm512_mask_mul_round_ps() {
14828 let a = _mm512_setr_ps(
14829 0.,
14830 1.5,
14831 2.,
14832 3.5,
14833 4.,
14834 5.5,
14835 6.,
14836 7.5,
14837 8.,
14838 9.5,
14839 10.,
14840 11.5,
14841 12.,
14842 13.5,
14843 14.,
14844 0.00000000000000000000007,
14845 );
14846 let b = _mm512_set1_ps(0.1);
14847 let r = _mm512_mask_mul_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14848 assert_eq_m512(r, a);
14849 let r = _mm512_mask_mul_round_ps(
14850 a,
14851 0b11111111_00000000,
14852 a,
14853 b,
14854 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14855 );
14856 let e = _mm512_setr_ps(
14857 0.,
14858 1.5,
14859 2.,
14860 3.5,
14861 4.,
14862 5.5,
14863 6.,
14864 7.5,
14865 0.8,
14866 0.95,
14867 1.0,
14868 1.15,
14869 1.2,
14870 1.35,
14871 1.4,
14872 0.000000000000000000000007000001,
14873 );
14874 assert_eq_m512(r, e);
14875 }
14876
14877 #[simd_test(enable = "avx512f")]
14878 unsafe fn test_mm512_maskz_mul_round_ps() {
14879 let a = _mm512_setr_ps(
14880 0.,
14881 1.5,
14882 2.,
14883 3.5,
14884 4.,
14885 5.5,
14886 6.,
14887 7.5,
14888 8.,
14889 9.5,
14890 10.,
14891 11.5,
14892 12.,
14893 13.5,
14894 14.,
14895 0.00000000000000000000007,
14896 );
14897 let b = _mm512_set1_ps(0.1);
14898 let r = _mm512_maskz_mul_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14899 assert_eq_m512(r, _mm512_setzero_ps());
14900 let r = _mm512_maskz_mul_round_ps(
14901 0b11111111_00000000,
14902 a,
14903 b,
14904 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14905 );
14906 let e = _mm512_setr_ps(
14907 0.,
14908 0.,
14909 0.,
14910 0.,
14911 0.,
14912 0.,
14913 0.,
14914 0.,
14915 0.8,
14916 0.95,
14917 1.0,
14918 1.15,
14919 1.2,
14920 1.35,
14921 1.4,
14922 0.000000000000000000000007000001,
14923 );
14924 assert_eq_m512(r, e);
14925 }
14926
14927 #[simd_test(enable = "avx512f")]
14928 unsafe fn test_mm512_div_round_ps() {
14929 let a = _mm512_set1_ps(1.);
14930 let b = _mm512_set1_ps(3.);
14931 let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14932 let e = _mm512_set1_ps(0.33333334);
14933 assert_eq_m512(r, e);
14934 let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
14935 let e = _mm512_set1_ps(0.3333333);
14936 assert_eq_m512(r, e);
14937 }
14938
14939 #[simd_test(enable = "avx512f")]
14940 unsafe fn test_mm512_mask_div_round_ps() {
14941 let a = _mm512_set1_ps(1.);
14942 let b = _mm512_set1_ps(3.);
14943 let r = _mm512_mask_div_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14944 assert_eq_m512(r, a);
14945 let r = _mm512_mask_div_round_ps(
14946 a,
14947 0b11111111_00000000,
14948 a,
14949 b,
14950 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14951 );
14952 let e = _mm512_setr_ps(
14953 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
14954 0.33333334, 0.33333334, 0.33333334, 0.33333334,
14955 );
14956 assert_eq_m512(r, e);
14957 }
14958
14959 #[simd_test(enable = "avx512f")]
14960 unsafe fn test_mm512_maskz_div_round_ps() {
14961 let a = _mm512_set1_ps(1.);
14962 let b = _mm512_set1_ps(3.);
14963 let r = _mm512_maskz_div_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14964 assert_eq_m512(r, _mm512_setzero_ps());
14965 let r = _mm512_maskz_div_round_ps(
14966 0b11111111_00000000,
14967 a,
14968 b,
14969 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14970 );
14971 let e = _mm512_setr_ps(
14972 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
14973 0.33333334, 0.33333334, 0.33333334, 0.33333334,
14974 );
14975 assert_eq_m512(r, e);
14976 }
14977
14978 #[simd_test(enable = "avx512f")]
14979 unsafe fn test_mm512_sqrt_round_ps() {
14980 let a = _mm512_set1_ps(3.);
14981 let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14982 let e = _mm512_set1_ps(1.7320508);
14983 assert_eq_m512(r, e);
14984 let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
14985 let e = _mm512_set1_ps(1.7320509);
14986 assert_eq_m512(r, e);
14987 }
14988
14989 #[simd_test(enable = "avx512f")]
14990 unsafe fn test_mm512_mask_sqrt_round_ps() {
14991 let a = _mm512_set1_ps(3.);
14992 let r = _mm512_mask_sqrt_round_ps(a, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
14993 assert_eq_m512(r, a);
14994 let r = _mm512_mask_sqrt_round_ps(
14995 a,
14996 0b11111111_00000000,
14997 a,
14998 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
14999 );
15000 let e = _mm512_setr_ps(
15001 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
15002 1.7320508, 1.7320508, 1.7320508,
15003 );
15004 assert_eq_m512(r, e);
15005 }
15006
15007 #[simd_test(enable = "avx512f")]
15008 unsafe fn test_mm512_maskz_sqrt_round_ps() {
15009 let a = _mm512_set1_ps(3.);
15010 let r = _mm512_maskz_sqrt_round_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15011 assert_eq_m512(r, _mm512_setzero_ps());
15012 let r = _mm512_maskz_sqrt_round_ps(
15013 0b11111111_00000000,
15014 a,
15015 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15016 );
15017 let e = _mm512_setr_ps(
15018 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
15019 1.7320508, 1.7320508, 1.7320508,
15020 );
15021 assert_eq_m512(r, e);
15022 }
15023
15024 #[simd_test(enable = "avx512f")]
15025 unsafe fn test_mm512_fmadd_round_ps() {
15026 let a = _mm512_set1_ps(0.00000007);
15027 let b = _mm512_set1_ps(1.);
15028 let c = _mm512_set1_ps(-1.);
15029 let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15030 let e = _mm512_set1_ps(-0.99999994);
15031 assert_eq_m512(r, e);
15032 let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
15033 let e = _mm512_set1_ps(-0.9999999);
15034 assert_eq_m512(r, e);
15035 }
15036
15037 #[simd_test(enable = "avx512f")]
15038 unsafe fn test_mm512_mask_fmadd_round_ps() {
15039 let a = _mm512_set1_ps(0.00000007);
15040 let b = _mm512_set1_ps(1.);
15041 let c = _mm512_set1_ps(-1.);
15042 let r =
15043 _mm512_mask_fmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15044 assert_eq_m512(r, a);
15045 let r = _mm512_mask_fmadd_round_ps(
15046 a,
15047 0b00000000_11111111,
15048 b,
15049 c,
15050 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15051 );
15052 let e = _mm512_setr_ps(
15053 -0.99999994,
15054 -0.99999994,
15055 -0.99999994,
15056 -0.99999994,
15057 -0.99999994,
15058 -0.99999994,
15059 -0.99999994,
15060 -0.99999994,
15061 0.00000007,
15062 0.00000007,
15063 0.00000007,
15064 0.00000007,
15065 0.00000007,
15066 0.00000007,
15067 0.00000007,
15068 0.00000007,
15069 );
15070 assert_eq_m512(r, e);
15071 }
15072
15073 #[simd_test(enable = "avx512f")]
15074 unsafe fn test_mm512_maskz_fmadd_round_ps() {
15075 let a = _mm512_set1_ps(0.00000007);
15076 let b = _mm512_set1_ps(1.);
15077 let c = _mm512_set1_ps(-1.);
15078 let r =
15079 _mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15080 assert_eq_m512(r, _mm512_setzero_ps());
15081 let r = _mm512_maskz_fmadd_round_ps(
15082 0b00000000_11111111,
15083 a,
15084 b,
15085 c,
15086 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15087 );
15088 let e = _mm512_setr_ps(
15089 -0.99999994,
15090 -0.99999994,
15091 -0.99999994,
15092 -0.99999994,
15093 -0.99999994,
15094 -0.99999994,
15095 -0.99999994,
15096 -0.99999994,
15097 0.,
15098 0.,
15099 0.,
15100 0.,
15101 0.,
15102 0.,
15103 0.,
15104 0.,
15105 );
15106 assert_eq_m512(r, e);
15107 }
15108
15109 #[simd_test(enable = "avx512f")]
15110 unsafe fn test_mm512_mask3_fmadd_round_ps() {
15111 let a = _mm512_set1_ps(0.00000007);
15112 let b = _mm512_set1_ps(1.);
15113 let c = _mm512_set1_ps(-1.);
15114 let r =
15115 _mm512_mask3_fmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15116 assert_eq_m512(r, c);
15117 let r = _mm512_mask3_fmadd_round_ps(
15118 a,
15119 b,
15120 c,
15121 0b00000000_11111111,
15122 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15123 );
15124 let e = _mm512_setr_ps(
15125 -0.99999994,
15126 -0.99999994,
15127 -0.99999994,
15128 -0.99999994,
15129 -0.99999994,
15130 -0.99999994,
15131 -0.99999994,
15132 -0.99999994,
15133 -1.,
15134 -1.,
15135 -1.,
15136 -1.,
15137 -1.,
15138 -1.,
15139 -1.,
15140 -1.,
15141 );
15142 assert_eq_m512(r, e);
15143 }
15144
15145 #[simd_test(enable = "avx512f")]
15146 unsafe fn test_mm512_fmsub_round_ps() {
15147 let a = _mm512_set1_ps(0.00000007);
15148 let b = _mm512_set1_ps(1.);
15149 let c = _mm512_set1_ps(1.);
15150 let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15151 let e = _mm512_set1_ps(-0.99999994);
15152 assert_eq_m512(r, e);
15153 let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
15154 let e = _mm512_set1_ps(-0.9999999);
15155 assert_eq_m512(r, e);
15156 }
15157
15158 #[simd_test(enable = "avx512f")]
15159 unsafe fn test_mm512_mask_fmsub_round_ps() {
15160 let a = _mm512_set1_ps(0.00000007);
15161 let b = _mm512_set1_ps(1.);
15162 let c = _mm512_set1_ps(1.);
15163 let r =
15164 _mm512_mask_fmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15165 assert_eq_m512(r, a);
15166 let r = _mm512_mask_fmsub_round_ps(
15167 a,
15168 0b00000000_11111111,
15169 b,
15170 c,
15171 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15172 );
15173 let e = _mm512_setr_ps(
15174 -0.99999994,
15175 -0.99999994,
15176 -0.99999994,
15177 -0.99999994,
15178 -0.99999994,
15179 -0.99999994,
15180 -0.99999994,
15181 -0.99999994,
15182 0.00000007,
15183 0.00000007,
15184 0.00000007,
15185 0.00000007,
15186 0.00000007,
15187 0.00000007,
15188 0.00000007,
15189 0.00000007,
15190 );
15191 assert_eq_m512(r, e);
15192 }
15193
15194 #[simd_test(enable = "avx512f")]
15195 unsafe fn test_mm512_maskz_fmsub_round_ps() {
15196 let a = _mm512_set1_ps(0.00000007);
15197 let b = _mm512_set1_ps(1.);
15198 let c = _mm512_set1_ps(1.);
15199 let r =
15200 _mm512_maskz_fmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15201 assert_eq_m512(r, _mm512_setzero_ps());
15202 let r = _mm512_maskz_fmsub_round_ps(
15203 0b00000000_11111111,
15204 a,
15205 b,
15206 c,
15207 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15208 );
15209 let e = _mm512_setr_ps(
15210 -0.99999994,
15211 -0.99999994,
15212 -0.99999994,
15213 -0.99999994,
15214 -0.99999994,
15215 -0.99999994,
15216 -0.99999994,
15217 -0.99999994,
15218 0.,
15219 0.,
15220 0.,
15221 0.,
15222 0.,
15223 0.,
15224 0.,
15225 0.,
15226 );
15227 assert_eq_m512(r, e);
15228 }
15229
15230 #[simd_test(enable = "avx512f")]
15231 unsafe fn test_mm512_mask3_fmsub_round_ps() {
15232 let a = _mm512_set1_ps(0.00000007);
15233 let b = _mm512_set1_ps(1.);
15234 let c = _mm512_set1_ps(1.);
15235 let r =
15236 _mm512_mask3_fmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15237 assert_eq_m512(r, c);
15238 let r = _mm512_mask3_fmsub_round_ps(
15239 a,
15240 b,
15241 c,
15242 0b00000000_11111111,
15243 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15244 );
15245 let e = _mm512_setr_ps(
15246 -0.99999994,
15247 -0.99999994,
15248 -0.99999994,
15249 -0.99999994,
15250 -0.99999994,
15251 -0.99999994,
15252 -0.99999994,
15253 -0.99999994,
15254 1.,
15255 1.,
15256 1.,
15257 1.,
15258 1.,
15259 1.,
15260 1.,
15261 1.,
15262 );
15263 assert_eq_m512(r, e);
15264 }
15265
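// fmaddsub alternates per lane: even lanes compute a * b - c and odd lanes
// a * b + c (fmsubadd is the mirror image), which is why the expected vectors
// below alternate between 1.0000001 and -0.99999994.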
15266 #[simd_test(enable = "avx512f")]
15267 unsafe fn test_mm512_fmaddsub_round_ps() {
15268 let a = _mm512_set1_ps(0.00000007);
15269 let b = _mm512_set1_ps(1.);
15270 let c = _mm512_set1_ps(-1.);
15271 let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15272 let e = _mm512_setr_ps(
15273 1.0000001,
15274 -0.99999994,
15275 1.0000001,
15276 -0.99999994,
15277 1.0000001,
15278 -0.99999994,
15279 1.0000001,
15280 -0.99999994,
15281 1.0000001,
15282 -0.99999994,
15283 1.0000001,
15284 -0.99999994,
15285 1.0000001,
15286 -0.99999994,
15287 1.0000001,
15288 -0.99999994,
15289 );
15290 assert_eq_m512(r, e);
15291 let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
15292 let e = _mm512_setr_ps(
15293 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
15294 -0.9999999, 1., -0.9999999, 1., -0.9999999,
15295 );
15296 assert_eq_m512(r, e);
15297 }
15298
15299 #[simd_test(enable = "avx512f")]
15300 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
15301 let a = _mm512_set1_ps(0.00000007);
15302 let b = _mm512_set1_ps(1.);
15303 let c = _mm512_set1_ps(-1.);
15304 let r = _mm512_mask_fmaddsub_round_ps(
15305 a,
15306 0,
15307 b,
15308 c,
15309 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15310 );
15311 assert_eq_m512(r, a);
15312 let r = _mm512_mask_fmaddsub_round_ps(
15313 a,
15314 0b00000000_11111111,
15315 b,
15316 c,
15317 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15318 );
15319 let e = _mm512_setr_ps(
15320 1.0000001,
15321 -0.99999994,
15322 1.0000001,
15323 -0.99999994,
15324 1.0000001,
15325 -0.99999994,
15326 1.0000001,
15327 -0.99999994,
15328 0.00000007,
15329 0.00000007,
15330 0.00000007,
15331 0.00000007,
15332 0.00000007,
15333 0.00000007,
15334 0.00000007,
15335 0.00000007,
15336 );
15337 assert_eq_m512(r, e);
15338 }
15339
15340 #[simd_test(enable = "avx512f")]
15341 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
15342 let a = _mm512_set1_ps(0.00000007);
15343 let b = _mm512_set1_ps(1.);
15344 let c = _mm512_set1_ps(-1.);
15345 let r = _mm512_maskz_fmaddsub_round_ps(
15346 0,
15347 a,
15348 b,
15349 c,
15350 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15351 );
15352 assert_eq_m512(r, _mm512_setzero_ps());
15353 let r = _mm512_maskz_fmaddsub_round_ps(
15354 0b00000000_11111111,
15355 a,
15356 b,
15357 c,
15358 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15359 );
15360 let e = _mm512_setr_ps(
15361 1.0000001,
15362 -0.99999994,
15363 1.0000001,
15364 -0.99999994,
15365 1.0000001,
15366 -0.99999994,
15367 1.0000001,
15368 -0.99999994,
15369 0.,
15370 0.,
15371 0.,
15372 0.,
15373 0.,
15374 0.,
15375 0.,
15376 0.,
15377 );
15378 assert_eq_m512(r, e);
15379 }
15380
15381 #[simd_test(enable = "avx512f")]
15382 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
15383 let a = _mm512_set1_ps(0.00000007);
15384 let b = _mm512_set1_ps(1.);
15385 let c = _mm512_set1_ps(-1.);
15386 let r = _mm512_mask3_fmaddsub_round_ps(
15387 a,
15388 b,
15389 c,
15390 0,
15391 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15392 );
15393 assert_eq_m512(r, c);
15394 let r = _mm512_mask3_fmaddsub_round_ps(
15395 a,
15396 b,
15397 c,
15398 0b00000000_11111111,
15399 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15400 );
15401 let e = _mm512_setr_ps(
15402 1.0000001,
15403 -0.99999994,
15404 1.0000001,
15405 -0.99999994,
15406 1.0000001,
15407 -0.99999994,
15408 1.0000001,
15409 -0.99999994,
15410 -1.,
15411 -1.,
15412 -1.,
15413 -1.,
15414 -1.,
15415 -1.,
15416 -1.,
15417 -1.,
15418 );
15419 assert_eq_m512(r, e);
15420 }
15421
15422 #[simd_test(enable = "avx512f")]
15423 unsafe fn test_mm512_fmsubadd_round_ps() {
15424 let a = _mm512_set1_ps(0.00000007);
15425 let b = _mm512_set1_ps(1.);
15426 let c = _mm512_set1_ps(-1.);
15427 let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15428 let e = _mm512_setr_ps(
15429 -0.99999994,
15430 1.0000001,
15431 -0.99999994,
15432 1.0000001,
15433 -0.99999994,
15434 1.0000001,
15435 -0.99999994,
15436 1.0000001,
15437 -0.99999994,
15438 1.0000001,
15439 -0.99999994,
15440 1.0000001,
15441 -0.99999994,
15442 1.0000001,
15443 -0.99999994,
15444 1.0000001,
15445 );
15446 assert_eq_m512(r, e);
15447 let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
15448 let e = _mm512_setr_ps(
15449 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
15450 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
15451 );
15452 assert_eq_m512(r, e);
15453 }
15454
15455 #[simd_test(enable = "avx512f")]
15456 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
15457 let a = _mm512_set1_ps(0.00000007);
15458 let b = _mm512_set1_ps(1.);
15459 let c = _mm512_set1_ps(-1.);
15460 let r = _mm512_mask_fmsubadd_round_ps(
15461 a,
15462 0,
15463 b,
15464 c,
15465 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15466 );
15467 assert_eq_m512(r, a);
15468 let r = _mm512_mask_fmsubadd_round_ps(
15469 a,
15470 0b00000000_11111111,
15471 b,
15472 c,
15473 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15474 );
15475 let e = _mm512_setr_ps(
15476 -0.99999994,
15477 1.0000001,
15478 -0.99999994,
15479 1.0000001,
15480 -0.99999994,
15481 1.0000001,
15482 -0.99999994,
15483 1.0000001,
15484 0.00000007,
15485 0.00000007,
15486 0.00000007,
15487 0.00000007,
15488 0.00000007,
15489 0.00000007,
15490 0.00000007,
15491 0.00000007,
15492 );
15493 assert_eq_m512(r, e);
15494 }
15495
15496 #[simd_test(enable = "avx512f")]
15497 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
15498 let a = _mm512_set1_ps(0.00000007);
15499 let b = _mm512_set1_ps(1.);
15500 let c = _mm512_set1_ps(-1.);
15501 let r = _mm512_maskz_fmsubadd_round_ps(
15502 0,
15503 a,
15504 b,
15505 c,
15506 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15507 );
15508 assert_eq_m512(r, _mm512_setzero_ps());
15509 let r = _mm512_maskz_fmsubadd_round_ps(
15510 0b00000000_11111111,
15511 a,
15512 b,
15513 c,
15514 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15515 );
15516 let e = _mm512_setr_ps(
15517 -0.99999994,
15518 1.0000001,
15519 -0.99999994,
15520 1.0000001,
15521 -0.99999994,
15522 1.0000001,
15523 -0.99999994,
15524 1.0000001,
15525 0.,
15526 0.,
15527 0.,
15528 0.,
15529 0.,
15530 0.,
15531 0.,
15532 0.,
15533 );
15534 assert_eq_m512(r, e);
15535 }
15536
15537 #[simd_test(enable = "avx512f")]
15538 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
15539 let a = _mm512_set1_ps(0.00000007);
15540 let b = _mm512_set1_ps(1.);
15541 let c = _mm512_set1_ps(-1.);
15542 let r = _mm512_mask3_fmsubadd_round_ps(
15543 a,
15544 b,
15545 c,
15546 0,
15547 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15548 );
15549 assert_eq_m512(r, c);
15550 let r = _mm512_mask3_fmsubadd_round_ps(
15551 a,
15552 b,
15553 c,
15554 0b00000000_11111111,
15555 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15556 );
15557 let e = _mm512_setr_ps(
15558 -0.99999994,
15559 1.0000001,
15560 -0.99999994,
15561 1.0000001,
15562 -0.99999994,
15563 1.0000001,
15564 -0.99999994,
15565 1.0000001,
15566 -1.,
15567 -1.,
15568 -1.,
15569 -1.,
15570 -1.,
15571 -1.,
15572 -1.,
15573 -1.,
15574 );
15575 assert_eq_m512(r, e);
15576 }
15577
15578 #[simd_test(enable = "avx512f")]
15579 unsafe fn test_mm512_fnmadd_round_ps() {
15580 let a = _mm512_set1_ps(0.00000007);
15581 let b = _mm512_set1_ps(1.);
15582 let c = _mm512_set1_ps(1.);
15583 let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15584 let e = _mm512_set1_ps(0.99999994);
15585 assert_eq_m512(r, e);
15586 let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
15587 let e = _mm512_set1_ps(0.9999999);
15588 assert_eq_m512(r, e);
15589 }
15590
15591 #[simd_test(enable = "avx512f")]
15592 unsafe fn test_mm512_mask_fnmadd_round_ps() {
15593 let a = _mm512_set1_ps(0.00000007);
15594 let b = _mm512_set1_ps(1.);
15595 let c = _mm512_set1_ps(1.);
15596 let r =
15597 _mm512_mask_fnmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15598 assert_eq_m512(r, a);
15599 let r = _mm512_mask_fnmadd_round_ps(
15600 a,
15601 0b00000000_11111111,
15602 b,
15603 c,
15604 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15605 );
15606 let e = _mm512_setr_ps(
15607 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
15608 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
15609 0.00000007, 0.00000007,
15610 );
15611 assert_eq_m512(r, e);
15612 }
15613
15614 #[simd_test(enable = "avx512f")]
15615 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
15616 let a = _mm512_set1_ps(0.00000007);
15617 let b = _mm512_set1_ps(1.);
15618 let c = _mm512_set1_ps(1.);
15619 let r =
15620 _mm512_maskz_fnmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15621 assert_eq_m512(r, _mm512_setzero_ps());
15622 let r = _mm512_maskz_fnmadd_round_ps(
15623 0b00000000_11111111,
15624 a,
15625 b,
15626 c,
15627 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15628 );
15629 let e = _mm512_setr_ps(
15630 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
15631 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
15632 );
15633 assert_eq_m512(r, e);
15634 }
15635
15636 #[simd_test(enable = "avx512f")]
15637 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
15638 let a = _mm512_set1_ps(0.00000007);
15639 let b = _mm512_set1_ps(1.);
15640 let c = _mm512_set1_ps(1.);
15641 let r =
15642 _mm512_mask3_fnmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15643 assert_eq_m512(r, c);
15644 let r = _mm512_mask3_fnmadd_round_ps(
15645 a,
15646 b,
15647 c,
15648 0b00000000_11111111,
15649 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15650 );
15651 let e = _mm512_setr_ps(
15652 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
15653 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
15654 );
15655 assert_eq_m512(r, e);
15656 }
15657
15658 #[simd_test(enable = "avx512f")]
15659 unsafe fn test_mm512_fnmsub_round_ps() {
15660 let a = _mm512_set1_ps(0.00000007);
15661 let b = _mm512_set1_ps(1.);
15662 let c = _mm512_set1_ps(-1.);
15663 let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15664 let e = _mm512_set1_ps(0.99999994);
15665 assert_eq_m512(r, e);
15666 let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
15667 let e = _mm512_set1_ps(0.9999999);
15668 assert_eq_m512(r, e);
15669 }
15670
15671 #[simd_test(enable = "avx512f")]
15672 unsafe fn test_mm512_mask_fnmsub_round_ps() {
15673 let a = _mm512_set1_ps(0.00000007);
15674 let b = _mm512_set1_ps(1.);
15675 let c = _mm512_set1_ps(-1.);
15676 let r =
15677 _mm512_mask_fnmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15678 assert_eq_m512(r, a);
15679 let r = _mm512_mask_fnmsub_round_ps(
15680 a,
15681 0b00000000_11111111,
15682 b,
15683 c,
15684 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15685 );
15686 let e = _mm512_setr_ps(
15687 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
15688 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
15689 0.00000007, 0.00000007,
15690 );
15691 assert_eq_m512(r, e);
15692 }
15693
15694 #[simd_test(enable = "avx512f")]
15695 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
15696 let a = _mm512_set1_ps(0.00000007);
15697 let b = _mm512_set1_ps(1.);
15698 let c = _mm512_set1_ps(-1.);
15699 let r =
15700 _mm512_maskz_fnmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15701 assert_eq_m512(r, _mm512_setzero_ps());
15702 let r = _mm512_maskz_fnmsub_round_ps(
15703 0b00000000_11111111,
15704 a,
15705 b,
15706 c,
15707 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15708 );
15709 let e = _mm512_setr_ps(
15710 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
15711 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
15712 );
15713 assert_eq_m512(r, e);
15714 }
15715
15716 #[simd_test(enable = "avx512f")]
15717 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
15718 let a = _mm512_set1_ps(0.00000007);
15719 let b = _mm512_set1_ps(1.);
15720 let c = _mm512_set1_ps(-1.);
15721 let r =
15722 _mm512_mask3_fnmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
15723 assert_eq_m512(r, c);
15724 let r = _mm512_mask3_fnmsub_round_ps(
15725 a,
15726 b,
15727 c,
15728 0b00000000_11111111,
15729 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
15730 );
15731 let e = _mm512_setr_ps(
15732 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
15733 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
15734 );
15735 assert_eq_m512(r, e);
15736 }
15737
15738 #[simd_test(enable = "avx512f")]
15739 unsafe fn test_mm512_max_round_ps() {
15740 let a = _mm512_setr_ps(
15741 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15742 );
15743 let b = _mm512_setr_ps(
15744 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
15745 );
15746 let r = _mm512_max_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
15747 let e = _mm512_setr_ps(
15748 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
15749 );
15750 assert_eq_m512(r, e);
15751 }
15752
15753 #[simd_test(enable = "avx512f")]
15754 unsafe fn test_mm512_mask_max_round_ps() {
15755 let a = _mm512_setr_ps(
15756 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15757 );
15758 let b = _mm512_setr_ps(
15759 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
15760 );
15761 let r = _mm512_mask_max_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
15762 assert_eq_m512(r, a);
15763 let r = _mm512_mask_max_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
15764 let e = _mm512_setr_ps(
15765 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
15766 );
15767 assert_eq_m512(r, e);
15768 }
15769
15770 #[simd_test(enable = "avx512f")]
15771 unsafe fn test_mm512_maskz_max_round_ps() {
15772 let a = _mm512_setr_ps(
15773 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15774 );
15775 let b = _mm512_setr_ps(
15776 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
15777 );
15778 let r = _mm512_maskz_max_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
15779 assert_eq_m512(r, _mm512_setzero_ps());
15780 let r = _mm512_maskz_max_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
15781 let e = _mm512_setr_ps(
15782 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
15783 );
15784 assert_eq_m512(r, e);
15785 }
15786
15787 #[simd_test(enable = "avx512f")]
15788 unsafe fn test_mm512_min_round_ps() {
15789 let a = _mm512_setr_ps(
15790 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15791 );
15792 let b = _mm512_setr_ps(
15793 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
15794 );
15795 let r = _mm512_min_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
15796 let e = _mm512_setr_ps(
15797 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
15798 );
15799 assert_eq_m512(r, e);
15800 }
15801
15802 #[simd_test(enable = "avx512f")]
15803 unsafe fn test_mm512_mask_min_round_ps() {
15804 let a = _mm512_setr_ps(
15805 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15806 );
15807 let b = _mm512_setr_ps(
15808 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
15809 );
15810 let r = _mm512_mask_min_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
15811 assert_eq_m512(r, a);
15812 let r = _mm512_mask_min_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
15813 let e = _mm512_setr_ps(
15814 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15815 );
15816 assert_eq_m512(r, e);
15817 }
15818
15819 #[simd_test(enable = "avx512f")]
15820 unsafe fn test_mm512_maskz_min_round_ps() {
15821 let a = _mm512_setr_ps(
15822 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
15823 );
15824 let b = _mm512_setr_ps(
15825 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
15826 );
15827 let r = _mm512_maskz_min_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
15828 assert_eq_m512(r, _mm512_setzero_ps());
15829 let r = _mm512_maskz_min_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
15830 let e = _mm512_setr_ps(
15831 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
15832 );
15833 assert_eq_m512(r, e);
15834 }
15835
15836 #[simd_test(enable = "avx512f")]
15837 unsafe fn test_mm512_getexp_round_ps() {
15838 let a = _mm512_set1_ps(3.);
15839 let r = _mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION);
15840 let e = _mm512_set1_ps(1.);
15841 assert_eq_m512(r, e);
15842 let r = _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC);
15843 let e = _mm512_set1_ps(1.);
15844 assert_eq_m512(r, e);
15845 }
15846
15847 #[simd_test(enable = "avx512f")]
15848 unsafe fn test_mm512_mask_getexp_round_ps() {
15849 let a = _mm512_set1_ps(3.);
15850 let r = _mm512_mask_getexp_round_ps(a, 0, a, _MM_FROUND_CUR_DIRECTION);
15851 assert_eq_m512(r, a);
15852 let r = _mm512_mask_getexp_round_ps(a, 0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
15853 let e = _mm512_setr_ps(
15854 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
15855 );
15856 assert_eq_m512(r, e);
15857 }
15858
15859 #[simd_test(enable = "avx512f")]
15860 unsafe fn test_mm512_maskz_getexp_round_ps() {
15861 let a = _mm512_set1_ps(3.);
15862 let r = _mm512_maskz_getexp_round_ps(0, a, _MM_FROUND_CUR_DIRECTION);
15863 assert_eq_m512(r, _mm512_setzero_ps());
15864 let r = _mm512_maskz_getexp_round_ps(0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
15865 let e = _mm512_setr_ps(
15866 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
15867 );
15868 assert_eq_m512(r, e);
15869 }
15870
15871 #[simd_test(enable = "avx512f")]
15872 unsafe fn test_mm512_getmant_round_ps() {
15873 let a = _mm512_set1_ps(10.);
15874 let r = _mm512_getmant_round_ps(
15875 a,
15876 _MM_MANT_NORM_1_2,
15877 _MM_MANT_SIGN_SRC,
15878 _MM_FROUND_CUR_DIRECTION,
15879 );
15880 let e = _mm512_set1_ps(1.25);
15881 assert_eq_m512(r, e);
15882 }
15883
15884 #[simd_test(enable = "avx512f")]
15885 unsafe fn test_mm512_mask_getmant_round_ps() {
15886 let a = _mm512_set1_ps(10.);
15887 let r = _mm512_mask_getmant_round_ps(
15888 a,
15889 0,
15890 a,
15891 _MM_MANT_NORM_1_2,
15892 _MM_MANT_SIGN_SRC,
15893 _MM_FROUND_CUR_DIRECTION,
15894 );
15895 assert_eq_m512(r, a);
15896 let r = _mm512_mask_getmant_round_ps(
15897 a,
15898 0b11111111_00000000,
15899 a,
15900 _MM_MANT_NORM_1_2,
15901 _MM_MANT_SIGN_SRC,
15902 _MM_FROUND_CUR_DIRECTION,
15903 );
15904 let e = _mm512_setr_ps(
15905 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
15906 );
15907 assert_eq_m512(r, e);
15908 }
15909
15910 #[simd_test(enable = "avx512f")]
15911 unsafe fn test_mm512_maskz_getmant_round_ps() {
15912 let a = _mm512_set1_ps(10.);
15913 let r = _mm512_maskz_getmant_round_ps(
15914 0,
15915 a,
15916 _MM_MANT_NORM_1_2,
15917 _MM_MANT_SIGN_SRC,
15918 _MM_FROUND_CUR_DIRECTION,
15919 );
15920 assert_eq_m512(r, _mm512_setzero_ps());
15921 let r = _mm512_maskz_getmant_round_ps(
15922 0b11111111_00000000,
15923 a,
15924 _MM_MANT_NORM_1_2,
15925 _MM_MANT_SIGN_SRC,
15926 _MM_FROUND_CUR_DIRECTION,
15927 );
15928 let e = _mm512_setr_ps(
15929 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
15930 );
15931 assert_eq_m512(r, e);
15932 }
15933
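// _mm512_cvtps_epi32 uses the current MXCSR rounding mode, which in these
// tests is the default round-to-nearest-even: hence -3.5 -> -4, -1.4 -> -1
// and 9.5 -> 10 in the expected vectors below.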
15934 #[simd_test(enable = "avx512f")]
15935 unsafe fn test_mm512_cvtps_epi32() {
15936 let a = _mm512_setr_ps(
15937 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
15938 );
15939 let r = _mm512_cvtps_epi32(a);
15940 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
15941 assert_eq_m512i(r, e);
15942 }
15943
15944 #[simd_test(enable = "avx512f")]
15945 unsafe fn test_mm512_mask_cvtps_epi32() {
15946 let a = _mm512_setr_ps(
15947 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
15948 );
15949 let src = _mm512_set1_epi32(0);
15950 let r = _mm512_mask_cvtps_epi32(src, 0, a);
15951 assert_eq_m512i(r, src);
15952 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
15953 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
15954 assert_eq_m512i(r, e);
15955 }
15956
15957 #[simd_test(enable = "avx512f")]
15958 unsafe fn test_mm512_maskz_cvtps_epi32() {
15959 let a = _mm512_setr_ps(
15960 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
15961 );
15962 let r = _mm512_maskz_cvtps_epi32(0, a);
15963 assert_eq_m512i(r, _mm512_setzero_si512());
15964 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
15965 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
15966 assert_eq_m512i(r, e);
15967 }
15968
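// Negative inputs are out of range for the unsigned conversion and come back
// as all-ones (0xFFFFFFFF), which _mm512_setr_epi32 writes as -1 in the
// expected vectors below.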
15969 #[simd_test(enable = "avx512f")]
15970 unsafe fn test_mm512_cvtps_epu32() {
15971 let a = _mm512_setr_ps(
15972 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
15973 );
15974 let r = _mm512_cvtps_epu32(a);
15975 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
15976 assert_eq_m512i(r, e);
15977 }
15978
15979 #[simd_test(enable = "avx512f")]
15980 unsafe fn test_mm512_mask_cvtps_epu32() {
15981 let a = _mm512_setr_ps(
15982 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
15983 );
15984 let src = _mm512_set1_epi32(0);
15985 let r = _mm512_mask_cvtps_epu32(src, 0, a);
15986 assert_eq_m512i(r, src);
15987 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
15988 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
15989 assert_eq_m512i(r, e);
15990 }
15991
15992 #[simd_test(enable = "avx512f")]
15993 unsafe fn test_mm512_maskz_cvtps_epu32() {
15994 let a = _mm512_setr_ps(
15995 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
15996 );
15997 let r = _mm512_maskz_cvtps_epu32(0, a);
15998 assert_eq_m512i(r, _mm512_setzero_si512());
15999 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
16000 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16001 assert_eq_m512i(r, e);
16002 }
16003
16004 #[simd_test(enable = "avx512f")]
16005 unsafe fn test_mm512_cvt_roundps_epi32() {
16006 let a = _mm512_setr_ps(
16007 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16008 );
16009 let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
16010 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
16011 assert_eq_m512i(r, e);
16012 let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
16013 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
16014 assert_eq_m512i(r, e);
16015 }
16016
16017 #[simd_test(enable = "avx512f")]
16018 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
16019 let a = _mm512_setr_ps(
16020 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16021 );
16022 let src = _mm512_set1_epi32(0);
16023 let r =
16024 _mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
16025 assert_eq_m512i(r, src);
16026 let r = _mm512_mask_cvt_roundps_epi32(
16027 src,
16028 0b00000000_11111111,
16029 a,
16030 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
16031 );
16032 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
16033 assert_eq_m512i(r, e);
16034 }
16035
16036 #[simd_test(enable = "avx512f")]
16037 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
16038 let a = _mm512_setr_ps(
16039 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16040 );
16041 let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
16042 assert_eq_m512i(r, _mm512_setzero_si512());
16043 let r = _mm512_maskz_cvt_roundps_epi32(
16044 0b00000000_11111111,
16045 a,
16046 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
16047 );
16048 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
16049 assert_eq_m512i(r, e);
16050 }
16051
16052 #[simd_test(enable = "avx512f")]
16053 unsafe fn test_mm512_cvt_roundps_epu32() {
16054 let a = _mm512_setr_ps(
16055 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16056 );
16057 let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
16058 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
16059 assert_eq_m512i(r, e);
16060 let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
16061 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
16062 assert_eq_m512i(r, e);
16063 }
16064
16065 #[simd_test(enable = "avx512f")]
16066 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
16067 let a = _mm512_setr_ps(
16068 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16069 );
16070 let src = _mm512_set1_epi32(0);
16071 let r =
16072 _mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
16073 assert_eq_m512i(r, src);
16074 let r = _mm512_mask_cvt_roundps_epu32(
16075 src,
16076 0b00000000_11111111,
16077 a,
16078 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
16079 );
16080 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16081 assert_eq_m512i(r, e);
16082 }
16083
16084 #[simd_test(enable = "avx512f")]
16085 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
16086 let a = _mm512_setr_ps(
16087 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16088 );
16089 let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
16090 assert_eq_m512i(r, _mm512_setzero_si512());
16091 let r = _mm512_maskz_cvt_roundps_epu32(
16092 0b00000000_11111111,
16093 a,
16094 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
16095 );
16096 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16097 assert_eq_m512i(r, e);
16098 }
16099
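// The cvtt_* variants always truncate toward zero, so -1.5 becomes -1 here,
// whereas _mm512_cvt_roundps_epi32 above rounded it to -2 under
// _MM_FROUND_TO_NEAREST_INT.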
16100 #[simd_test(enable = "avx512f")]
16101 unsafe fn test_mm512_cvtt_roundps_epi32() {
16102 let a = _mm512_setr_ps(
16103 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16104 );
16105 let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
16106 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
16107 assert_eq_m512i(r, e);
16108 }
16109
16110 #[simd_test(enable = "avx512f")]
16111 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
16112 let a = _mm512_setr_ps(
16113 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16114 );
16115 let src = _mm512_set1_epi32(0);
16116 let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC);
16117 assert_eq_m512i(r, src);
16118 let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
16119 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
16120 assert_eq_m512i(r, e);
16121 }
16122
16123 #[simd_test(enable = "avx512f")]
16124 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
16125 let a = _mm512_setr_ps(
16126 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16127 );
16128 let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC);
16129 assert_eq_m512i(r, _mm512_setzero_si512());
16130 let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
16131 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
16132 assert_eq_m512i(r, e);
16133 }
16134
16135 #[simd_test(enable = "avx512f")]
16136 unsafe fn test_mm512_cvtt_roundps_epu32() {
16137 let a = _mm512_setr_ps(
16138 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16139 );
16140 let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC);
16141 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
16142 assert_eq_m512i(r, e);
16143 }
16144
16145 #[simd_test(enable = "avx512f")]
16146 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
16147 let a = _mm512_setr_ps(
16148 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16149 );
16150 let src = _mm512_set1_epi32(0);
16151 let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC);
16152 assert_eq_m512i(r, src);
16153 let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
16154 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16155 assert_eq_m512i(r, e);
16156 }
16157
16158 #[simd_test(enable = "avx512f")]
16159 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
16160 let a = _mm512_setr_ps(
16161 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16162 );
16163 let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC);
16164 assert_eq_m512i(r, _mm512_setzero_si512());
16165 let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
16166 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16167 assert_eq_m512i(r, e);
16168 }
16169
16170 #[simd_test(enable = "avx512f")]
16171 unsafe fn test_mm512_cvttps_epi32() {
16172 let a = _mm512_setr_ps(
16173 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16174 );
16175 let r = _mm512_cvttps_epi32(a);
16176 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
16177 assert_eq_m512i(r, e);
16178 }
16179
16180 #[simd_test(enable = "avx512f")]
16181 unsafe fn test_mm512_mask_cvttps_epi32() {
16182 let a = _mm512_setr_ps(
16183 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16184 );
16185 let src = _mm512_set1_epi32(0);
16186 let r = _mm512_mask_cvttps_epi32(src, 0, a);
16187 assert_eq_m512i(r, src);
16188 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
16189 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
16190 assert_eq_m512i(r, e);
16191 }
16192
16193 #[simd_test(enable = "avx512f")]
16194 unsafe fn test_mm512_maskz_cvttps_epi32() {
16195 let a = _mm512_setr_ps(
16196 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16197 );
16198 let r = _mm512_maskz_cvttps_epi32(0, a);
16199 assert_eq_m512i(r, _mm512_setzero_si512());
16200 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
16201 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
16202 assert_eq_m512i(r, e);
16203 }
16204
16205 #[simd_test(enable = "avx512f")]
16206 unsafe fn test_mm512_cvttps_epu32() {
16207 let a = _mm512_setr_ps(
16208 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16209 );
16210 let r = _mm512_cvttps_epu32(a);
16211 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
16212 assert_eq_m512i(r, e);
16213 }
16214
16215 #[simd_test(enable = "avx512f")]
16216 unsafe fn test_mm512_mask_cvttps_epu32() {
16217 let a = _mm512_setr_ps(
16218 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16219 );
16220 let src = _mm512_set1_epi32(0);
16221 let r = _mm512_mask_cvttps_epu32(src, 0, a);
16222 assert_eq_m512i(r, src);
16223 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
16224 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16225 assert_eq_m512i(r, e);
16226 }
16227
16228 #[simd_test(enable = "avx512f")]
16229 unsafe fn test_mm512_maskz_cvttps_epu32() {
16230 let a = _mm512_setr_ps(
16231 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
16232 );
16233 let r = _mm512_maskz_cvttps_epu32(0, a);
16234 assert_eq_m512i(r, _mm512_setzero_si512());
16235 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
16236 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
16237 assert_eq_m512i(r, e);
16238 }
16239
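// In the gather/scatter tests the last argument is the scale in bytes applied
// to every index; with 4-byte f32/i32 elements a scale of 4 turns each index
// into an element index, e.g. index 16 reads or writes arr[16].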
16240 #[simd_test(enable = "avx512f")]
16241 unsafe fn test_mm512_i32gather_ps() {
16242 let mut arr = [0f32; 256];
16243 for i in 0..256 {
16244 arr[i] = i as f32;
16245 }
16246 // A scale of 4 addresses whole 4-byte (dword) elements
16247 #[rustfmt::skip]
16248 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16249 120, 128, 136, 144, 152, 160, 168, 176);
16250 let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
16251 #[rustfmt::skip]
16252 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
16253 120., 128., 136., 144., 152., 160., 168., 176.));
16254 }
16255
16256 #[simd_test(enable = "avx512f")]
16257 unsafe fn test_mm512_mask_i32gather_ps() {
16258 let mut arr = [0f32; 256];
16259 for i in 0..256 {
16260 arr[i] = i as f32;
16261 }
16262 let src = _mm512_set1_ps(2.);
16263 let mask = 0b10101010_10101010;
16264 #[rustfmt::skip]
16265 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16266 120, 128, 136, 144, 152, 160, 168, 176);
16267 // A scale of 4 addresses whole 4-byte (dword) elements
16268 let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
16269 #[rustfmt::skip]
16270 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
16271 2., 128., 2., 144., 2., 160., 2., 176.));
16272 }
16273
16274 #[simd_test(enable = "avx512f")]
16275 unsafe fn test_mm512_i32gather_epi32() {
16276 let mut arr = [0i32; 256];
16277 for i in 0..256 {
16278 arr[i] = i as i32;
16279 }
16280 // A scale of 4 addresses whole 4-byte (dword) elements
16281 #[rustfmt::skip]
16282 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16283 120, 128, 136, 144, 152, 160, 168, 176);
16284 let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
16285 #[rustfmt::skip]
16286 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16287 120, 128, 136, 144, 152, 160, 168, 176));
16288 }
16289
16290 #[simd_test(enable = "avx512f")]
16291 unsafe fn test_mm512_mask_i32gather_epi32() {
16292 let mut arr = [0i32; 256];
16293 for i in 0..256 {
16294 arr[i] = i as i32;
16295 }
16296 let src = _mm512_set1_epi32(2);
16297 let mask = 0b10101010_10101010;
16298 #[rustfmt::skip]
16299 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16300 128, 144, 160, 176, 192, 208, 224, 240);
16301 // A scale of 4 addresses whole 4-byte (dword) elements
16302 let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
16303 #[rustfmt::skip]
16304 assert_eq_m512i(r, _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112,
16305 2, 144, 2, 176, 2, 208, 2, 240));
16306 }
16307
16308 #[simd_test(enable = "avx512f")]
16309 unsafe fn test_mm512_i32scatter_ps() {
16310 let mut arr = [0f32; 256];
16311 #[rustfmt::skip]
16312 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16313 128, 144, 160, 176, 192, 208, 224, 240);
16314 let src = _mm512_setr_ps(
16315 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16316 );
16317 // A scale of 4 addresses whole 4-byte (dword) elements
16318 _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
16319 let mut expected = [0f32; 256];
16320 for i in 0..16 {
16321 expected[i * 16] = (i + 1) as f32;
16322 }
16323 assert_eq!(&arr[..], &expected[..],);
16324 }
16325
16326 #[simd_test(enable = "avx512f")]
16327 unsafe fn test_mm512_mask_i32scatter_ps() {
16328 let mut arr = [0f32; 256];
16329 let mask = 0b10101010_10101010;
16330 #[rustfmt::skip]
16331 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16332 128, 144, 160, 176, 192, 208, 224, 240);
16333 let src = _mm512_setr_ps(
16334 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
16335 );
16336 // A scale of 4 addresses whole 4-byte (dword) elements
16337 _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
16338 let mut expected = [0f32; 256];
16339 for i in 0..8 {
16340 expected[i * 32 + 16] = 2. * (i + 1) as f32;
16341 }
16342 assert_eq!(&arr[..], &expected[..],);
16343 }
16344
16345 #[simd_test(enable = "avx512f")]
16346 unsafe fn test_mm512_i32scatter_epi32() {
16347 let mut arr = [0i32; 256];
16348 #[rustfmt::skip]
16349
16350 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16351 128, 144, 160, 176, 192, 208, 224, 240);
16352 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16353 // A scale of 4 addresses whole 4-byte (dword) elements
16354 _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
16355 let mut expected = [0i32; 256];
16356 for i in 0..16 {
16357 expected[i * 16] = (i + 1) as i32;
16358 }
16359 assert_eq!(&arr[..], &expected[..],);
16360 }
16361
16362 #[simd_test(enable = "avx512f")]
16363 unsafe fn test_mm512_mask_i32scatter_epi32() {
16364 let mut arr = [0i32; 256];
16365 let mask = 0b10101010_10101010;
16366 #[rustfmt::skip]
16367 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
16368 128, 144, 160, 176, 192, 208, 224, 240);
16369 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
16370 // A scale of 4 addresses whole 4-byte (dword) elements
16371 _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
16372 let mut expected = [0i32; 256];
16373 for i in 0..8 {
16374 expected[i * 32 + 16] = 2 * (i + 1) as i32;
16375 }
16376 assert_eq!(&arr[..], &expected[..],);
16377 }
16378
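// For the float comparison masks, bit i of the result corresponds to lane i.
// Ordered predicates (lt, le, eq, ord) report false whenever either operand is
// NaN, while their negations (nlt, nle, neq) and unord report true, which is
// why cmpnlt is asserted to be the complement of cmplt below.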
16379 #[simd_test(enable = "avx512f")]
16380 unsafe fn test_mm512_cmplt_ps_mask() {
16381 #[rustfmt::skip]
16382 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16383 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16384 let b = _mm512_set1_ps(-1.);
16385 let m = _mm512_cmplt_ps_mask(a, b);
16386 assert_eq!(m, 0b00000101_00000101);
16387 }
16388
16389 #[simd_test(enable = "avx512f")]
16390 unsafe fn test_mm512_mask_cmplt_ps_mask() {
16391 #[rustfmt::skip]
16392 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16393 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16394 let b = _mm512_set1_ps(-1.);
16395 let mask = 0b01100110_01100110;
16396 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
16397 assert_eq!(r, 0b00000100_00000100);
16398 }
16399
16400 #[simd_test(enable = "avx512f")]
16401 unsafe fn test_mm512_cmpnlt_ps_mask() {
16402 #[rustfmt::skip]
16403 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16404 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16405 let b = _mm512_set1_ps(-1.);
16406 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
16407 }
16408
16409 #[simd_test(enable = "avx512f")]
16410 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
16411 #[rustfmt::skip]
16412 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16413 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16414 let b = _mm512_set1_ps(-1.);
16415 let mask = 0b01111010_01111010;
16416 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
16417 }
16418
16419 #[simd_test(enable = "avx512f")]
16420 unsafe fn test_mm512_cmpnle_ps_mask() {
16421 #[rustfmt::skip]
16422 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16423 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16424 let b = _mm512_set1_ps(-1.);
16425 let m = _mm512_cmpnle_ps_mask(b, a);
16426 assert_eq!(m, 0b00001101_00001101);
16427 }
16428
16429 #[simd_test(enable = "avx512f")]
16430 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
16431 #[rustfmt::skip]
16432 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16433 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16434 let b = _mm512_set1_ps(-1.);
16435 let mask = 0b01100110_01100110;
16436 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
16437 assert_eq!(r, 0b00000100_00000100);
16438 }
16439
16440 #[simd_test(enable = "avx512f")]
16441 unsafe fn test_mm512_cmple_ps_mask() {
16442 #[rustfmt::skip]
16443 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16444 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16445 let b = _mm512_set1_ps(-1.);
16446 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
16447 }
16448
16449 #[simd_test(enable = "avx512f")]
16450 unsafe fn test_mm512_mask_cmple_ps_mask() {
16451 #[rustfmt::skip]
16452 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
16453 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
16454 let b = _mm512_set1_ps(-1.);
16455 let mask = 0b01111010_01111010;
16456 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
16457 }
16458
16459 #[simd_test(enable = "avx512f")]
16460 unsafe fn test_mm512_cmpeq_ps_mask() {
16461 #[rustfmt::skip]
16462 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
16463 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
16464 #[rustfmt::skip]
16465 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
16466 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
16467 let m = _mm512_cmpeq_ps_mask(b, a);
16468 assert_eq!(m, 0b11001101_11001101);
16469 }
16470
16471 #[simd_test(enable = "avx512f")]
16472 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
16473 #[rustfmt::skip]
16474 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
16475 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
16476 #[rustfmt::skip]
16477 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
16478 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
16479 let mask = 0b01111010_01111010;
16480 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
16481 assert_eq!(r, 0b01001000_01001000);
16482 }
16483
16484 #[simd_test(enable = "avx512f")]
16485 unsafe fn test_mm512_cmpneq_ps_mask() {
16486 #[rustfmt::skip]
16487 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
16488 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
16489 #[rustfmt::skip]
16490 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
16491 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
16492 let m = _mm512_cmpneq_ps_mask(b, a);
16493 assert_eq!(m, 0b00110010_00110010);
16494 }
16495
16496 #[simd_test(enable = "avx512f")]
16497 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
16498 #[rustfmt::skip]
16499 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
16500 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
16501 #[rustfmt::skip]
16502 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
16503 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
16504 let mask = 0b01111010_01111010;
16505 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
16506 assert_eq!(r, 0b00110010_00110010);
16507 }
16508
16509 #[simd_test(enable = "avx512f")]
16510 unsafe fn test_mm512_cmp_ps_mask() {
16511 #[rustfmt::skip]
16512 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
16513 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
16514 let b = _mm512_set1_ps(-1.);
16515 let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
16516 assert_eq!(m, 0b00000101_00000101);
16517 }
16518
16519 #[simd_test(enable = "avx512f")]
16520 unsafe fn test_mm512_mask_cmp_ps_mask() {
16521 #[rustfmt::skip]
16522 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
16523 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
16524 let b = _mm512_set1_ps(-1.);
16525 let mask = 0b01100110_01100110;
16526 let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
16527 assert_eq!(r, 0b00000100_00000100);
16528 }
16529
16530 #[simd_test(enable = "avx512f")]
16531 unsafe fn test_mm512_cmp_round_ps_mask() {
16532 #[rustfmt::skip]
16533 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
16534 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
16535 let b = _mm512_set1_ps(-1.);
16536 let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
16537 assert_eq!(m, 0b00000101_00000101);
16538 }
16539
16540 #[simd_test(enable = "avx512f")]
16541 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
16542 #[rustfmt::skip]
16543 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
16544 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
16545 let b = _mm512_set1_ps(-1.);
16546 let mask = 0b01100110_01100110;
16547 let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
16548 assert_eq!(r, 0b00000100_00000100);
16549 }
16550
16551 #[simd_test(enable = "avx512f")]
16552 unsafe fn test_mm512_cmpord_ps_mask() {
16553 #[rustfmt::skip]
16554 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
16555 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
16556 #[rustfmt::skip]
16557 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
16558 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
16559 let m = _mm512_cmpord_ps_mask(a, b);
16560 assert_eq!(m, 0b00000101_00000101);
16561 }
16562
16563 #[simd_test(enable = "avx512f")]
16564 unsafe fn test_mm512_mask_cmpord_ps_mask() {
16565 #[rustfmt::skip]
16566 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
16567 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
16568 #[rustfmt::skip]
16569 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
16570 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
16571 let mask = 0b11000011_11000011;
16572 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
16573 assert_eq!(m, 0b00000001_00000001);
16574 }
16575
16576 #[simd_test(enable = "avx512f")]
16577 unsafe fn test_mm512_cmpunord_ps_mask() {
16578 #[rustfmt::skip]
16579 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
16580 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
16581 #[rustfmt::skip]
16582 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
16583 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
16584 let m = _mm512_cmpunord_ps_mask(a, b);
16585
16586 assert_eq!(m, 0b11111010_11111010);
16587 }
16588
16589 #[simd_test(enable = "avx512f")]
16590 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
16591 #[rustfmt::skip]
16592 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
16593 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
16594 #[rustfmt::skip]
16595 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
16596 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
16597 let mask = 0b00001111_00001111;
16598 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
16599 assert_eq!(m, 0b00001010_00001010);
16600 }
16601
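// The _ss/_sd variants compare only the lowest lane of a and b and report the
// result in bit 0 of the mask, so a write mask of 0b10 leaves the result 0.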
16602 #[simd_test(enable = "avx512f")]
16603 unsafe fn test_mm_cmp_ss_mask() {
16604 let a = _mm_setr_ps(2., 1., 1., 1.);
16605 let b = _mm_setr_ps(1., 2., 2., 2.);
16606 let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
16607 assert_eq!(m, 1);
16608 }
16609
16610 #[simd_test(enable = "avx512f")]
16611 unsafe fn test_mm_mask_cmp_ss_mask() {
16612 let a = _mm_setr_ps(2., 1., 1., 1.);
16613 let b = _mm_setr_ps(1., 2., 2., 2.);
16614 let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
16615 assert_eq!(m, 0);
16616 let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
16617 assert_eq!(m, 1);
16618 }
16619
16620 #[simd_test(enable = "avx512f")]
16621 unsafe fn test_mm_cmp_round_ss_mask() {
16622 let a = _mm_setr_ps(2., 1., 1., 1.);
16623 let b = _mm_setr_ps(1., 2., 2., 2.);
16624 let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
16625 assert_eq!(m, 1);
16626 }
16627
16628 #[simd_test(enable = "avx512f")]
16629 unsafe fn test_mm_mask_cmp_round_ss_mask() {
16630 let a = _mm_setr_ps(2., 1., 1., 1.);
16631 let b = _mm_setr_ps(1., 2., 2., 2.);
16632 let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
16633 assert_eq!(m, 0);
16634 let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
16635 assert_eq!(m, 1);
16636 }
16637
16638 #[simd_test(enable = "avx512f")]
16639 unsafe fn test_mm_cmp_sd_mask() {
16640 let a = _mm_setr_pd(2., 1.);
16641 let b = _mm_setr_pd(1., 2.);
16642 let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
16643 assert_eq!(m, 1);
16644 }
16645
16646 #[simd_test(enable = "avx512f")]
16647 unsafe fn test_mm_mask_cmp_sd_mask() {
16648 let a = _mm_setr_pd(2., 1.);
16649 let b = _mm_setr_pd(1., 2.);
16650 let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
16651 assert_eq!(m, 0);
16652 let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
16653 assert_eq!(m, 1);
16654 }
16655
16656 #[simd_test(enable = "avx512f")]
16657 unsafe fn test_mm_cmp_round_sd_mask() {
16658 let a = _mm_setr_pd(2., 1.);
16659 let b = _mm_setr_pd(1., 2.);
16660 let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
16661 assert_eq!(m, 1);
16662 }
16663
16664 #[simd_test(enable = "avx512f")]
16665 unsafe fn test_mm_mask_cmp_round_sd_mask() {
16666 let a = _mm_setr_pd(2., 1.);
16667 let b = _mm_setr_pd(1., 2.);
16668 let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
16669 assert_eq!(m, 0);
16670 let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
16671 assert_eq!(m, 1);
16672 }
16673
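// The epu32 comparisons reinterpret each lane as unsigned, so -1 and u32::MAX
// are both the largest possible value and e.g. -100 compares greater than 100;
// the epi32 tests further down use signed ordering.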
16674 #[simd_test(enable = "avx512f")]
16675 unsafe fn test_mm512_cmplt_epu32_mask() {
16676 #[rustfmt::skip]
16677 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16678 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16679 let b = _mm512_set1_epi32(-1);
16680 let m = _mm512_cmplt_epu32_mask(a, b);
16681 assert_eq!(m, 0b11001111_11001111);
16682 }
16683
16684 #[simd_test(enable = "avx512f")]
16685 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
16686 #[rustfmt::skip]
16687 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16688 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16689 let b = _mm512_set1_epi32(-1);
16690 let mask = 0b01111010_01111010;
16691 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
16692 assert_eq!(r, 0b01001010_01001010);
16693 }
16694
16695 #[simd_test(enable = "avx512f")]
16696 unsafe fn test_mm512_cmpgt_epu32_mask() {
16697 #[rustfmt::skip]
16698 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16699 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16700 let b = _mm512_set1_epi32(-1);
16701 let m = _mm512_cmpgt_epu32_mask(b, a);
16702 assert_eq!(m, 0b11001111_11001111);
16703 }
16704
16705 #[simd_test(enable = "avx512f")]
16706 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
16707 #[rustfmt::skip]
16708 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16709 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16710 let b = _mm512_set1_epi32(-1);
16711 let mask = 0b01111010_01111010;
16712 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
16713 assert_eq!(r, 0b01001010_01001010);
16714 }
16715
16716 #[simd_test(enable = "avx512f")]
16717 unsafe fn test_mm512_cmple_epu32_mask() {
16718 #[rustfmt::skip]
16719 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16720 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16721 let b = _mm512_set1_epi32(-1);
16722 assert_eq!(
16723 _mm512_cmple_epu32_mask(a, b),
16724 !_mm512_cmpgt_epu32_mask(a, b)
16725 );
16726 }
16727
16728 #[simd_test(enable = "avx512f")]
16729 unsafe fn test_mm512_mask_cmple_epu32_mask() {
16730 #[rustfmt::skip]
16731 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16732 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16733 let b = _mm512_set1_epi32(-1);
16734 let mask = 0b01111010_01111010;
16735 assert_eq!(
16736 _mm512_mask_cmple_epu32_mask(mask, a, b),
16737 0b01111010_01111010
16738 );
16739 }
16740
16741 #[simd_test(enable = "avx512f")]
16742 unsafe fn test_mm512_cmpge_epu32_mask() {
16743 #[rustfmt::skip]
16744 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16745 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16746 let b = _mm512_set1_epi32(-1);
16747 assert_eq!(
16748 _mm512_cmpge_epu32_mask(a, b),
16749 !_mm512_cmplt_epu32_mask(a, b)
16750 );
16751 }
16752
16753 #[simd_test(enable = "avx512f")]
16754 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
16755 #[rustfmt::skip]
16756 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16757 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16758 let b = _mm512_set1_epi32(-1);
16759 let mask = 0b01111010_01111010;
16760 assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
16761 }
16762
16763 #[simd_test(enable = "avx512f")]
16764 unsafe fn test_mm512_cmpeq_epu32_mask() {
16765 #[rustfmt::skip]
16766 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16767 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16768 #[rustfmt::skip]
16769 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16770 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16771 let m = _mm512_cmpeq_epu32_mask(b, a);
16772 assert_eq!(m, 0b11001111_11001111);
16773 }
16774
16775 #[simd_test(enable = "avx512f")]
16776 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
16777 #[rustfmt::skip]
16778 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16779 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16780 #[rustfmt::skip]
16781 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16782 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16783 let mask = 0b01111010_01111010;
16784 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
16785 assert_eq!(r, 0b01001010_01001010);
16786 }
16787
16788 #[simd_test(enable = "avx512f")]
16789 unsafe fn test_mm512_cmpneq_epu32_mask() {
16790 #[rustfmt::skip]
16791 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16792 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16793 #[rustfmt::skip]
16794 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16795 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16796 let m = _mm512_cmpneq_epu32_mask(b, a);
16797 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
16798 }
16799
16800 #[simd_test(enable = "avx512f")]
16801 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
16802 #[rustfmt::skip]
16803 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
16804 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
16805 #[rustfmt::skip]
16806 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16807 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16808 let mask = 0b01111010_01111010;
16809 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
16810 assert_eq!(r, 0b00110010_00110010);
16811 }
16812
16813 #[simd_test(enable = "avx512f")]
16814 unsafe fn test_mm512_cmp_epu32_mask() {
16815 #[rustfmt::skip]
16816 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16817 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16818 let b = _mm512_set1_epi32(-1);
16819 let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
16820 assert_eq!(m, 0b11001111_11001111);
16821 }
16822
16823 #[simd_test(enable = "avx512f")]
16824 unsafe fn test_mm512_mask_cmp_epu32_mask() {
16825 #[rustfmt::skip]
16826 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16827 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16828 let b = _mm512_set1_epi32(-1);
16829 let mask = 0b01111010_01111010;
16830 let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
16831 assert_eq!(r, 0b01001010_01001010);
16832 }
16833
16834 #[simd_test(enable = "avx512f")]
16835 unsafe fn test_mm512_cmplt_epi32_mask() {
16836 #[rustfmt::skip]
16837 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16838 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16839 let b = _mm512_set1_epi32(-1);
16840 let m = _mm512_cmplt_epi32_mask(a, b);
16841 assert_eq!(m, 0b00000101_00000101);
16842 }
16843
16844 #[simd_test(enable = "avx512f")]
16845 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
16846 #[rustfmt::skip]
16847 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16848 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16849 let b = _mm512_set1_epi32(-1);
16850 let mask = 0b01100110_01100110;
16851 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
16852 assert_eq!(r, 0b00000100_00000100);
16853 }
16854
16855 #[simd_test(enable = "avx512f")]
16856 unsafe fn test_mm512_cmpgt_epi32_mask() {
16857 #[rustfmt::skip]
16858 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16859 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16860 let b = _mm512_set1_epi32(-1);
16861 let m = _mm512_cmpgt_epi32_mask(b, a);
16862 assert_eq!(m, 0b00000101_00000101);
16863 }
16864
16865 #[simd_test(enable = "avx512f")]
16866 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
16867 #[rustfmt::skip]
16868 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16869 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16870 let b = _mm512_set1_epi32(-1);
16871 let mask = 0b01100110_01100110;
16872 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
16873 assert_eq!(r, 0b00000100_00000100);
16874 }
16875
16876 #[simd_test(enable = "avx512f")]
16877 unsafe fn test_mm512_cmple_epi32_mask() {
16878 #[rustfmt::skip]
16879 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16880 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16881 let b = _mm512_set1_epi32(-1);
16882 assert_eq!(
16883 _mm512_cmple_epi32_mask(a, b),
16884 !_mm512_cmpgt_epi32_mask(a, b)
16885 )
16886 }
16887
16888 #[simd_test(enable = "avx512f")]
16889 unsafe fn test_mm512_mask_cmple_epi32_mask() {
16890 #[rustfmt::skip]
16891 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16892 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16893 let b = _mm512_set1_epi32(-1);
16894 let mask = 0b01111010_01111010;
16895         assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
16896 }
16897
16898 #[simd_test(enable = "avx512f")]
16899 unsafe fn test_mm512_cmpge_epi32_mask() {
16900 #[rustfmt::skip]
16901 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16902 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16903 let b = _mm512_set1_epi32(-1);
16904 assert_eq!(
16905 _mm512_cmpge_epi32_mask(a, b),
16906 !_mm512_cmplt_epi32_mask(a, b)
16907 )
16908 }
16909
16910 #[simd_test(enable = "avx512f")]
16911 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
16912 #[rustfmt::skip]
16913 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
16914 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
16915 let b = _mm512_set1_epi32(-1);
16916 let mask = 0b01111010_01111010;
16917 assert_eq!(
16918 _mm512_mask_cmpge_epi32_mask(mask, a, b),
16919 0b01111010_01111010
16920 );
16921 }
16922
16923 #[simd_test(enable = "avx512f")]
16924 unsafe fn test_mm512_cmpeq_epi32_mask() {
16925 #[rustfmt::skip]
16926 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16927 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16928 #[rustfmt::skip]
16929 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16930 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16931 let m = _mm512_cmpeq_epi32_mask(b, a);
16932 assert_eq!(m, 0b11001111_11001111);
16933 }
16934
16935 #[simd_test(enable = "avx512f")]
16936 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
16937 #[rustfmt::skip]
16938 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16939 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16940 #[rustfmt::skip]
16941 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16942 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16943 let mask = 0b01111010_01111010;
16944 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
16945 assert_eq!(r, 0b01001010_01001010);
16946 }
16947
16948 #[simd_test(enable = "avx512f")]
16949 unsafe fn test_mm512_cmpneq_epi32_mask() {
16950 #[rustfmt::skip]
16951 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16952 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16953 #[rustfmt::skip]
16954 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16955 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16956 let m = _mm512_cmpneq_epi32_mask(b, a);
16957 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
16958 }
16959
16960 #[simd_test(enable = "avx512f")]
16961 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
16962 #[rustfmt::skip]
16963 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
16964 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
16965 #[rustfmt::skip]
16966 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
16967 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
16968 let mask = 0b01111010_01111010;
16969 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
16970 assert_eq!(r, 0b00110010_00110010)
16971 }
16972
16973 #[simd_test(enable = "avx512f")]
16974 unsafe fn test_mm512_cmp_epi32_mask() {
16975 #[rustfmt::skip]
16976 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16977 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16978 let b = _mm512_set1_epi32(-1);
16979 let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
16980 assert_eq!(m, 0b00000101_00000101);
16981 }
16982
16983 #[simd_test(enable = "avx512f")]
16984 unsafe fn test_mm512_mask_cmp_epi32_mask() {
16985 #[rustfmt::skip]
16986 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
16987 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
16988 let b = _mm512_set1_epi32(-1);
16989 let mask = 0b01100110_01100110;
16990 let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
16991 assert_eq!(r, 0b00000100_00000100);
16992 }
16993
16994 #[simd_test(enable = "avx512f")]
16995 unsafe fn test_mm512_set_epi32() {
16996 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
16997 assert_eq_m512i(
16998 r,
16999 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
17000 )
17001 }
17002
17003 #[simd_test(enable = "avx512f")]
17004 unsafe fn test_mm512_setr_epi32() {
17005 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17006 assert_eq_m512i(
17007 r,
17008 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
17009 )
17010 }
17011
17012 #[simd_test(enable = "avx512f")]
17013 unsafe fn test_mm512_set1_epi32() {
17014 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17015 assert_eq_m512i(r, _mm512_set1_epi32(2));
17016 }
17017
17018 #[simd_test(enable = "avx512f")]
17019 unsafe fn test_mm512_setzero_si512() {
17020 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
17021 }
17022
17023 #[simd_test(enable = "avx512f")]
17024 unsafe fn test_mm512_set_ps() {
17025 let r = _mm512_setr_ps(
17026 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17027 );
17028 assert_eq_m512(
17029 r,
17030 _mm512_set_ps(
17031 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
17032 ),
17033 )
17034 }
17035
17036 #[simd_test(enable = "avx512f")]
17037 unsafe fn test_mm512_setr_ps() {
17038 let r = _mm512_set_ps(
17039 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17040 );
17041 assert_eq_m512(
17042 r,
17043 _mm512_setr_ps(
17044 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
17045 ),
17046 )
17047 }
17048
17049 #[simd_test(enable = "avx512f")]
17050 unsafe fn test_mm512_set1_ps() {
17051 #[rustfmt::skip]
17052 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
17053 2., 2., 2., 2., 2., 2., 2., 2.);
17054 assert_eq_m512(expected, _mm512_set1_ps(2.));
17055 }
17056
17057 #[simd_test(enable = "avx512f")]
17058 unsafe fn test_mm512_setzero_ps() {
17059 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
17060 }
17061
17062 #[simd_test(enable = "avx512f")]
17063 unsafe fn test_mm512_loadu_pd() {
17064 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
17065 let p = a.as_ptr();
17066 let r = _mm512_loadu_pd(black_box(p));
17067 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
17068 assert_eq_m512d(r, e);
17069 }
17070
17071 #[simd_test(enable = "avx512f")]
17072 unsafe fn test_mm512_storeu_pd() {
17073 let a = _mm512_set1_pd(9.);
17074 let mut r = _mm512_undefined_pd();
17075 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
17076 assert_eq_m512d(r, a);
17077 }
17078
17079 #[simd_test(enable = "avx512f")]
17080 unsafe fn test_mm512_loadu_ps() {
17081 let a = &[
17082 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
17083 ];
17084 let p = a.as_ptr();
17085 let r = _mm512_loadu_ps(black_box(p));
17086 let e = _mm512_setr_ps(
17087 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
17088 );
17089 assert_eq_m512(r, e);
17090 }
17091
17092 #[simd_test(enable = "avx512f")]
17093 unsafe fn test_mm512_storeu_ps() {
17094 let a = _mm512_set1_ps(9.);
17095 let mut r = _mm512_undefined_ps();
17096 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
17097 assert_eq_m512(r, a);
17098 }
17099
17100 #[simd_test(enable = "avx512f")]
17101 unsafe fn test_mm512_setr_pd() {
17102 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
17103 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
17104 }
17105
17106 #[simd_test(enable = "avx512f")]
17107 unsafe fn test_mm512_set_pd() {
17108 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
17109 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
17110 }
17111
17112 #[simd_test(enable = "avx512f")]
17113 unsafe fn test_mm512_rol_epi32() {
17114 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17115 let r = _mm512_rol_epi32(a, 1);
17116 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17117 assert_eq_m512i(r, e);
17118 }
17119
17120 #[simd_test(enable = "avx512f")]
17121 unsafe fn test_mm512_mask_rol_epi32() {
17122 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17123 let r = _mm512_mask_rol_epi32(a, 0, a, 1);
17124 assert_eq_m512i(r, a);
17125
17126 let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1);
17127 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17128 assert_eq_m512i(r, e);
17129 }
17130
17131 #[simd_test(enable = "avx512f")]
17132 unsafe fn test_mm512_maskz_rol_epi32() {
17133 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
17134 let r = _mm512_maskz_rol_epi32(0, a, 1);
17135 assert_eq_m512i(r, _mm512_setzero_si512());
17136
17137 let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
17138 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
17139 assert_eq_m512i(r, e);
17140 }
17141
17142 #[simd_test(enable = "avx512f")]
17143 unsafe fn test_mm512_ror_epi32() {
17144 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17145 let r = _mm512_ror_epi32(a, 1);
17146 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17147 assert_eq_m512i(r, e);
17148 }
17149
17150 #[simd_test(enable = "avx512f")]
17151 unsafe fn test_mm512_mask_ror_epi32() {
17152 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17153 let r = _mm512_mask_ror_epi32(a, 0, a, 1);
17154 assert_eq_m512i(r, a);
17155
17156 let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1);
17157 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17158 assert_eq_m512i(r, e);
17159 }
17160
17161 #[simd_test(enable = "avx512f")]
17162 unsafe fn test_mm512_maskz_ror_epi32() {
17163 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
17164 let r = _mm512_maskz_ror_epi32(0, a, 1);
17165 assert_eq_m512i(r, _mm512_setzero_si512());
17166
17167 let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1);
17168 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
17169 assert_eq_m512i(r, e);
17170 }
17171
17172 #[simd_test(enable = "avx512f")]
17173 unsafe fn test_mm512_slli_epi32() {
17174 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17175 let r = _mm512_slli_epi32(a, 1);
17176 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17177 assert_eq_m512i(r, e);
17178 }
17179
17180 #[simd_test(enable = "avx512f")]
17181 unsafe fn test_mm512_mask_slli_epi32() {
17182 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17183 let r = _mm512_mask_slli_epi32(a, 0, a, 1);
17184 assert_eq_m512i(r, a);
17185
17186 let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1);
17187 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17188 assert_eq_m512i(r, e);
17189 }
17190
17191 #[simd_test(enable = "avx512f")]
17192 unsafe fn test_mm512_maskz_slli_epi32() {
17193 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
17194 let r = _mm512_maskz_slli_epi32(0, a, 1);
17195 assert_eq_m512i(r, _mm512_setzero_si512());
17196
17197 let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1);
17198 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
17199 assert_eq_m512i(r, e);
17200 }
17201
17202 #[simd_test(enable = "avx512f")]
17203 unsafe fn test_mm512_srli_epi32() {
17204 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17205 let r = _mm512_srli_epi32(a, 1);
17206 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17207 assert_eq_m512i(r, e);
17208 }
17209
17210 #[simd_test(enable = "avx512f")]
17211 unsafe fn test_mm512_mask_srli_epi32() {
17212 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17213 let r = _mm512_mask_srli_epi32(a, 0, a, 1);
17214 assert_eq_m512i(r, a);
17215
17216 let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1);
17217 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17218 assert_eq_m512i(r, e);
17219 }
17220
17221 #[simd_test(enable = "avx512f")]
17222 unsafe fn test_mm512_maskz_srli_epi32() {
17223 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
17224 let r = _mm512_maskz_srli_epi32(0, a, 1);
17225 assert_eq_m512i(r, _mm512_setzero_si512());
17226
17227 let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1);
17228 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
17229 assert_eq_m512i(r, e);
17230 }
17231
17232 #[simd_test(enable = "avx512f")]
17233 unsafe fn test_mm512_rolv_epi32() {
17234 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17235 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17236
17237 let r = _mm512_rolv_epi32(a, b);
17238
17239 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17240 assert_eq_m512i(r, e);
17241 }
17242
17243 #[simd_test(enable = "avx512f")]
17244 unsafe fn test_mm512_mask_rolv_epi32() {
17245 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17246 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17247
17248 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
17249 assert_eq_m512i(r, a);
17250
17251 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
17252
17253 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17254 assert_eq_m512i(r, e);
17255 }
17256
17257 #[simd_test(enable = "avx512f")]
17258 unsafe fn test_mm512_maskz_rolv_epi32() {
17259 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
17260 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17261
17262 let r = _mm512_maskz_rolv_epi32(0, a, b);
17263 assert_eq_m512i(r, _mm512_setzero_si512());
17264
17265 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
17266
17267 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
17268 assert_eq_m512i(r, e);
17269 }
17270
17271 #[simd_test(enable = "avx512f")]
17272 unsafe fn test_mm512_rorv_epi32() {
17273 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17274 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17275
17276 let r = _mm512_rorv_epi32(a, b);
17277
17278 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17279 assert_eq_m512i(r, e);
17280 }
17281
17282 #[simd_test(enable = "avx512f")]
17283 unsafe fn test_mm512_mask_rorv_epi32() {
17284 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17285 let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17286
17287 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
17288 assert_eq_m512i(r, a);
17289
17290 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
17291
17292 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17293 assert_eq_m512i(r, e);
17294 }
17295
17296 #[simd_test(enable = "avx512f")]
17297 unsafe fn test_mm512_maskz_rorv_epi32() {
17298 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
17299 let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17300
17301 let r = _mm512_maskz_rorv_epi32(0, a, b);
17302 assert_eq_m512i(r, _mm512_setzero_si512());
17303
17304 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
17305
17306 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
17307 assert_eq_m512i(r, e);
17308 }
17309
17310 #[simd_test(enable = "avx512f")]
17311 unsafe fn test_mm512_sllv_epi32() {
17312 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17313 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17314
17315 let r = _mm512_sllv_epi32(a, count);
17316
17317 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17318 assert_eq_m512i(r, e);
17319 }
17320
17321 #[simd_test(enable = "avx512f")]
17322 unsafe fn test_mm512_mask_sllv_epi32() {
17323 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17324 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17325
17326 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
17327 assert_eq_m512i(r, a);
17328
17329 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
17330
17331 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17332 assert_eq_m512i(r, e);
17333 }
17334
17335 #[simd_test(enable = "avx512f")]
17336 unsafe fn test_mm512_maskz_sllv_epi32() {
17337 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
17338 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17339
17340 let r = _mm512_maskz_sllv_epi32(0, a, count);
17341 assert_eq_m512i(r, _mm512_setzero_si512());
17342
17343 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
17344
17345 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
17346 assert_eq_m512i(r, e);
17347 }
17348
17349 #[simd_test(enable = "avx512f")]
17350 unsafe fn test_mm512_srlv_epi32() {
17351 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17352 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17353
17354 let r = _mm512_srlv_epi32(a, count);
17355
17356 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17357 assert_eq_m512i(r, e);
17358 }
17359
17360 #[simd_test(enable = "avx512f")]
17361 unsafe fn test_mm512_mask_srlv_epi32() {
17362 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
17363 let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17364
17365 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
17366 assert_eq_m512i(r, a);
17367
17368 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
17369
17370 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17371 assert_eq_m512i(r, e);
17372 }
17373
17374 #[simd_test(enable = "avx512f")]
17375 unsafe fn test_mm512_maskz_srlv_epi32() {
17376 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
17377 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
17378
17379 let r = _mm512_maskz_srlv_epi32(0, a, count);
17380 assert_eq_m512i(r, _mm512_setzero_si512());
17381
17382 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
17383
17384 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
17385 assert_eq_m512i(r, e);
17386 }
17387
17388 #[simd_test(enable = "avx512f")]
17389 unsafe fn test_mm512_sll_epi32() {
17390 let a = _mm512_set_epi32(
17391 1 << 31,
17392 1 << 0,
17393 1 << 1,
17394 1 << 2,
17395 0,
17396 0,
17397 0,
17398 0,
17399 0,
17400 0,
17401 0,
17402 0,
17403 0,
17404 0,
17405 0,
17406 0,
17407 );
17408 let count = _mm_set_epi32(0, 0, 0, 2);
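        // `_mm512_sll_epi32` takes the shift amount from the low 64 bits of `count`,
        // so this shifts every element left by 2; `1 << 31` shifts out to 0.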
17409 let r = _mm512_sll_epi32(a, count);
17410 let e = _mm512_set_epi32(
17411 0,
17412 1 << 2,
17413 1 << 3,
17414 1 << 4,
17415 0,
17416 0,
17417 0,
17418 0,
17419 0,
17420 0,
17421 0,
17422 0,
17423 0,
17424 0,
17425 0,
17426 0,
17427 );
17428 assert_eq_m512i(r, e);
17429 }
17430
17431 #[simd_test(enable = "avx512f")]
17432 unsafe fn test_mm512_mask_sll_epi32() {
17433 let a = _mm512_set_epi32(
17434 1 << 31,
17435 1 << 0,
17436 1 << 1,
17437 1 << 2,
17438 0,
17439 0,
17440 0,
17441 0,
17442 0,
17443 0,
17444 0,
17445 0,
17446 0,
17447 0,
17448 0,
17449 0,
17450 );
17451 let count = _mm_set_epi32(0, 0, 0, 2);
17452 let r = _mm512_mask_sll_epi32(a, 0, a, count);
17453 assert_eq_m512i(r, a);
17454
17455 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
17456 let e = _mm512_set_epi32(
17457 0,
17458 1 << 2,
17459 1 << 3,
17460 1 << 4,
17461 0,
17462 0,
17463 0,
17464 0,
17465 0,
17466 0,
17467 0,
17468 0,
17469 0,
17470 0,
17471 0,
17472 0,
17473 );
17474 assert_eq_m512i(r, e);
17475 }
17476
17477 #[simd_test(enable = "avx512f")]
17478 unsafe fn test_mm512_maskz_sll_epi32() {
17479 let a = _mm512_set_epi32(
17480 1 << 31,
17481 1 << 0,
17482 1 << 1,
17483 1 << 2,
17484 0,
17485 0,
17486 0,
17487 0,
17488 0,
17489 0,
17490 0,
17491 0,
17492 0,
17493 0,
17494 0,
17495 1 << 31,
17496 );
17497 let count = _mm_set_epi32(2, 0, 0, 2);
17498 let r = _mm512_maskz_sll_epi32(0, a, count);
17499 assert_eq_m512i(r, _mm512_setzero_si512());
17500
17501 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
17502 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17503 assert_eq_m512i(r, e);
17504 }
17505
17506 #[simd_test(enable = "avx512f")]
17507 unsafe fn test_mm512_srl_epi32() {
17508 let a = _mm512_set_epi32(
17509 1 << 31,
17510 1 << 0,
17511 1 << 1,
17512 1 << 2,
17513 0,
17514 0,
17515 0,
17516 0,
17517 0,
17518 0,
17519 0,
17520 0,
17521 0,
17522 0,
17523 0,
17524 0,
17525 );
17526 let count = _mm_set_epi32(0, 0, 0, 2);
17527 let r = _mm512_srl_epi32(a, count);
17528 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17529 assert_eq_m512i(r, e);
17530 }
17531
17532 #[simd_test(enable = "avx512f")]
17533 unsafe fn test_mm512_mask_srl_epi32() {
17534 let a = _mm512_set_epi32(
17535 1 << 31,
17536 1 << 0,
17537 1 << 1,
17538 1 << 2,
17539 0,
17540 0,
17541 0,
17542 0,
17543 0,
17544 0,
17545 0,
17546 0,
17547 0,
17548 0,
17549 0,
17550 0,
17551 );
17552 let count = _mm_set_epi32(0, 0, 0, 2);
17553 let r = _mm512_mask_srl_epi32(a, 0, a, count);
17554 assert_eq_m512i(r, a);
17555
17556 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
17557 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17558 assert_eq_m512i(r, e);
17559 }
17560
17561 #[simd_test(enable = "avx512f")]
17562 unsafe fn test_mm512_maskz_srl_epi32() {
17563 let a = _mm512_set_epi32(
17564 1 << 31,
17565 1 << 0,
17566 1 << 1,
17567 1 << 2,
17568 0,
17569 0,
17570 0,
17571 0,
17572 0,
17573 0,
17574 0,
17575 0,
17576 0,
17577 0,
17578 0,
17579 1 << 31,
17580 );
17581 let count = _mm_set_epi32(2, 0, 0, 2);
17582 let r = _mm512_maskz_srl_epi32(0, a, count);
17583 assert_eq_m512i(r, _mm512_setzero_si512());
17584
17585 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
17586 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
17587 assert_eq_m512i(r, e);
17588 }
17589
17590 #[simd_test(enable = "avx512f")]
17591 unsafe fn test_mm512_sra_epi32() {
17592 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
17593 let count = _mm_set_epi32(1, 0, 0, 2);
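        // Only the low 64 bits of `count` are used (here 2); the arithmetic shift
        // rounds toward negative infinity, so -15 >> 2 == -4.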
17594 let r = _mm512_sra_epi32(a, count);
17595 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17596 assert_eq_m512i(r, e);
17597 }
17598
17599 #[simd_test(enable = "avx512f")]
17600 unsafe fn test_mm512_mask_sra_epi32() {
17601 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
17602 let count = _mm_set_epi32(0, 0, 0, 2);
17603 let r = _mm512_mask_sra_epi32(a, 0, a, count);
17604 assert_eq_m512i(r, a);
17605
17606 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
17607 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
17608 assert_eq_m512i(r, e);
17609 }
17610
17611 #[simd_test(enable = "avx512f")]
17612 unsafe fn test_mm512_maskz_sra_epi32() {
17613 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
17614 let count = _mm_set_epi32(2, 0, 0, 2);
17615 let r = _mm512_maskz_sra_epi32(0, a, count);
17616 assert_eq_m512i(r, _mm512_setzero_si512());
17617
17618 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
17619 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
17620 assert_eq_m512i(r, e);
17621 }
17622
17623 #[simd_test(enable = "avx512f")]
17624 unsafe fn test_mm512_srav_epi32() {
17625 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
17626 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17627 let r = _mm512_srav_epi32(a, count);
17628 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
17629 assert_eq_m512i(r, e);
17630 }
17631
17632 #[simd_test(enable = "avx512f")]
17633 unsafe fn test_mm512_mask_srav_epi32() {
17634 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
17635 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
17636 let r = _mm512_mask_srav_epi32(a, 0, a, count);
17637 assert_eq_m512i(r, a);
17638
17639 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
17640 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
17641 assert_eq_m512i(r, e);
17642 }
17643
17644 #[simd_test(enable = "avx512f")]
17645 unsafe fn test_mm512_maskz_srav_epi32() {
17646 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
17647 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
17648 let r = _mm512_maskz_srav_epi32(0, a, count);
17649 assert_eq_m512i(r, _mm512_setzero_si512());
17650
17651 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
17652 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
17653 assert_eq_m512i(r, e);
17654 }
17655
17656 #[simd_test(enable = "avx512f")]
17657 unsafe fn test_mm512_srai_epi32() {
17658 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
17659 let r = _mm512_srai_epi32(a, 2);
17660 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
17661 assert_eq_m512i(r, e);
17662 }
17663
17664 #[simd_test(enable = "avx512f")]
17665 unsafe fn test_mm512_mask_srai_epi32() {
17666 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
17667 let r = _mm512_mask_srai_epi32(a, 0, a, 2);
17668 assert_eq_m512i(r, a);
17669
17670 let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2);
17671 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
17672 assert_eq_m512i(r, e);
17673 }
17674
17675 #[simd_test(enable = "avx512f")]
17676 unsafe fn test_mm512_maskz_srai_epi32() {
17677 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
17678 let r = _mm512_maskz_srai_epi32(0, a, 2);
17679 assert_eq_m512i(r, _mm512_setzero_si512());
17680
17681 let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2);
17682 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
17683 assert_eq_m512i(r, e);
17684 }
17685
17686 #[simd_test(enable = "avx512f")]
17687 unsafe fn test_mm512_permute_ps() {
17688 let a = _mm512_set_ps(
17689 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17690 );
17691 let r = _mm512_permute_ps(a, 1);
17692 let e = _mm512_set_ps(
17693 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
17694 );
17695 assert_eq_m512(r, e);
17696 }
17697
17698 #[simd_test(enable = "avx512f")]
17699 unsafe fn test_mm512_mask_permute_ps() {
17700 let a = _mm512_set_ps(
17701 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17702 );
17703 let r = _mm512_mask_permute_ps(a, 0b00000000_00000000, a, 1);
17704 assert_eq_m512(r, a);
17705 let r = _mm512_mask_permute_ps(a, 0b11111111_11111111, a, 1);
17706 let e = _mm512_set_ps(
17707 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
17708 );
17709 assert_eq_m512(r, e);
17710 }
17711
17712 #[simd_test(enable = "avx512f")]
17713 unsafe fn test_mm512_maskz_permute_ps() {
17714 let a = _mm512_set_ps(
17715 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17716 );
17717 let r = _mm512_maskz_permute_ps(0, a, 1);
17718 assert_eq_m512(r, _mm512_setzero_ps());
17719 let r = _mm512_maskz_permute_ps(0b00000000_11111111, a, 1);
17720 let e = _mm512_set_ps(
17721 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
17722 );
17723 assert_eq_m512(r, e);
17724 }
17725
17726 #[simd_test(enable = "avx512f")]
17727 unsafe fn test_mm512_permutevar_epi32() {
17728 let idx = _mm512_set1_epi32(1);
17729 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
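        // Every index is 1, and `_mm512_set_epi32` lists elements from highest to
        // lowest, so element 1 of `a` is 14 and every lane of the result becomes 14.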
17730 let r = _mm512_permutevar_epi32(idx, a);
17731 let e = _mm512_set1_epi32(14);
17732 assert_eq_m512i(r, e);
17733 }
17734
17735 #[simd_test(enable = "avx512f")]
17736 unsafe fn test_mm512_mask_permutevar_epi32() {
17737 let idx = _mm512_set1_epi32(1);
17738 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17739 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
17740 assert_eq_m512i(r, a);
17741 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
17742 let e = _mm512_set1_epi32(14);
17743 assert_eq_m512i(r, e);
17744 }
17745
17746 #[simd_test(enable = "avx512f")]
17747 unsafe fn test_mm512_permutevar_ps() {
17748 let a = _mm512_set_ps(
17749 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17750 );
17751 let b = _mm512_set1_epi32(1);
17752 let r = _mm512_permutevar_ps(a, b);
17753 let e = _mm512_set_ps(
17754 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
17755 );
17756 assert_eq_m512(r, e);
17757 }
17758
17759 #[simd_test(enable = "avx512f")]
17760 unsafe fn test_mm512_mask_permutevar_ps() {
17761 let a = _mm512_set_ps(
17762 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17763 );
17764 let b = _mm512_set1_epi32(1);
17765 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
17766 assert_eq_m512(r, a);
17767 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
17768 let e = _mm512_set_ps(
17769 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
17770 );
17771 assert_eq_m512(r, e);
17772 }
17773
17774 #[simd_test(enable = "avx512f")]
17775 unsafe fn test_mm512_maskz_permutevar_ps() {
17776 let a = _mm512_set_ps(
17777 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17778 );
17779 let b = _mm512_set1_epi32(1);
17780 let r = _mm512_maskz_permutevar_ps(0, a, b);
17781 assert_eq_m512(r, _mm512_setzero_ps());
17782 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
17783 let e = _mm512_set_ps(
17784 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
17785 );
17786 assert_eq_m512(r, e);
17787 }
17788
17789 #[simd_test(enable = "avx512f")]
17790 unsafe fn test_mm512_permutexvar_epi32() {
17791 let idx = _mm512_set1_epi32(1);
17792 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17793 let r = _mm512_permutexvar_epi32(idx, a);
17794 let e = _mm512_set1_epi32(14);
17795 assert_eq_m512i(r, e);
17796 }
17797
17798 #[simd_test(enable = "avx512f")]
17799 unsafe fn test_mm512_mask_permutexvar_epi32() {
17800 let idx = _mm512_set1_epi32(1);
17801 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17802 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
17803 assert_eq_m512i(r, a);
17804 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
17805 let e = _mm512_set1_epi32(14);
17806 assert_eq_m512i(r, e);
17807 }
17808
17809 #[simd_test(enable = "avx512f")]
17810 unsafe fn test_mm512_maskz_permutexvar_epi32() {
17811 let idx = _mm512_set1_epi32(1);
17812 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17813 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
17814 assert_eq_m512i(r, _mm512_setzero_si512());
17815 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
17816 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
17817 assert_eq_m512i(r, e);
17818 }
17819
17820 #[simd_test(enable = "avx512f")]
17821 unsafe fn test_mm512_permutexvar_ps() {
17822 let idx = _mm512_set1_epi32(1);
17823 let a = _mm512_set_ps(
17824 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17825 );
17826 let r = _mm512_permutexvar_ps(idx, a);
17827 let e = _mm512_set1_ps(14.);
17828 assert_eq_m512(r, e);
17829 }
17830
17831 #[simd_test(enable = "avx512f")]
17832 unsafe fn test_mm512_mask_permutexvar_ps() {
17833 let idx = _mm512_set1_epi32(1);
17834 let a = _mm512_set_ps(
17835 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17836 );
17837 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
17838 assert_eq_m512(r, a);
17839 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
17840 let e = _mm512_set1_ps(14.);
17841 assert_eq_m512(r, e);
17842 }
17843
17844 #[simd_test(enable = "avx512f")]
17845 unsafe fn test_mm512_maskz_permutexvar_ps() {
17846 let idx = _mm512_set1_epi32(1);
17847 let a = _mm512_set_ps(
17848 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17849 );
17850 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
17851 assert_eq_m512(r, _mm512_setzero_ps());
17852 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
17853 let e = _mm512_set_ps(
17854 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
17855 );
17856 assert_eq_m512(r, e);
17857 }
17858
17859 #[simd_test(enable = "avx512f")]
17860 unsafe fn test_mm512_permutex2var_epi32() {
17861 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17862 let idx = _mm512_set_epi32(
17863 1,
17864 1 << 4,
17865 2,
17866 1 << 4,
17867 3,
17868 1 << 4,
17869 4,
17870 1 << 4,
17871 5,
17872 1 << 4,
17873 6,
17874 1 << 4,
17875 7,
17876 1 << 4,
17877 8,
17878 1 << 4,
17879 );
17880 let b = _mm512_set1_epi32(100);
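        // The low four index bits pick the element and bit 4 selects the second
        // source, so `1 << 4` lanes read 100 from `b` and the rest read from `a`.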
17881 let r = _mm512_permutex2var_epi32(a, idx, b);
17882 let e = _mm512_set_epi32(
17883 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
17884 );
17885 assert_eq_m512i(r, e);
17886 }
17887
17888 #[simd_test(enable = "avx512f")]
17889 unsafe fn test_mm512_mask_permutex2var_epi32() {
17890 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17891 let idx = _mm512_set_epi32(
17892 1,
17893 1 << 4,
17894 2,
17895 1 << 4,
17896 3,
17897 1 << 4,
17898 4,
17899 1 << 4,
17900 5,
17901 1 << 4,
17902 6,
17903 1 << 4,
17904 7,
17905 1 << 4,
17906 8,
17907 1 << 4,
17908 );
17909 let b = _mm512_set1_epi32(100);
17910 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
17911 assert_eq_m512i(r, a);
17912 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
17913 let e = _mm512_set_epi32(
17914 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
17915 );
17916 assert_eq_m512i(r, e);
17917 }
17918
17919 #[simd_test(enable = "avx512f")]
17920 unsafe fn test_mm512_maskz_permutex2var_epi32() {
17921 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17922 let idx = _mm512_set_epi32(
17923 1,
17924 1 << 4,
17925 2,
17926 1 << 4,
17927 3,
17928 1 << 4,
17929 4,
17930 1 << 4,
17931 5,
17932 1 << 4,
17933 6,
17934 1 << 4,
17935 7,
17936 1 << 4,
17937 8,
17938 1 << 4,
17939 );
17940 let b = _mm512_set1_epi32(100);
17941 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
17942 assert_eq_m512i(r, _mm512_setzero_si512());
17943 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
17944 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
17945 assert_eq_m512i(r, e);
17946 }
17947
17948 #[simd_test(enable = "avx512f")]
17949 unsafe fn test_mm512_mask2_permutex2var_epi32() {
17950 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
17951 let idx = _mm512_set_epi32(
17952 1000,
17953 1 << 4,
17954 2000,
17955 1 << 4,
17956 3000,
17957 1 << 4,
17958 4000,
17959 1 << 4,
17960 5,
17961 1 << 4,
17962 6,
17963 1 << 4,
17964 7,
17965 1 << 4,
17966 8,
17967 1 << 4,
17968 );
17969 let b = _mm512_set1_epi32(100);
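        // In the mask2 variant, lanes whose mask bit is clear are copied from `idx`,
        // which is why the out-of-range index values (1000, 2000, ...) appear
        // unchanged in the masked-off upper half of the expected result.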
17970 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
17971 assert_eq_m512i(r, idx);
17972 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
17973 let e = _mm512_set_epi32(
17974 1000,
17975 1 << 4,
17976 2000,
17977 1 << 4,
17978 3000,
17979 1 << 4,
17980 4000,
17981 1 << 4,
17982 10,
17983 100,
17984 9,
17985 100,
17986 8,
17987 100,
17988 7,
17989 100,
17990 );
17991 assert_eq_m512i(r, e);
17992 }
17993
17994 #[simd_test(enable = "avx512f")]
17995 unsafe fn test_mm512_permutex2var_ps() {
17996 let a = _mm512_set_ps(
17997 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
17998 );
17999 let idx = _mm512_set_epi32(
18000 1,
18001 1 << 4,
18002 2,
18003 1 << 4,
18004 3,
18005 1 << 4,
18006 4,
18007 1 << 4,
18008 5,
18009 1 << 4,
18010 6,
18011 1 << 4,
18012 7,
18013 1 << 4,
18014 8,
18015 1 << 4,
18016 );
18017 let b = _mm512_set1_ps(100.);
18018 let r = _mm512_permutex2var_ps(a, idx, b);
18019 let e = _mm512_set_ps(
18020 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
18021 );
18022 assert_eq_m512(r, e);
18023 }
18024
18025 #[simd_test(enable = "avx512f")]
18026 unsafe fn test_mm512_mask_permutex2var_ps() {
18027 let a = _mm512_set_ps(
18028 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
18029 );
18030 let idx = _mm512_set_epi32(
18031 1,
18032 1 << 4,
18033 2,
18034 1 << 4,
18035 3,
18036 1 << 4,
18037 4,
18038 1 << 4,
18039 5,
18040 1 << 4,
18041 6,
18042 1 << 4,
18043 7,
18044 1 << 4,
18045 8,
18046 1 << 4,
18047 );
18048 let b = _mm512_set1_ps(100.);
18049 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
18050 assert_eq_m512(r, a);
18051 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
18052 let e = _mm512_set_ps(
18053 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
18054 );
18055 assert_eq_m512(r, e);
18056 }
18057
18058 #[simd_test(enable = "avx512f")]
18059 unsafe fn test_mm512_maskz_permutex2var_ps() {
18060 let a = _mm512_set_ps(
18061 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
18062 );
18063 let idx = _mm512_set_epi32(
18064 1,
18065 1 << 4,
18066 2,
18067 1 << 4,
18068 3,
18069 1 << 4,
18070 4,
18071 1 << 4,
18072 5,
18073 1 << 4,
18074 6,
18075 1 << 4,
18076 7,
18077 1 << 4,
18078 8,
18079 1 << 4,
18080 );
18081 let b = _mm512_set1_ps(100.);
18082 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
18083 assert_eq_m512(r, _mm512_setzero_ps());
18084 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
18085 let e = _mm512_set_ps(
18086 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
18087 );
18088 assert_eq_m512(r, e);
18089 }
18090
18091 #[simd_test(enable = "avx512f")]
18092 unsafe fn test_mm512_mask2_permutex2var_ps() {
18093 let a = _mm512_set_ps(
18094 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
18095 );
18096 let idx = _mm512_set_epi32(
18097 1,
18098 1 << 4,
18099 2,
18100 1 << 4,
18101 3,
18102 1 << 4,
18103 4,
18104 1 << 4,
18105 5,
18106 1 << 4,
18107 6,
18108 1 << 4,
18109 7,
18110 1 << 4,
18111 8,
18112 1 << 4,
18113 );
18114 let b = _mm512_set1_ps(100.);
18115 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
18116 assert_eq_m512(r, _mm512_setzero_ps());
18117 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b00000000_11111111, b);
18118 let e = _mm512_set_ps(
18119 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
18120 );
18121 assert_eq_m512(r, e);
18122 }
18123
18124 #[simd_test(enable = "avx512f")]
18125 unsafe fn test_mm512_shuffle_epi32() {
18126 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
18127 let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD);
18128 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
18129 assert_eq_m512i(r, e);
18130 }
18131
18132 #[simd_test(enable = "avx512f")]
18133 unsafe fn test_mm512_mask_shuffle_epi32() {
18134 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
18135 let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
18136 assert_eq_m512i(r, a);
18137 let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD);
18138 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
18139 assert_eq_m512i(r, e);
18140 }
18141
18142 #[simd_test(enable = "avx512f")]
18143 unsafe fn test_mm512_maskz_shuffle_epi32() {
18144 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
18145 let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
18146 assert_eq_m512i(r, _mm512_setzero_si512());
18147 let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD);
18148 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
18149 assert_eq_m512i(r, e);
18150 }
18151
18152 #[simd_test(enable = "avx512f")]
18153 unsafe fn test_mm512_shuffle_ps() {
18154 let a = _mm512_setr_ps(
18155 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
18156 );
18157 let b = _mm512_setr_ps(
18158 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
18159 );
18160 let r = _mm512_shuffle_ps(a, b, 0x0F);
18161 let e = _mm512_setr_ps(
18162 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
18163 );
18164 assert_eq_m512(r, e);
18165 }
18166
18167 #[simd_test(enable = "avx512f")]
18168 unsafe fn test_mm512_mask_shuffle_ps() {
18169 let a = _mm512_setr_ps(
18170 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
18171 );
18172 let b = _mm512_setr_ps(
18173 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
18174 );
18175 let r = _mm512_mask_shuffle_ps(a, 0, a, b, 0x0F);
18176 assert_eq_m512(r, a);
18177 let r = _mm512_mask_shuffle_ps(a, 0b11111111_11111111, a, b, 0x0F);
18178 let e = _mm512_setr_ps(
18179 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
18180 );
18181 assert_eq_m512(r, e);
18182 }
18183
18184 #[simd_test(enable = "avx512f")]
18185 unsafe fn test_mm512_maskz_shuffle_ps() {
18186 let a = _mm512_setr_ps(
18187 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
18188 );
18189 let b = _mm512_setr_ps(
18190 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
18191 );
18192 let r = _mm512_maskz_shuffle_ps(0, a, b, 0x0F);
18193 assert_eq_m512(r, _mm512_setzero_ps());
18194 let r = _mm512_maskz_shuffle_ps(0b00000000_11111111, a, b, 0x0F);
18195 let e = _mm512_setr_ps(
18196 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
18197 );
18198 assert_eq_m512(r, e);
18199 }
18200
18201 #[simd_test(enable = "avx512f")]
18202 unsafe fn test_mm512_shuffle_i32x4() {
18203 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
18204 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
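        // The immediate selects 128-bit lanes: the two low result lanes come from `a`
        // and the two high lanes from `b`; 0b00000000 picks lane 0 of each source.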
18205 let r = _mm512_shuffle_i32x4(a, b, 0b00000000);
18206 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
18207 assert_eq_m512i(r, e);
18208 }
18209
18210 #[simd_test(enable = "avx512f")]
18211 unsafe fn test_mm512_mask_shuffle_i32x4() {
18212 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
18213 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
18214 let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b00000000);
18215 assert_eq_m512i(r, a);
18216 let r = _mm512_mask_shuffle_i32x4(a, 0b11111111_11111111, a, b, 0b00000000);
18217 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
18218 assert_eq_m512i(r, e);
18219 }
18220
18221 #[simd_test(enable = "avx512f")]
18222 unsafe fn test_mm512_maskz_shuffle_i32x4() {
18223 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
18224 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
18225 let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b00000000);
18226 assert_eq_m512i(r, _mm512_setzero_si512());
18227 let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b00000000);
18228 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
18229 assert_eq_m512i(r, e);
18230 }
18231
18232 #[simd_test(enable = "avx512f")]
18233 unsafe fn test_mm512_shuffle_f32x4() {
18234 let a = _mm512_setr_ps(
18235 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
18236 );
18237 let b = _mm512_setr_ps(
18238 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
18239 );
18240 let r = _mm512_shuffle_f32x4(a, b, 0b00000000);
18241 let e = _mm512_setr_ps(
18242 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
18243 );
18244 assert_eq_m512(r, e);
18245 }
18246
18247 #[simd_test(enable = "avx512f")]
18248 unsafe fn test_mm512_mask_shuffle_f32x4() {
18249 let a = _mm512_setr_ps(
18250 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
18251 );
18252 let b = _mm512_setr_ps(
18253 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
18254 );
18255 let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000);
18256 assert_eq_m512(r, a);
18257 let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000);
18258 let e = _mm512_setr_ps(
18259 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
18260 );
18261 assert_eq_m512(r, e);
18262 }
18263
18264 #[simd_test(enable = "avx512f")]
18265 unsafe fn test_mm512_maskz_shuffle_f32x4() {
18266 let a = _mm512_setr_ps(
18267 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
18268 );
18269 let b = _mm512_setr_ps(
18270 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
18271 );
18272 let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000);
18273 assert_eq_m512(r, _mm512_setzero_ps());
18274 let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000);
18275 let e = _mm512_setr_ps(
18276 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
18277 );
18278 assert_eq_m512(r, e);
18279 }
18280
18281 #[simd_test(enable = "avx512f")]
18282 unsafe fn test_mm512_extractf32x4_ps() {
18283 let a = _mm512_setr_ps(
18284 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18285 );
18286 let r = _mm512_extractf32x4_ps(a, 0x1);
18287 let e = _mm_setr_ps(5., 6., 7., 8.);
18288 assert_eq_m128(r, e);
18289 }
18290
18291 #[simd_test(enable = "avx512f")]
18292 unsafe fn test_mm512_moveldup_ps() {
18293 let a = _mm512_setr_ps(
18294 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18295 );
18296 let r = _mm512_moveldup_ps(a);
18297 let e = _mm512_setr_ps(
18298 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
18299 );
18300 assert_eq_m512(r, e);
18301 }
18302
18303 #[simd_test(enable = "avx512f")]
18304 unsafe fn test_mm512_mask_moveldup_ps() {
18305 let a = _mm512_setr_ps(
18306 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18307 );
18308 let r = _mm512_mask_moveldup_ps(a, 0, a);
18309 assert_eq_m512(r, a);
18310 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
18311 let e = _mm512_setr_ps(
18312 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
18313 );
18314 assert_eq_m512(r, e);
18315 }
18316
18317 #[simd_test(enable = "avx512f")]
18318 unsafe fn test_mm512_maskz_moveldup_ps() {
18319 let a = _mm512_setr_ps(
18320 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18321 );
18322 let r = _mm512_maskz_moveldup_ps(0, a);
18323 assert_eq_m512(r, _mm512_setzero_ps());
18324 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
18325 let e = _mm512_setr_ps(
18326 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
18327 );
18328 assert_eq_m512(r, e);
18329 }
18330
18331 #[simd_test(enable = "avx512f")]
18332 unsafe fn test_mm512_movehdup_ps() {
18333 let a = _mm512_setr_ps(
18334 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18335 );
18336 let r = _mm512_movehdup_ps(a);
18337 let e = _mm512_setr_ps(
18338 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
18339 );
18340 assert_eq_m512(r, e);
18341 }
18342
18343 #[simd_test(enable = "avx512f")]
18344 unsafe fn test_mm512_mask_movehdup_ps() {
18345 let a = _mm512_setr_ps(
18346 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18347 );
18348 let r = _mm512_mask_movehdup_ps(a, 0, a);
18349 assert_eq_m512(r, a);
18350 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
18351 let e = _mm512_setr_ps(
18352 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
18353 );
18354 assert_eq_m512(r, e);
18355 }
18356
18357 #[simd_test(enable = "avx512f")]
18358 unsafe fn test_mm512_maskz_movehdup_ps() {
18359 let a = _mm512_setr_ps(
18360 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
18361 );
18362 let r = _mm512_maskz_movehdup_ps(0, a);
18363 assert_eq_m512(r, _mm512_setzero_ps());
18364 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
18365 let e = _mm512_setr_ps(
18366 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
18367 );
18368 assert_eq_m512(r, e);
18369 }
18370
18371 #[simd_test(enable = "avx512f")]
18372 unsafe fn test_mm512_and_epi32() {
18373 let a = _mm512_set_epi32(
18374 1 << 1 | 1 << 2,
18375 0,
18376 0,
18377 0,
18378 0,
18379 0,
18380 0,
18381 0,
18382 0,
18383 0,
18384 0,
18385 0,
18386 0,
18387 0,
18388 0,
18389 1 << 1 | 1 << 3,
18390 );
18391 let b = _mm512_set_epi32(
18392 1 << 1,
18393 0,
18394 0,
18395 0,
18396 0,
18397 0,
18398 0,
18399 0,
18400 0,
18401 0,
18402 0,
18403 0,
18404 0,
18405 0,
18406 0,
18407 1 << 3 | 1 << 4,
18408 );
18409 let r = _mm512_and_epi32(a, b);
18410 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
18411 assert_eq_m512i(r, e);
18412 }
18413
18414 #[simd_test(enable = "avx512f")]
18415 unsafe fn test_mm512_mask_and_epi32() {
18416 let a = _mm512_set_epi32(
18417 1 << 1 | 1 << 2,
18418 0,
18419 0,
18420 0,
18421 0,
18422 0,
18423 0,
18424 0,
18425 0,
18426 0,
18427 0,
18428 0,
18429 0,
18430 0,
18431 0,
18432 1 << 1 | 1 << 3,
18433 );
18434 let b = _mm512_set_epi32(
18435 1 << 1,
18436 0,
18437 0,
18438 0,
18439 0,
18440 0,
18441 0,
18442 0,
18443 0,
18444 0,
18445 0,
18446 0,
18447 0,
18448 0,
18449 0,
18450 1 << 3 | 1 << 4,
18451 );
18452 let r = _mm512_mask_and_epi32(a, 0, a, b);
18453 assert_eq_m512i(r, a);
18454
18455 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
18456 let e = _mm512_set_epi32(
18457 1 << 1 | 1 << 2,
18458 0,
18459 0,
18460 0,
18461 0,
18462 0,
18463 0,
18464 0,
18465 0,
18466 0,
18467 0,
18468 0,
18469 0,
18470 0,
18471 0,
18472 1 << 3,
18473 );
18474 assert_eq_m512i(r, e);
18475 }
18476
18477 #[simd_test(enable = "avx512f")]
18478 unsafe fn test_mm512_maskz_and_epi32() {
18479 let a = _mm512_set_epi32(
18480 1 << 1 | 1 << 2,
18481 0,
18482 0,
18483 0,
18484 0,
18485 0,
18486 0,
18487 0,
18488 0,
18489 0,
18490 0,
18491 0,
18492 0,
18493 0,
18494 0,
18495 1 << 1 | 1 << 3,
18496 );
18497 let b = _mm512_set_epi32(
18498 1 << 1,
18499 0,
18500 0,
18501 0,
18502 0,
18503 0,
18504 0,
18505 0,
18506 0,
18507 0,
18508 0,
18509 0,
18510 0,
18511 0,
18512 0,
18513 1 << 3 | 1 << 4,
18514 );
18515 let r = _mm512_maskz_and_epi32(0, a, b);
18516 assert_eq_m512i(r, _mm512_setzero_si512());
18517
18518 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
18519 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
18520 assert_eq_m512i(r, e);
18521 }
18522
18523 #[simd_test(enable = "avx512f")]
18524 unsafe fn test_mm512_and_si512() {
18525 let a = _mm512_set_epi32(
18526 1 << 1 | 1 << 2,
18527 0,
18528 0,
18529 0,
18530 0,
18531 0,
18532 0,
18533 0,
18534 0,
18535 0,
18536 0,
18537 0,
18538 0,
18539 0,
18540 0,
18541 1 << 1 | 1 << 3,
18542 );
18543 let b = _mm512_set_epi32(
18544 1 << 1,
18545 0,
18546 0,
18547 0,
18548 0,
18549 0,
18550 0,
18551 0,
18552 0,
18553 0,
18554 0,
18555 0,
18556 0,
18557 0,
18558 0,
18559 1 << 3 | 1 << 4,
18560 );
18561 let r = _mm512_and_epi32(a, b);
18562 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
18563 assert_eq_m512i(r, e);
18564 }
18565
18566 #[simd_test(enable = "avx512f")]
18567 unsafe fn test_mm512_or_epi32() {
18568 let a = _mm512_set_epi32(
18569 1 << 1 | 1 << 2,
18570 0,
18571 0,
18572 0,
18573 0,
18574 0,
18575 0,
18576 0,
18577 0,
18578 0,
18579 0,
18580 0,
18581 0,
18582 0,
18583 0,
18584 1 << 1 | 1 << 3,
18585 );
18586 let b = _mm512_set_epi32(
18587 1 << 1,
18588 0,
18589 0,
18590 0,
18591 0,
18592 0,
18593 0,
18594 0,
18595 0,
18596 0,
18597 0,
18598 0,
18599 0,
18600 0,
18601 0,
18602 1 << 3 | 1 << 4,
18603 );
18604 let r = _mm512_or_epi32(a, b);
18605 let e = _mm512_set_epi32(
18606 1 << 1 | 1 << 2,
18607 0,
18608 0,
18609 0,
18610 0,
18611 0,
18612 0,
18613 0,
18614 0,
18615 0,
18616 0,
18617 0,
18618 0,
18619 0,
18620 0,
18621 1 << 1 | 1 << 3 | 1 << 4,
18622 );
18623 assert_eq_m512i(r, e);
18624 }
18625
18626 #[simd_test(enable = "avx512f")]
18627 unsafe fn test_mm512_mask_or_epi32() {
18628 let a = _mm512_set_epi32(
18629 1 << 1 | 1 << 2,
18630 0,
18631 0,
18632 0,
18633 0,
18634 0,
18635 0,
18636 0,
18637 0,
18638 0,
18639 0,
18640 0,
18641 0,
18642 0,
18643 0,
18644 1 << 1 | 1 << 3,
18645 );
18646 let b = _mm512_set_epi32(
18647 1 << 1,
18648 0,
18649 0,
18650 0,
18651 0,
18652 0,
18653 0,
18654 0,
18655 0,
18656 0,
18657 0,
18658 0,
18659 0,
18660 0,
18661 0,
18662 1 << 3 | 1 << 4,
18663 );
18664 let r = _mm512_mask_or_epi32(a, 0, a, b);
18665 assert_eq_m512i(r, a);
18666
18667 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
18668 let e = _mm512_set_epi32(
18669 1 << 1 | 1 << 2,
18670 0,
18671 0,
18672 0,
18673 0,
18674 0,
18675 0,
18676 0,
18677 0,
18678 0,
18679 0,
18680 0,
18681 0,
18682 0,
18683 0,
18684 1 << 1 | 1 << 3 | 1 << 4,
18685 );
18686 assert_eq_m512i(r, e);
18687 }
18688
18689 #[simd_test(enable = "avx512f")]
18690 unsafe fn test_mm512_maskz_or_epi32() {
18691 let a = _mm512_set_epi32(
18692 1 << 1 | 1 << 2,
18693 0,
18694 0,
18695 0,
18696 0,
18697 0,
18698 0,
18699 0,
18700 0,
18701 0,
18702 0,
18703 0,
18704 0,
18705 0,
18706 0,
18707 1 << 1 | 1 << 3,
18708 );
18709 let b = _mm512_set_epi32(
18710 1 << 1,
18711 0,
18712 0,
18713 0,
18714 0,
18715 0,
18716 0,
18717 0,
18718 0,
18719 0,
18720 0,
18721 0,
18722 0,
18723 0,
18724 0,
18725 1 << 3 | 1 << 4,
18726 );
18727 let r = _mm512_maskz_or_epi32(0, a, b);
18728 assert_eq_m512i(r, _mm512_setzero_si512());
18729
18730 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
18731 let e = _mm512_set_epi32(
18732 0,
18733 0,
18734 0,
18735 0,
18736 0,
18737 0,
18738 0,
18739 0,
18740 0,
18741 0,
18742 0,
18743 0,
18744 0,
18745 0,
18746 0,
18747 1 << 1 | 1 << 3 | 1 << 4,
18748 );
18749 assert_eq_m512i(r, e);
18750 }
18751
18752 #[simd_test(enable = "avx512f")]
18753 unsafe fn test_mm512_or_si512() {
18754 let a = _mm512_set_epi32(
18755 1 << 1 | 1 << 2,
18756 0,
18757 0,
18758 0,
18759 0,
18760 0,
18761 0,
18762 0,
18763 0,
18764 0,
18765 0,
18766 0,
18767 0,
18768 0,
18769 0,
18770 1 << 1 | 1 << 3,
18771 );
18772 let b = _mm512_set_epi32(
18773 1 << 1,
18774 0,
18775 0,
18776 0,
18777 0,
18778 0,
18779 0,
18780 0,
18781 0,
18782 0,
18783 0,
18784 0,
18785 0,
18786 0,
18787 0,
18788 1 << 3 | 1 << 4,
18789 );
18790 let r = _mm512_or_si512(a, b);
18791 let e = _mm512_set_epi32(
18792 1 << 1 | 1 << 2,
18793 0,
18794 0,
18795 0,
18796 0,
18797 0,
18798 0,
18799 0,
18800 0,
18801 0,
18802 0,
18803 0,
18804 0,
18805 0,
18806 0,
18807 1 << 1 | 1 << 3 | 1 << 4,
18808 );
18809 assert_eq_m512i(r, e);
18810 }
18811
18812 #[simd_test(enable = "avx512f")]
18813 unsafe fn test_mm512_xor_epi32() {
18814 let a = _mm512_set_epi32(
18815 1 << 1 | 1 << 2,
18816 0,
18817 0,
18818 0,
18819 0,
18820 0,
18821 0,
18822 0,
18823 0,
18824 0,
18825 0,
18826 0,
18827 0,
18828 0,
18829 0,
18830 1 << 1 | 1 << 3,
18831 );
18832 let b = _mm512_set_epi32(
18833 1 << 1,
18834 0,
18835 0,
18836 0,
18837 0,
18838 0,
18839 0,
18840 0,
18841 0,
18842 0,
18843 0,
18844 0,
18845 0,
18846 0,
18847 0,
18848 1 << 3 | 1 << 4,
18849 );
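// element 15: (1 << 1 | 1 << 2) ^ (1 << 1) = 1 << 2; element 0: (1 << 1 | 1 << 3) ^ (1 << 3 | 1 << 4) = 1 << 1 | 1 << 4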
18850 let r = _mm512_xor_epi32(a, b);
18851 let e = _mm512_set_epi32(
18852 1 << 2,
18853 0,
18854 0,
18855 0,
18856 0,
18857 0,
18858 0,
18859 0,
18860 0,
18861 0,
18862 0,
18863 0,
18864 0,
18865 0,
18866 0,
18867 1 << 1 | 1 << 4,
18868 );
18869 assert_eq_m512i(r, e);
18870 }
18871
18872 #[simd_test(enable = "avx512f")]
18873 unsafe fn test_mm512_mask_xor_epi32() {
18874 let a = _mm512_set_epi32(
18875 1 << 1 | 1 << 2,
18876 0,
18877 0,
18878 0,
18879 0,
18880 0,
18881 0,
18882 0,
18883 0,
18884 0,
18885 0,
18886 0,
18887 0,
18888 0,
18889 0,
18890 1 << 1 | 1 << 3,
18891 );
18892 let b = _mm512_set_epi32(
18893 1 << 1,
18894 0,
18895 0,
18896 0,
18897 0,
18898 0,
18899 0,
18900 0,
18901 0,
18902 0,
18903 0,
18904 0,
18905 0,
18906 0,
18907 0,
18908 1 << 3 | 1 << 4,
18909 );
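// an all-zero writemask leaves the result equal to src (`a` here)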
18910 let r = _mm512_mask_xor_epi32(a, 0, a, b);
18911 assert_eq_m512i(r, a);
18912
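// mask bit 15 is clear, so the highest element is copied from src instead of taking a ^ b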
18913 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
18914 let e = _mm512_set_epi32(
18915 1 << 1 | 1 << 2,
18916 0,
18917 0,
18918 0,
18919 0,
18920 0,
18921 0,
18922 0,
18923 0,
18924 0,
18925 0,
18926 0,
18927 0,
18928 0,
18929 0,
18930 1 << 1 | 1 << 4,
18931 );
18932 assert_eq_m512i(r, e);
18933 }
18934
18935 #[simd_test(enable = "avx512f")]
18936 unsafe fn test_mm512_maskz_xor_epi32() {
18937 let a = _mm512_set_epi32(
18938 1 << 1 | 1 << 2,
18939 0,
18940 0,
18941 0,
18942 0,
18943 0,
18944 0,
18945 0,
18946 0,
18947 0,
18948 0,
18949 0,
18950 0,
18951 0,
18952 0,
18953 1 << 1 | 1 << 3,
18954 );
18955 let b = _mm512_set_epi32(
18956 1 << 1,
18957 0,
18958 0,
18959 0,
18960 0,
18961 0,
18962 0,
18963 0,
18964 0,
18965 0,
18966 0,
18967 0,
18968 0,
18969 0,
18970 0,
18971 1 << 3 | 1 << 4,
18972 );
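// an all-zero mask produces an all-zero vector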
18973 let r = _mm512_maskz_xor_epi32(0, a, b);
18974 assert_eq_m512i(r, _mm512_setzero_si512());
18975
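// only the low eight mask bits are set; element 0 becomes (1 << 1 | 1 << 3) ^ (1 << 3 | 1 << 4) = 1 << 1 | 1 << 4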
18976 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
18977 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
18978 assert_eq_m512i(r, e);
18979 }
18980
18981 #[simd_test(enable = "avx512f")]
18982 unsafe fn test_mm512_xor_si512() {
18983 let a = _mm512_set_epi32(
18984 1 << 1 | 1 << 2,
18985 0,
18986 0,
18987 0,
18988 0,
18989 0,
18990 0,
18991 0,
18992 0,
18993 0,
18994 0,
18995 0,
18996 0,
18997 0,
18998 0,
18999 1 << 1 | 1 << 3,
19000 );
19001 let b = _mm512_set_epi32(
19002 1 << 1,
19003 0,
19004 0,
19005 0,
19006 0,
19007 0,
19008 0,
19009 0,
19010 0,
19011 0,
19012 0,
19013 0,
19014 0,
19015 0,
19016 0,
19017 1 << 3 | 1 << 4,
19018 );
19019 let r = _mm512_xor_si512(a, b);
19020 let e = _mm512_set_epi32(
19021 1 << 2,
19022 0,
19023 0,
19024 0,
19025 0,
19026 0,
19027 0,
19028 0,
19029 0,
19030 0,
19031 0,
19032 0,
19033 0,
19034 0,
19035 0,
19036 1 << 1 | 1 << 4,
19037 );
19038 assert_eq_m512i(r, e);
19039 }
19040
19041 #[simd_test(enable = "avx512f")]
19042 unsafe fn test_mm512_kand() {
19043 let a: u16 = 0b11001100_00110011;
19044 let b: u16 = 0b11001100_00110011;
19045 let r = _mm512_kand(a, b);
19046 let e: u16 = 0b11001100_00110011;
19047 assert_eq!(r, e);
19048 }
19049
19050 #[simd_test(enable = "avx512f")]
19051 unsafe fn test_kand_mask16() {
19052 let a: u16 = 0b11001100_00110011;
19053 let b: u16 = 0b11001100_00110011;
19054 let r = _kand_mask16(a, b);
19055 let e: u16 = 0b11001100_00110011;
19056 assert_eq!(r, e);
19057 }
19058
19059 #[simd_test(enable = "avx512f")]
19060 unsafe fn test_mm512_kor() {
19061 let a: u16 = 0b11001100_00110011;
19062 let b: u16 = 0b00101110_00001011;
19063 let r = _mm512_kor(a, b);
19064 let e: u16 = 0b11101110_00111011;
19065 assert_eq!(r, e);
19066 }
19067
19068 #[simd_test(enable = "avx512f")]
19069 unsafe fn test_kor_mask16() {
19070 let a: u16 = 0b11001100_00110011;
19071 let b: u16 = 0b00101110_00001011;
19072 let r = _kor_mask16(a, b);
19073 let e: u16 = 0b11101110_00111011;
19074 assert_eq!(r, e);
19075 }
19076
19077 #[simd_test(enable = "avx512f")]
19078 unsafe fn test_mm512_kxor() {
19079 let a: u16 = 0b11001100_00110011;
19080 let b: u16 = 0b00101110_00001011;
19081 let r = _mm512_kxor(a, b);
19082 let e: u16 = 0b11100010_00111000;
19083 assert_eq!(r, e);
19084 }
19085
19086 #[simd_test(enable = "avx512f")]
19087 unsafe fn test_kxor_mask16() {
19088 let a: u16 = 0b11001100_00110011;
19089 let b: u16 = 0b00101110_00001011;
19090 let r = _kxor_mask16(a, b);
19091 let e: u16 = 0b11100010_00111000;
19092 assert_eq!(r, e);
19093 }
19094
19095 #[simd_test(enable = "avx512f")]
19096 unsafe fn test_mm512_knot() {
19097 let a: u16 = 0b11001100_00110011;
19098 let r = _mm512_knot(a);
19099 let e: u16 = 0b00110011_11001100;
19100 assert_eq!(r, e);
19101 }
19102
19103 #[simd_test(enable = "avx512f")]
19104 unsafe fn test_knot_mask16() {
19105 let a: u16 = 0b11001100_00110011;
19106 let r = _knot_mask16(a);
19107 let e: u16 = 0b00110011_11001100;
19108 assert_eq!(r, e);
19109 }
19110
19111 #[simd_test(enable = "avx512f")]
19112 unsafe fn test_mm512_kandn() {
19113 let a: u16 = 0b11001100_00110011;
19114 let b: u16 = 0b00101110_00001011;
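// kandn computes (NOT a) AND b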
19115 let r = _mm512_kandn(a, b);
19116 let e: u16 = 0b00100010_00001000;
19117 assert_eq!(r, e);
19118 }
19119
19120 #[simd_test(enable = "avx512f")]
19121 unsafe fn test_kandn_mask16() {
19122 let a: u16 = 0b11001100_00110011;
19123 let b: u16 = 0b00101110_00001011;
19124 let r = _kandn_mask16(a, b);
19125 let e: u16 = 0b00100010_00001000;
19126 assert_eq!(r, e);
19127 }
19128
19129 #[simd_test(enable = "avx512f")]
19130 unsafe fn test_mm512_kxnor() {
19131 let a: u16 = 0b11001100_00110011;
19132 let b: u16 = 0b00101110_00001011;
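// kxnor computes NOT (a XOR b)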
19133 let r = _mm512_kxnor(a, b);
19134 let e: u16 = 0b00011101_11000111;
19135 assert_eq!(r, e);
19136 }
19137
19138 #[simd_test(enable = "avx512f")]
19139 unsafe fn test_kxnor_mask16() {
19140 let a: u16 = 0b11001100_00110011;
19141 let b: u16 = 0b00101110_00001011;
19142 let r = _kxnor_mask16(a, b);
19143 let e: u16 = 0b00011101_11000111;
19144 assert_eq!(r, e);
19145 }
19146
19147 #[simd_test(enable = "avx512f")]
19148 unsafe fn test_mm512_kmov() {
19149 let a: u16 = 0b11001100_00110011;
19150 let r = _mm512_kmov(a);
19151 let e: u16 = 0b11001100_00110011;
19152 assert_eq!(r, e);
19153 }
19154}