use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::{self, transmute},
};

#[cfg(test)]
use stdarch_test::assert_instr;
/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    let a = a.as_i32x16();
    // all-0 is a properly initialized i32x16
    let zero: i32x16 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i32x16 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}
/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}
/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, abs, zero))
}
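
// Illustrative sketch (added example, not from the original file): the
// writemask variant keeps `src` lanes wherever the corresponding bit of `k`
// is clear. Assumes stdarch's `simd_test` harness and an AVX512F-capable test
// machine; the module and function names are hypothetical.
#[cfg(test)]
mod abs_epi32_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mask_abs_epi32() {
        let src = _mm512_set1_epi32(7);
        let a = _mm512_set1_epi32(-3);
        // Low 8 mask bits set: lanes 0..8 take |a|, lanes 8..16 keep `src`.
        let r: [i32; 16] = transmute(_mm512_mask_abs_epi32(src, 0b0000_0000_1111_1111, a));
        assert_eq!(&r[..8], &[3; 8]);
        assert_eq!(&r[8..], &[7; 8]);
    }
}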
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    let a = a.as_i64x8();
    // all-0 is a properly initialized i64x8
    let zero: i64x8 = mem::zeroed();
    let sub = simd_sub(zero, a);
    let cmp: i64x8 = simd_gt(a, zero);
    transmute(simd_select(cmp, a, sub))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let abs = _mm512_abs_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, abs, zero))
}
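
// Hedged sketch of the zeromask behavior on the 64-bit variant (added
// example; `simd_test` and AVX512F hardware assumed, module name
// hypothetical): lanes whose mask bit is clear are zeroed instead of copied
// from a source vector.
#[cfg(test)]
mod abs_epi64_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_maskz_abs_epi64() {
        let a = _mm512_set1_epi64(-9);
        // Mask bits 0..4 set: lanes 0..4 get |a|, lanes 4..8 become zero.
        let r: [i64; 8] = transmute(_mm512_maskz_abs_epi64(0b0000_1111, a));
        assert_eq!(r, [9, 9, 9, 9, 0, 0, 0, 0]);
    }
}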
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
    let a = _mm512_set1_epi32(0x7FFFFFFF); // from LLVM code
    let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
    let abs = _mm512_and_epi32(a, b);
    transmute(abs)
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    let abs = _mm512_abs_ps(v2).as_f32x16();
    transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF); // from LLVM code
    let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
    let abs = _mm512_and_epi64(a, b);
    transmute(abs)
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    let abs = _mm512_abs_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}
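
// Sketch of the sign-bit trick used above (added example; `simd_test` and
// AVX512F assumed, module name hypothetical): clearing the top bit of each
// lane is exactly `abs` for IEEE floats, so `-0.5` becomes `0.5` with no
// arithmetic at all.
#[cfg(test)]
mod abs_ps_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_abs_ps() {
        let v2 = _mm512_set1_ps(-0.5);
        let r: [f32; 16] = transmute(_mm512_abs_ps(v2));
        assert_eq!(r, [0.5; 16]);
    }
}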
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let mov = a.as_i32x16();
    transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let mov = a.as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let mov = a.as_i64x8();
    transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let mov = a.as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let mov = a.as_f32x16();
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    let mov = a.as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let mov = a.as_f64x8();
    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    let mov = a.as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}
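
// Added example: a masked `mov` is a lanewise blend, which makes it a handy
// building block for conditional updates. Hypothetical test module;
// `simd_test` and AVX512F hardware assumed.
#[cfg(test)]
mod mov_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mask_mov_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(2);
        // Alternating mask: even lanes take `a`, odd lanes keep `src`.
        let r: [i32; 16] = transmute(_mm512_mask_mov_epi32(src, 0b0101_0101_0101_0101, a));
        assert_eq!(r, [2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1]);
    }
}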
/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let add = _mm512_add_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let add = _mm512_add_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, add, zero))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let add = _mm512_add_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, add, zero))
}
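
// Added example for the masked additions (hypothetical module; `simd_test`
// and AVX512F assumed): with a zeromask, unselected sums simply vanish.
#[cfg(test)]
mod add_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_maskz_add_ps() {
        let a = _mm512_set1_ps(1.5);
        let b = _mm512_set1_ps(2.0);
        // Only the low four lanes keep their sums; the rest are zeroed.
        let r: [f32; 16] = transmute(_mm512_maskz_add_ps(0b0000_0000_0000_1111, a, b));
        assert_eq!(&r[..4], &[3.5; 4]);
        assert_eq!(&r[4..], &[0.0; 12]);
    }
}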
/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let sub = _mm512_sub_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_ps&expand=5733)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_ps&expand=5731)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let sub = _mm512_sub_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_ps&expand=5732)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let sub = _mm512_sub_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, sub, zero))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_pd&expand=5721)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_pd&expand=5719)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let sub = _mm512_sub_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_pd&expand=5720)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let sub = _mm512_sub_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, sub, zero))
}
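
// Added example pinning down the operand order of the subtractions
// (hypothetical module; `simd_test` and AVX512F assumed): `b` is subtracted
// from `a`, lanewise.
#[cfg(test)]
mod sub_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_sub_epi32() {
        let a = _mm512_set1_epi32(10);
        let b = _mm512_set1_epi32(3);
        let r: [i32; 16] = transmute(_mm512_sub_epi32(a, b));
        assert_eq!(r, [7; 16]); // a - b in every lane
    }
}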
/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epi32(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epi32(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}
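
// Added example (hypothetical module; `simd_test` and AVX512F assumed):
// `_mm512_mul_epi32` reads only the low 32 bits of each 64-bit element and
// sign-extends the product, so eight 64-bit results come out of sixteen
// 32-bit inputs.
#[cfg(test)]
mod mul_epi32_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mul_epi32() {
        let a = _mm512_set1_epi32(-2);
        let b = _mm512_set1_epi32(3);
        let r: [i64; 8] = transmute(_mm512_mul_epi32(a, b));
        assert_eq!(r, [-6; 8]); // signed products, sign-extended to 64 bits
    }
}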
/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullo_epi&expand=4005)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mask_mullo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mullo_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, mul, zero))
}
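
// Added example (hypothetical module; `simd_test` and AVX512F assumed):
// `mullo` keeps only the low 32 bits of each 64-bit intermediate product,
// so 2^16 * 2^16 truncates to zero.
#[cfg(test)]
mod mullo_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mullo_epi32() {
        let a = _mm512_set1_epi32(1 << 16);
        let r: [i32; 16] = transmute(_mm512_mullo_epi32(a, a));
        assert_eq!(r, [0; 16]); // low 32 bits of 2^32 are all zero
    }
}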
/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullox&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_mullox_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let mul = _mm512_mullox_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}
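
// Added example contrasting `mullox` with `mul_epi32` (hypothetical module;
// `simd_test` and AVX512F assumed): here the full 64-bit lanes participate,
// so operands above 2^32 still multiply correctly.
#[cfg(test)]
mod mullox_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mullox_epi64() {
        let a = _mm512_set1_epi64(1 << 33);
        let b = _mm512_set1_epi64(3);
        let r: [i64; 8] = transmute(_mm512_mullox_epi64(a, b));
        assert_eq!(r, [3i64 << 33; 8]);
    }
}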
/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epu32&expand=3916)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epu32(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let mul = _mm512_mul_epu32(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}
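
// Added example (hypothetical module; `simd_test` and AVX512F assumed): the
// unsigned variant interprets the low 32 bits as u32, so an all-ones lane is
// 4294967295 rather than -1.
#[cfg(test)]
mod mul_epu32_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mul_epu32() {
        let a = _mm512_set1_epi32(-1); // low 32 bits = 0xFFFF_FFFF
        let b = _mm512_set1_epi32(2);
        let r: [u64; 8] = transmute(_mm512_mul_epu32(a, b));
        assert_eq!(r, [8_589_934_590; 8]); // 4294967295 * 2
    }
}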
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let mul = _mm512_mul_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let mul = _mm512_mul_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mul, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let mul = _mm512_mul_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let mul = _mm512_mul_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mul, zero))
}
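
// Added example of a writemask on the floating-point multiply (hypothetical
// module; `simd_test` and AVX512F assumed): unselected lanes are copied from
// `src`, not recomputed.
#[cfg(test)]
mod mul_pd_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_mask_mul_pd() {
        let src = _mm512_set1_pd(-1.0);
        let a = _mm512_set1_pd(2.0);
        let b = _mm512_set1_pd(4.0);
        // High four mask bits set: lanes 4..8 get products, 0..4 keep `src`.
        let r: [f64; 8] = transmute(_mm512_mask_mul_pd(src, 0b1111_0000, a, b));
        assert_eq!(r, [-1.0, -1.0, -1.0, -1.0, 8.0, 8.0, 8.0, 8.0]);
    }
}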
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_ps&expand=2162)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let div = _mm512_div_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, div, src.as_f32x16()))
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let div = _mm512_div_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, div, zero))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let div = _mm512_div_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, div, src.as_f64x8()))
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let div = _mm512_div_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, div, zero))
}
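
// Added example (hypothetical module; `simd_test` and AVX512F assumed):
// `a` is the dividend and `b` the divisor, lanewise.
#[cfg(test)]
mod div_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_div_ps() {
        let a = _mm512_set1_ps(9.0);
        let b = _mm512_set1_ps(3.0);
        let r: [f32; 16] = transmute(_mm512_div_ps(a, b));
        assert_eq!(r, [3.0; 16]);
    }
}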
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi32&expand=3582)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, max, src.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, max, src.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_ps&expand=3655)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vmaxps(
        a.as_f32x16(),
        b.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max = _mm512_max_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let max = _mm512_max_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max = _mm512_max_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let max = _mm512_max_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_epu32&expand=3618)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, max, src.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu32(a, b).as_u32x16();
    let zero = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, max, zero))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=max_epu64&expand=3627)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu64(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, max, src.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_epu&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let max = _mm512_max_epu64(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, max, zero))
}
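
// Added example contrasting signed and unsigned maxima (hypothetical module;
// `simd_test` and AVX512F assumed): the same bit pattern orders differently
// under the two interpretations.
#[cfg(test)]
mod max_examples {
    use super::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn example_max_signed_vs_unsigned() {
        let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF
        let b = _mm512_set1_epi32(1);
        let s: [i32; 16] = transmute(_mm512_max_epi32(a, b));
        let u: [i32; 16] = transmute(_mm512_max_epu32(a, b));
        assert_eq!(s, [1; 16]); // signed: 1 > -1
        assert_eq!(u, [-1; 16]); // unsigned: 0xFFFF_FFFF is the maximum
    }
}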
960 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
962 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epi32&expand=3696)
964 #[target_feature(enable = "avx512f")]
965 #[cfg_attr(test, assert_instr(vpminsd))]
966 pub unsafe fn _mm512_min_epi32(a
: __m512i
, b
: __m512i
) -> __m512i
{
967 transmute(vpminsd(a
.as_i32x16(), b
.as_i32x16()))
970 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
972 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epi32&expand=3694)
974 #[target_feature(enable = "avx512f")]
975 #[cfg_attr(test, assert_instr(vpminsd))]
976 pub unsafe fn _mm512_mask_min_epi32(src
: __m512i
, k
: __mmask16
, a
: __m512i
, b
: __m512i
) -> __m512i
{
977 let max
= _mm512_min_epi32(a
, b
).as_i32x16();
978 transmute(simd_select_bitmask(k
, max
, src
.as_i32x16()))
981 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
983 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epi32&expand=3695)
985 #[target_feature(enable = "avx512f")]
986 #[cfg_attr(test, assert_instr(vpminsd))]
987 pub unsafe fn _mm512_maskz_min_epi32(k
: __mmask16
, a
: __m512i
, b
: __m512i
) -> __m512i
{
988 let max
= _mm512_min_epi32(a
, b
).as_i32x16();
989 let zero
= _mm512_setzero_si512().as_i32x16();
990 transmute(simd_select_bitmask(k
, max
, zero
))
993 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
995 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epi64&expand=3705)
997 #[target_feature(enable = "avx512f")]
998 #[cfg_attr(test, assert_instr(vpminsq))]
999 pub unsafe fn _mm512_min_epi64(a
: __m512i
, b
: __m512i
) -> __m512i
{
1000 transmute(vpminsq(a
.as_i64x8(), b
.as_i64x8()))
1003 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epi64&expand=3703)
1007 #[target_feature(enable = "avx512f")]
1008 #[cfg_attr(test, assert_instr(vpminsq))]
1009 pub unsafe fn _mm512_mask_min_epi64(src
: __m512i
, k
: __mmask8
, a
: __m512i
, b
: __m512i
) -> __m512i
{
1010 let max
= _mm512_min_epi64(a
, b
).as_i64x8();
1011 transmute(simd_select_bitmask(k
, max
, src
.as_i64x8()))
1014 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1016 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epi64&expand=3704)
1018 #[target_feature(enable = "avx512f")]
1019 #[cfg_attr(test, assert_instr(vpminsq))]
1020 pub unsafe fn _mm512_maskz_min_epi64(k
: __mmask8
, a
: __m512i
, b
: __m512i
) -> __m512i
{
1021 let max
= _mm512_min_epi64(a
, b
).as_i64x8();
1022 let zero
= _mm512_setzero_si512().as_i64x8();
1023 transmute(simd_select_bitmask(k
, max
, zero
))
1026 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
1028 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_ps&expand=3769)
1030 #[target_feature(enable = "avx512f")]
1031 #[cfg_attr(test, assert_instr(vminps))]
1032 pub unsafe fn _mm512_min_ps(a
: __m512
, b
: __m512
) -> __m512
{
1036 _MM_FROUND_CUR_DIRECTION
,
1040 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1042 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_ps&expand=3767)
1044 #[target_feature(enable = "avx512f")]
1045 #[cfg_attr(test, assert_instr(vminps))]
1046 pub unsafe fn _mm512_mask_min_ps(src
: __m512
, k
: __mmask16
, a
: __m512
, b
: __m512
) -> __m512
{
1047 let max
= _mm512_min_ps(a
, b
).as_f32x16();
1048 transmute(simd_select_bitmask(k
, max
, src
.as_f32x16()))
1051 /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1053 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_ps&expand=3768)
1055 #[target_feature(enable = "avx512f")]
1056 #[cfg_attr(test, assert_instr(vminps))]
1057 pub unsafe fn _mm512_maskz_min_ps(k
: __mmask16
, a
: __m512
, b
: __m512
) -> __m512
{
1058 let max
= _mm512_min_ps(a
, b
).as_f32x16();
1059 let zero
= _mm512_setzero_ps().as_f32x16();
1060 transmute(simd_select_bitmask(k
, max
, zero
))
1063 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
1065 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_pd&expand=3759)
1067 #[target_feature(enable = "avx512f")]
1068 #[cfg_attr(test, assert_instr(vminpd))]
1069 pub unsafe fn _mm512_min_pd(a
: __m512d
, b
: __m512d
) -> __m512d
{
1070 transmute(vminpd(a
.as_f64x8(), b
.as_f64x8(), _MM_FROUND_CUR_DIRECTION
))
1073 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1075 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_pd&expand=3757)
1077 #[target_feature(enable = "avx512f")]
1078 #[cfg_attr(test, assert_instr(vminpd))]
1079 pub unsafe fn _mm512_mask_min_pd(src
: __m512d
, k
: __mmask8
, a
: __m512d
, b
: __m512d
) -> __m512d
{
1080 let max
= _mm512_min_pd(a
, b
).as_f64x8();
1081 transmute(simd_select_bitmask(k
, max
, src
.as_f64x8()))
1084 /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1086 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_pd&expand=3758)
1088 #[target_feature(enable = "avx512f")]
1089 #[cfg_attr(test, assert_instr(vminpd))]
1090 pub unsafe fn _mm512_maskz_min_pd(k
: __mmask8
, a
: __m512d
, b
: __m512d
) -> __m512d
{
1091 let max
= _mm512_min_pd(a
, b
).as_f64x8();
1092 let zero
= _mm512_setzero_pd().as_f64x8();
1093 transmute(simd_select_bitmask(k
, max
, zero
))

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epu32&expand=3732)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu32&expand=3730)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu32(a, b).as_u32x16();
    transmute(simd_select_bitmask(k, min, src.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu32&expand=3731)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu32(a, b).as_u32x16();
    let zero = _mm512_setzero_si512().as_u32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
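
// Sketch (hypothetical helper, not in the original file): `_mm512_min_epu32`
// compares lanes as unsigned values, so an all-ones bit pattern (-1 as i32)
// behaves as u32::MAX rather than as the smallest value.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_min_epu32_is_unsigned() -> __m512i {
    let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF, i.e. u32::MAX in every lane
    let b = _mm512_set1_epi32(1);
    _mm512_min_epu32(a, b) // every lane is 1, since 1 < 0xFFFF_FFFF unsigned
}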

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_epu64&expand=3741)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu64&expand=3739)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu64(a, b).as_u64x8();
    transmute(simd_select_bitmask(k, min, src.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu64&expand=3740)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epu64(a, b).as_u64x8();
    let zero = _mm512_setzero_si512().as_u64x8();
    transmute(simd_select_bitmask(k, min, zero))
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_ps&expand=5371)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_ps&expand=5369)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let sqrt = _mm512_sqrt_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_ps&expand=5370)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    let sqrt = _mm512_sqrt_ps(a).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, sqrt, zero))
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_pd&expand=5362)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_pd&expand=5360)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let sqrt = _mm512_sqrt_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_pd&expand=5361)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    let sqrt = _mm512_sqrt_pd(a).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, sqrt, zero))
}
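
// Sketch (hypothetical helper): the square-root intrinsics follow the same
// masking pattern as min/max; here the upper half of the result is zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_maskz_sqrt_ps() -> __m512 {
    let a = _mm512_set1_ps(9.0);
    _mm512_maskz_sqrt_ps(0b00000000_11111111, a) // low lanes 3.0, high lanes 0.0
}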

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fmadd_ps&expand=2557)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    transmute(vfmadd132ps(
        a.as_f32x16(),
        b.as_f32x16(),
        c.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_ps&expand=2558)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_ps&expand=2560)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_ps&expand=2559)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_pd&expand=2545)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    transmute(vfmadd132pd(
        a.as_f64x8(),
        b.as_f64x8(),
        c.as_f64x8(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_pd&expand=2546)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_pd&expand=2548)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_pd&expand=2547)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
}
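
// Sketch (hypothetical helper): each lane computes a * b + c with a single
// rounding at the end, so 2.0 * 3.0 + 1.0 gives exactly 7.0 per lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_fmadd_pd() -> __m512d {
    let (a, b, c) = (_mm512_set1_pd(2.0), _mm512_set1_pd(3.0), _mm512_set1_pd(1.0));
    _mm512_fmadd_pd(a, b, c)
}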

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_ps&expand=2643)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    transmute(vfmadd132ps(
        a.as_f32x16(),
        b.as_f32x16(),
        sub,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_ps&expand=2644)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_ps&expand=2646)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_ps&expand=2645)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_pd&expand=2631)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    transmute(vfmadd132pd(
        a.as_f64x8(),
        b.as_f64x8(),
        sub,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_pd&expand=2632)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_pd&expand=2634)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_pd&expand=2633)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
}
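
// Sketch (hypothetical helper): fmsub computes a * b - c per lane; as the
// bodies above show, this file lowers it to fmadd with a negated c operand.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_fmsub_ps() -> __m512 {
    let (a, b, c) = (_mm512_set1_ps(2.0), _mm512_set1_ps(3.0), _mm512_set1_ps(1.0));
    _mm512_fmsub_ps(a, b, c) // every lane: 2.0 * 3.0 - 1.0 == 5.0
}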

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_ps&expand=2611)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    transmute(vfmaddsub213ps(
        a.as_f32x16(),
        b.as_f32x16(),
        c.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_ps&expand=2612)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_ps&expand=2614)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_ps&expand=2613)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_pd&expand=2599)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    transmute(vfmaddsub213pd(
        a.as_f64x8(),
        b.as_f64x8(),
        c.as_f64x8(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_pd&expand=2600)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_pd&expand=2602)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_pd&expand=2613)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
}
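
// Sketch (hypothetical helper): fmaddsub alternates per lane; even-indexed
// lanes compute a * b - c and odd-indexed lanes compute a * b + c.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_fmaddsub_ps() -> __m512 {
    let (a, b, c) = (_mm512_set1_ps(2.0), _mm512_set1_ps(3.0), _mm512_set1_ps(1.0));
    _mm512_fmaddsub_ps(a, b, c) // lanes alternate 5.0, 7.0, 5.0, 7.0, ...
}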

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_ps&expand=2691)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    transmute(vfmaddsub213ps(
        a.as_f32x16(),
        b.as_f32x16(),
        sub,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_ps&expand=2692)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_ps&expand=2694)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_ps&expand=2693)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_pd&expand=2679)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    transmute(vfmaddsub213pd(
        a.as_f64x8(),
        b.as_f64x8(),
        sub,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_pd&expand=2680)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_pd&expand=2682)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_pd&expand=2681)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
}
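
// Sketch (hypothetical helper): fmsubadd mirrors fmaddsub; even-indexed lanes
// compute a * b + c and odd-indexed lanes compute a * b - c, which is why the
// bodies above reuse vfmaddsub with a negated c operand.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_fmsubadd_ps() -> __m512 {
    let (a, b, c) = (_mm512_set1_ps(2.0), _mm512_set1_ps(3.0), _mm512_set1_ps(1.0));
    _mm512_fmsubadd_ps(a, b, c) // lanes alternate 7.0, 5.0, 7.0, 5.0, ...
}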

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_ps&expand=2723)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f32x16());
    transmute(vfmadd132ps(
        sub,
        b.as_f32x16(),
        c.as_f32x16(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_ps&expand=2724)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_ps&expand=2726)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_ps&expand=2725)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_pd&expand=2711)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f64x8());
    transmute(vfmadd132pd(
        sub,
        b.as_f64x8(),
        c.as_f64x8(),
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_pd&expand=2712)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_pd&expand=2714)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_pd&expand=2713)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
}
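
// Sketch (hypothetical helper): fnmadd negates the product before the add, so
// each lane computes -(a * b) + c; the bodies above negate a and reuse fmadd.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_fnmadd_pd() -> __m512d {
    let (a, b, c) = (_mm512_set1_pd(2.0), _mm512_set1_pd(3.0), _mm512_set1_pd(1.0));
    _mm512_fnmadd_pd(a, b, c) // every lane: -(2.0 * 3.0) + 1.0 == -5.0
}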

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_ps&expand=2771)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f32x16());
    let subc = simd_sub(zero, c.as_f32x16());
    transmute(vfmadd132ps(
        suba,
        b.as_f32x16(),
        subc,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_ps&expand=2772)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_ps&expand=2774)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fnmsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_ps&expand=2773)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_pd&expand=2759)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f64x8());
    let subc = simd_sub(zero, c.as_f64x8());
    transmute(vfmadd132pd(
        suba,
        b.as_f64x8(),
        subc,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_pd&expand=2760)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_pd&expand=2762)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fnmsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_pd&expand=2761)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
    transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
}
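
// Sketch (hypothetical helper): fnmsub negates both the product and the
// addend, computing -(a * b) - c per lane; the bodies above implement this by
// negating a and c and reusing fmadd.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_fnmsub_pd() -> __m512d {
    let (a, b, c) = (_mm512_set1_pd(2.0), _mm512_set1_pd(3.0), _mm512_set1_pd(1.0));
    _mm512_fnmsub_pd(a, b, c) // every lane: -(2.0 * 3.0) - 1.0 == -7.0
}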

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rcp14_ps&expand=4502)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    transmute(vrcp14ps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
    ))
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rcp14_ps&expand=4500)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rcp14_ps&expand=4501)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rcp14_pd&expand=4493)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    transmute(vrcp14pd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
    ))
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rcp14_pd&expand=4491)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rcp14_pd&expand=4492)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
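
// Sketch (hypothetical helper): rcp14 is an approximation, not an exact
// division; each lane lands within a relative error of 2^-14 of 1.0 / a.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_rcp14_ps() -> __m512 {
    let a = _mm512_set1_ps(3.0);
    _mm512_rcp14_ps(a) // every lane is approximately 0.3333, good to about 14 bits
}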

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rsqrt14_ps&expand=4819)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    transmute(vrsqrt14ps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
    ))
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rsqrt14_ps&expand=4817)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rsqrt14_ps&expand=4818)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vrsqrt14ps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
    ))
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rsqrt14_pd&expand=4812)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    transmute(vrsqrt14pd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
    ))
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rsqrt14_pd&expand=4810)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rsqrt14_pd&expand=4811)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
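
// Sketch (hypothetical helper): rsqrt14 approximates 1.0 / sqrt(a) to within
// a relative error of 2^-14, avoiding a full-precision sqrt and divide.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_rsqrt14_ps() -> __m512 {
    let a = _mm512_set1_ps(4.0);
    _mm512_rsqrt14_ps(a) // every lane is approximately 0.5
}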

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_ps&expand=2844)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
    transmute(vgetexpps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_ps&expand=2845)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vgetexpps(
        a.as_f32x16(),
        src.as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_ps&expand=2846)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vgetexpps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_pd&expand=2835)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
    transmute(vgetexppd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_pd&expand=2836)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vgetexppd(
        a.as_f64x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_pd&expand=2837)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vgetexppd(
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
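
// Sketch (hypothetical helper): getexp extracts the unbiased exponent as a
// float, i.e. floor(log2(|a|)) per lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_getexp_ps() -> __m512 {
    let a = _mm512_set1_ps(8.0);
    _mm512_getexp_ps(a) // every lane is 3.0, since floor(log2(8.0)) == 3
}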
2064 /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
2065 /// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
2066 /// _MM_FROUND_TO_NEAREST_INT // round to nearest\
2067 /// _MM_FROUND_TO_NEG_INF // round down\
2068 /// _MM_FROUND_TO_POS_INF // round up\
2069 /// _MM_FROUND_TO_ZERO // truncate\
2070 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
2072 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_ps&expand=4784)
2074 #[target_feature(enable = "avx512f")]
2075 #[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
2076 #[rustc_args_required_const(1)]
2077 pub unsafe fn _mm512_roundscale_ps(a
: __m512
, imm8
: i32) -> __m512
{
2078 let a
= a
.as_f32x16();
2079 let zero
= _mm512_setzero_ps().as_f32x16();
2086 0b11111111_11111111,
2087 _MM_FROUND_CUR_DIRECTION
,
2091 let r
= constify_imm8_sae
!(imm8
, call
);
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_ps&expand=4782)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    let src = src.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscaleps(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_ps&expand=4783)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscaleps(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_pd&expand=4775)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_roundscale_pd(a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscalepd(a, $imm8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_pd&expand=4773)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_roundscale_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    imm8: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let src = src.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscalepd(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_pd&expand=4774)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscalepd(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_ps&expand=4883)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
    transmute(vscalefps(
        a.as_f32x16(),
        b.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
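// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// scalef computes a * 2^floor(b) per lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_scalef_ps() -> __m512 {
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(2.0);
    // Each lane: 3.0 * 2^2 = 12.0.
    _mm512_scalef_ps(a, b)
}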
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_ps&expand=4881)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    transmute(vscalefps(
        a.as_f32x16(),
        b.as_f32x16(),
        src.as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_ps&expand=4882)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    transmute(vscalefps(
        a.as_f32x16(),
        b.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_pd&expand=4874)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
    transmute(vscalefpd(
        a.as_f64x8(),
        b.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_pd&expand=4872)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    transmute(vscalefpd(
        a.as_f64x8(),
        b.as_f64x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_pd&expand=4873)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    transmute(vscalefpd(
        a.as_f64x8(),
        b.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_ps&expand=2499)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmps(
                a,
                b,
                c,
                $imm8,
                0b11111111_11111111,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
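// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// each element of c is an 8-entry table of 4-bit responses indexed by the token
// that classifies b. Here the QNAN token's field (bits 3:0) is set to 8, which
// Intel's VFIXUPIMMPS response table defines as "+0.0"; every other field is 0
// ("keep dst"). Treat the exact table encoding as an assumption of this sketch.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_fixupimm_ps() -> __m512 {
    let x = _mm512_set1_ps(f32::NAN);
    let table = _mm512_set1_epi32(0b1000);
    // NaN lanes become +0.0; non-NaN lanes would pass through unchanged.
    // imm8 = 0 requests no additional exception reporting.
    _mm512_fixupimm_ps(x, x, table, 0)
}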
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_ps&expand=2500)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fixupimm_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
    imm8: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmps(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_ps&expand=2501)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fixupimm_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
    imm8: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmpsz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_pd&expand=2490)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmpd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_pd&expand=2491)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fixupimm_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512i,
    imm8: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmpd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_pd&expand=2492)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fixupimm_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512i,
    imm8: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmpdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ternarylogic_epi32&expand=5867)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpternlogd(a, b, c, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
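// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// imm8 = 0xCA is the classic bitwise-select truth table, (a & b) | (!a & c):
// for each of the eight (a, b, c) bit combinations, bit (a<<2 | b<<1 | c) of
// 0xCA holds the desired output bit.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_ternarylogic_select() -> __m512i {
    let mask = _mm512_set1_epi32(0x0F0F_0F0F);
    let ones = _mm512_set1_epi32(-1);
    let zeros = _mm512_set1_epi32(0);
    // Takes bits of `ones` where `mask` is 1 and bits of `zeros` elsewhere,
    // so every lane of the result is 0x0F0F_0F0F.
    _mm512_ternarylogic_epi32(mask, ones, zeros, 0xCA)
}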
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ternarylogic_epi32&expand=5865)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_ternarylogic_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    imm8: i32,
) -> __m512i {
    let src = src.as_i32x16();
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpternlogd(src, a, b, $imm8)
        };
    }
    let ternarylogic = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, ternarylogic, src))
}
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ternarylogic_epi32&expand=5866)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_ternarylogic_epi32(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
    imm8: i32,
) -> __m512i {
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpternlogd(a, b, c, $imm8)
        };
    }
    let ternarylogic = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, ternarylogic, zero))
}
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ternarylogic_epi64&expand=5876)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    let b = b.as_i64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpternlogq(a, b, c, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ternarylogic_epi64&expand=5874)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_ternarylogic_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    imm8: i32,
) -> __m512i {
    let src = src.as_i64x8();
    let a = a.as_i64x8();
    let b = b.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpternlogq(src, a, b, $imm8)
        };
    }
    let ternarylogic = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, ternarylogic, src))
}
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ternarylogic_epi64&expand=5875)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_ternarylogic_epi64(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    c: __m512i,
    imm8: i32,
) -> __m512i {
    let a = a.as_i64x8();
    let b = b.as_i64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpternlogq(a, b, c, $imm8)
        };
    }
    let ternarylogic = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, ternarylogic, zero))
}
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_ps&expand=2880)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_getmant_ps(
    a: __m512,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr, $imm2:expr) => {
            vgetmantps(
                a.as_f32x16(),
                $imm2 << 2 | $imm4,
                _mm512_setzero_ps().as_f32x16(),
                0b11111111_11111111,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}
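// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// normalizes each mantissa into [1, 2) and keeps the source sign. The two
// constants are assumed to be this module's _MM_MANTISSA_*_ENUM values for the
// interv/sc choices listed above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_getmant_ps() -> __m512 {
    // 10.0 = 1.25 * 2^3, so every result lane is 1.25.
    let a = _mm512_set1_ps(10.0);
    _mm512_getmant_ps(a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC)
}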
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_ps&expand=2881)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_getmant_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr, $imm2:expr) => {
            vgetmantps(
                a.as_f32x16(),
                $imm2 << 2 | $imm4,
                src.as_f32x16(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_ps&expand=2882)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_getmant_ps(
    k: __mmask16,
    a: __m512,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr, $imm2:expr) => {
            vgetmantps(
                a.as_f32x16(),
                $imm2 << 2 | $imm4,
                _mm512_setzero_ps().as_f32x16(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}
/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_pd&expand=2871)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_getmant_pd(
    a: __m512d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr, $imm2:expr) => {
            vgetmantpd(
                a.as_f64x8(),
                $imm2 << 2 | $imm4,
                _mm512_setzero_pd().as_f64x8(),
                0b11111111,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}
/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_pd&expand=2872)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_getmant_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr, $imm2:expr) => {
            vgetmantpd(
                a.as_f64x8(),
                $imm2 << 2 | $imm4,
                src.as_f64x8(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}
/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_pd&expand=2873)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_getmant_pd(
    k: __mmask8,
    a: __m512d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr, $imm2:expr) => {
            vgetmantpd(
                a.as_f64x8(),
                $imm2 << 2 | $imm4,
                _mm512_setzero_pd().as_f64x8(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}
/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_round_ps&expand=145)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vaddps(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
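// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// the rounding argument overrides MXCSR.RC for this single operation. The exact
// sum of 0.1f32 and 0.2f32 is not representable, so directed rounding picks
// different neighbouring f32 values.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_add_round_ps() -> (__m512, __m512) {
    let a = _mm512_set1_ps(0.1);
    let b = _mm512_set1_ps(0.2);
    let up = _mm512_add_round_ps(a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    let down = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    // Each lane of `up` is exactly one ULP above the matching lane of `down`.
    (up, down)
}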
/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_round_ps&expand=146)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_add_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vaddps(a, b, $imm4)
        };
    }
    let addround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, addround, src.as_f32x16()))
}
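// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// lanes whose mask bit is clear are copied from `src`; only the selected lanes
// receive the rounded sum.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_mask_add_round_ps() -> __m512 {
    let src = _mm512_set1_ps(-1.0);
    let a = _mm512_set1_ps(1.5);
    let b = _mm512_set1_ps(2.5);
    // Low eight lanes become 4.0; the high eight lanes keep -1.0 from `src`.
    _mm512_mask_add_round_ps(src, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION)
}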
/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_round_ps&expand=147)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_add_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vaddps(a, b, $imm4)
        };
    }
    let addround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, addround, zero))
}
/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_round_pd&expand=142)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vaddpd(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_round_pd&expand=143)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_add_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vaddpd(a, b, $imm4)
        };
    }
    let addround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, addround, src.as_f64x8()))
}
/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_round_pd&expand=144)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_add_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vaddpd(a, b, $imm4)
        };
    }
    let addround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, addround, zero))
}
/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_round_ps&expand=5739)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vsubps(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_round_ps&expand=5737)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_sub_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vsubps(a, b, $imm4)
        };
    }
    let subround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, subround, src.as_f32x16()))
}
/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_round_ps&expand=5738)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_sub_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vsubps(a, b, $imm4)
        };
    }
    let subround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, subround, zero))
}
/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_round_pd&expand=5736)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vsubpd(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_round_pd&expand=5734)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_sub_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vsubpd(a, b, $imm4)
        };
    }
    let subround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, subround, src.as_f64x8()))
}
/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_round_pd&expand=5735)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_sub_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vsubpd(a, b, $imm4)
        };
    }
    let subround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, subround, zero))
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_round_ps&expand=3940)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vmulps(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_round_ps&expand=3938)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_mul_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vmulps(a, b, $imm4)
        };
    }
    let mulround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, mulround, src.as_f32x16()))
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_round_ps&expand=3939)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_mul_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vmulps(a, b, $imm4)
        };
    }
    let mulround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mulround, zero))
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_round_pd&expand=3937)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vmulpd(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_round_pd&expand=3935)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_mul_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vmulpd(a, b, $imm4)
        };
    }
    let mulround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, mulround, src.as_f64x8()))
}
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_round_pd&expand=3936)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_mul_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vmulpd(a, b, $imm4)
        };
    }
    let mulround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mulround, zero))
}
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_round_ps&expand=2168)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vdivps(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
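// Illustrative usage sketch (hypothetical helper, not part of the crate's API):
// 1.0 / 3.0 is inexact in binary, so truncation and round-up give adjacent f32
// values in every lane.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _demo_div_round_ps() -> (__m512, __m512) {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(3.0);
    let low = _mm512_div_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let high = _mm512_div_round_ps(a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    // `high` lanes are one ULP above `low` lanes.
    (low, high)
}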
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_round_ps&expand=2169)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_div_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vdivps(a, b, $imm4)
        };
    }
    let divround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, divround, src.as_f32x16()))
}
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_round_ps&expand=2170)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_div_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vdivps(a, b, $imm4)
        };
    }
    let divround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, divround, zero))
}
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_div_round_pd&expand=2165)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vdivpd(a, b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_div_round_pd&expand=2166)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_div_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vdivpd(a, b, $imm4)
        };
    }
    let divround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, divround, src.as_f64x8()))
}
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_div_round_pd&expand=2167)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_div_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vdivpd(a, b, $imm4)
        };
    }
    let divround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, divround, zero))
}

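// --- Editor's note: illustrative usage sketch, not part of the original
// source. It shows how callers are expected to build the `rounding` argument
// of the `*_div_round_*` intrinsics from the `_MM_FROUND_*` flags defined
// elsewhere in this crate; the helper name `div_round_example` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn div_round_example(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    // Divide with truncation (round toward zero) and suppressed exceptions;
    // lanes whose mask bit is clear are zeroed by the maskz variant.
    _mm512_maskz_div_round_ps(k, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
}
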
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_round_ps&expand=5377)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_sqrt_round_ps(a: __m512, rounding: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtps(a, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_round_ps&expand=5375)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_sqrt_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    rounding: i32,
) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtps(a, $imm4)
        };
    }
    let sqrtround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, sqrtround, src.as_f32x16()))
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_round_ps&expand=5376)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512, rounding: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtps(a, $imm4)
        };
    }
    let sqrtround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, sqrtround, zero))
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_round_pd&expand=5374)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_sqrt_round_pd(a: __m512d, rounding: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtpd(a, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_round_pd&expand=5372)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_sqrt_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtpd(a.as_f64x8(), $imm4)
        };
    }
    let sqrtround = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, sqrtround, src.as_f64x8()))
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_round_pd&expand=5373)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d, rounding: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtpd(a.as_f64x8(), $imm4)
        };
    }
    let sqrtround = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, sqrtround, zero))
}

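// --- Editor's note: illustrative sketch, not from the original source. It
// demonstrates the writemask variant: lanes with a clear mask bit keep the
// value from `src` instead of the computed square root. The helper name
// `sqrt_round_example` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sqrt_round_example(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    // Square roots rounded toward positive infinity, exceptions suppressed.
    _mm512_mask_sqrt_round_pd(src, k, a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC)
}
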
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_round_ps&expand=2565)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_round_ps&expand=2566)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmadd_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_round_ps&expand=2568)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmadd_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_round_ps&expand=2567)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmadd_round_ps(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmadd_round_pd&expand=2561)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmadd_round_pd&expand=2562)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmadd_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmadd_round_pd&expand=2564)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmadd_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmadd_round_pd&expand=2563)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmadd_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
}

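// --- Editor's note: illustrative sketch, not from the original source. The
// three masked fmadd variants differ only in where unselected lanes come
// from: `mask` copies from `a`, `maskz` zeroes, and `mask3` copies from `c`.
// The helper name `fmadd_round_example` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmadd_round_example(a: __m512, b: __m512, c: __m512, k: __mmask16) -> (__m512, __m512) {
    // Unselected lanes fall back to `a` here ...
    let from_a = _mm512_mask_fmadd_round_ps(a, k, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    // ... and to `c` here; note the mask moves to the fourth position.
    let from_c = _mm512_mask3_fmadd_round_ps(a, b, c, k, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    (from_a, from_c)
}
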
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_round_ps&expand=2651)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_round_ps&expand=2652)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsub_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_round_ps&expand=2654)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsub_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_round_ps&expand=2653)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsub_round_ps(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsub_round_pd&expand=2647)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsub_round_pd&expand=2648)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsub_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsub_round_pd&expand=2650)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsub_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsub_round_pd&expand=2649)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsub_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
}

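// --- Editor's note: illustrative sketch, not from the original source. The
// fmsub bodies above reuse the fused-add LLVM intrinsic on a negated `c`,
// relying on the per-lane identity a*b - c == a*b + (-c); the module name
// `fmsub_round_identity_sketch` is hypothetical.
#[cfg(test)]
mod fmsub_round_identity_sketch {
    #[test]
    fn negated_addend() {
        let (a, b, c) = (2.0_f32, 3.0_f32, 1.5_f32);
        // The scalar identity exploited by the vector code above.
        assert_eq!(a * b - c, a * b + (-c));
    }
}
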
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_round_ps&expand=2619)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_round_ps&expand=2620)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmaddsub_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fmaddsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_round_ps&expand=2622)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmaddsub_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fmaddsub = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_round_ps&expand=2621)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmaddsub_round_ps(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fmaddsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmaddsub_round_pd&expand=2615)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmaddsub_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmaddsub_round_pd&expand=2616)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmaddsub_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fmaddsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmaddsub_round_pd&expand=2618)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmaddsub_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fmaddsub = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, fmaddsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmaddsub_round_pd&expand=2617)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fmaddsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
}

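// --- Editor's note: illustrative sketch, not from the original source. For
// fmaddsub, even-indexed lanes compute a*b - c and odd-indexed lanes compute
// a*b + c; the helper name `fmaddsub_round_example` is hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmaddsub_round_example(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Round to nearest with exceptions suppressed; lane i gets
    // a[i]*b[i] - c[i] when i is even and a[i]*b[i] + c[i] when i is odd.
    _mm512_fmaddsub_round_pd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)
}
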
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_round_ps&expand=2699)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_round_ps&expand=2700)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let fmsubadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_round_ps&expand=2702)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let fmsubadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_round_ps&expand=2701)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213ps(a.as_f32x16(), b.as_f32x16(), sub, $imm4)
        };
    }
    let fmsubadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fmsubadd_round_pd&expand=2695)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsubadd_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fmsubadd_round_pd&expand=2696)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let fmsubadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fmsubadd_round_pd&expand=2698)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let fmsubadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsubadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fmsubadd_round_pd&expand=2697)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmaddsub213pd(a.as_f64x8(), b.as_f64x8(), sub, $imm4)
        };
    }
    let fmsubadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
}

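// --- Editor's note: illustrative sketch, not from the original source.
// fmsubadd mirrors fmaddsub: even-indexed lanes compute a*b + c and
// odd-indexed lanes a*b - c. The helper name `fmsubadd_round_example` is
// hypothetical.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmsubadd_round_example(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    // Use the current MXCSR rounding mode instead of an embedded one.
    _mm512_fmsubadd_round_pd(a, b, c, _MM_FROUND_CUR_DIRECTION)
}
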
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_round_ps&expand=2731)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_round_ps&expand=2732)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmadd_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16(), $imm4)
        };
    }
    let fnmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
}

4736 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
4738 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4739 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4740 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4741 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4742 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4743 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_round_ps&expand=2734)
4747 #[target_feature(enable = "avx512f")]
4748 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4749 #[rustc_args_required_const(4)]
4750 pub unsafe fn _mm512_maskz_fnmadd_round_ps(
4757 let zero
: f32x16
= mem
::zeroed();
4758 let sub
= simd_sub(zero
, a
.as_f32x16());
4761 vfmadd132ps(sub
, b
.as_f32x16(), c
.as_f32x16(), $imm4
)
4764 let fnmadd
= constify_imm4_round
!(rounding
, call
);
4765 transmute(simd_select_bitmask(k
, fnmadd
, zero
))
4768 /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
4770 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
4771 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
4772 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
4773 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
4774 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
4775 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
4777 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_round_ps&expand=2733)
4779 #[target_feature(enable = "avx512f")]
4780 #[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4781 #[rustc_args_required_const(4)]
4782 pub unsafe fn _mm512_mask3_fnmadd_round_ps(
4789 let zero
: f32x16
= mem
::zeroed();
4790 let sub
= simd_sub(zero
, a
.as_f32x16());
4793 vfmadd132ps(sub
, b
.as_f32x16(), c
.as_f32x16(), $imm4
)
4796 let fnmadd
= constify_imm4_round
!(rounding
, call
);
4797 transmute(simd_select_bitmask(k
, fnmadd
, c
.as_f32x16()))
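// For reference, a scalar model of what one f32 lane of the fnmadd family
// above computes, rounding-mode selection aside (hypothetical helper, not
// upstream code):
//
// fn fnmadd_lane(a: f32, b: f32, c: f32) -> f32 {
//     -(a * b) + c
// }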
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmadd_round_pd&expand=2711)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmadd_round_pd&expand=2728)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmadd_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fnmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmadd_round_pd&expand=2730)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fnmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmadd, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmadd_round_pd&expand=2729)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let sub = simd_sub(zero, a.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8(), $imm4)
        };
    }
    let fnmadd = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
}
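// Illustrative sketch of the mask vs. zeromask contract (hypothetical helper,
// not upstream code): with an all-clear mask, the `mask` variant returns its
// fallback operand `a` unchanged, while the `maskz` variant returns all zeros.
//
// unsafe fn demo_fnmadd_masks(a: __m512d, b: __m512d, c: __m512d) -> (__m512d, __m512d) {
//     let keep_a = _mm512_mask_fnmadd_round_pd(a, 0, b, c, _MM_FROUND_CUR_DIRECTION);
//     let zeroed = _mm512_maskz_fnmadd_round_pd(0, a, b, c, _MM_FROUND_CUR_DIRECTION);
//     (keep_a, zeroed)
// }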
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_round_ps&expand=2779)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
    // all-0 is a properly initialized f32x16
    let zero: f32x16 = mem::zeroed();
    // negate both a and c, then reuse the plain fused multiply-add
    let suba = simd_sub(zero, a.as_f32x16());
    let subc = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_round_ps&expand=2780)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmsub_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f32x16());
    let subc = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_round_ps&expand=2782)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f32x16());
    let subc = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmsub, zero))
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_round_ps&expand=2781)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_ps(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
    rounding: i32,
) -> __m512 {
    let zero: f32x16 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f32x16());
    let subc = simd_sub(zero, c.as_f32x16());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ps(suba, b.as_f32x16(), subc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
}
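// Scalar model of one f32 lane of the fnmsub family above, rounding-mode
// selection aside (hypothetical helper, not upstream code):
//
// fn fnmsub_lane(a: f32, b: f32, c: f32) -> f32 {
//     -(a * b) - c
// }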
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fnmsub_round_pd&expand=2775)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f64x8());
    let subc = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fnmsub_round_pd&expand=2776)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmsub_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f64x8());
    let subc = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fnmsub_round_pd&expand=2778)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f64x8());
    let subc = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmsub, zero))
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask3_fnmsub_round_pd&expand=2777)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
    rounding: i32,
) -> __m512d {
    let zero: f64x8 = mem::zeroed();
    let suba = simd_sub(zero, a.as_f64x8());
    let subc = simd_sub(zero, c.as_f64x8());
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132pd(suba, b.as_f64x8(), subc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=max_round_ps&expand=3662)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_round_ps&expand=3660)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_max_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
        };
    }
    let max = constify_imm4_sae!(sae, call);
    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_round_ps&expand=3661)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxps(a.as_f32x16(), b.as_f32x16(), $imm4)
        };
    }
    let max = constify_imm4_sae!(sae, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, max, zero))
}
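// Illustrative sketch (hypothetical helper, not upstream code): for max/min
// the `sae` argument only controls exception suppression; pass
// _MM_FROUND_CUR_DIRECTION to keep MXCSR exception reporting, or
// _MM_FROUND_NO_EXC to suppress it.
//
// unsafe fn demo_max_quiet(a: __m512, b: __m512) -> __m512 {
//     _mm512_max_round_ps(a, b, _MM_FROUND_NO_EXC)
// }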
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_max_round_pd&expand=3659)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_max_round_pd&expand=3657)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_max_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
        };
    }
    let max = constify_imm4_sae!(sae, call);
    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_max_round_pd&expand=3658)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxpd(a.as_f64x8(), b.as_f64x8(), $imm4)
        };
    }
    let max = constify_imm4_sae!(sae, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, max, zero))
}
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_round_ps&expand=3776)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_round_ps&expand=3774)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_min_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
        };
    }
    let min = constify_imm4_sae!(sae, call);
    transmute(simd_select_bitmask(k, min, src.as_f32x16()))
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_round_ps&expand=3775)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vminps(a.as_f32x16(), b.as_f32x16(), $imm4)
        };
    }
    let min = constify_imm4_sae!(sae, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, min, zero))
}
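// Illustrative sketch of the writemask form (hypothetical helper, not
// upstream code): lanes with a set bit receive min(a, b); the rest are
// copied from `src`.
//
// unsafe fn demo_min_blend(src: __m512, a: __m512, b: __m512) -> __m512 {
//     _mm512_mask_min_round_ps(src, 0b10101010_10101010, a, b, _MM_FROUND_NO_EXC)
// }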
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_min_round_pd&expand=3773)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_round_pd&expand=3771)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_min_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
        };
    }
    let min = constify_imm4_sae!(sae, call);
    transmute(simd_select_bitmask(k, min, src.as_f64x8()))
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_round_pd&expand=3772)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vminpd(a.as_f64x8(), b.as_f64x8(), $imm4)
        };
    }
    let min = constify_imm4_sae!(sae, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, min, zero))
}
/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_round_ps&expand=2850)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_getexp_round_ps(a: __m512, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpps(
                a.as_f32x16(),
                _mm512_setzero_ps().as_f32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_round_ps&expand=2851)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_getexp_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpps(a.as_f32x16(), src.as_f32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_round_ps&expand=2852)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getexp_round_pd&expand=2847)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_getexp_round_pd(a: __m512d, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexppd(
                a.as_f64x8(),
                _mm512_setzero_pd().as_f64x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getexp_round_pd&expand=2848)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_getexp_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexppd(a.as_f64x8(), src.as_f64x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getexp_round_pd&expand=2849)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexppd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
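// Scalar model of one getexp lane for finite nonzero inputs (hypothetical
// helper, not upstream code; 0, NaN and infinities follow the hardware
// rules, not this sketch):
//
// fn getexp_lane(x: f64) -> f64 {
//     x.abs().log2().floor()
// }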
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_round_ps&expand=4790)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_roundscale_round_ps(a: __m512, imm8: i32, sae: i32) -> __m512 {
    let a = a.as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscaleps(a, $imm8, zero, 0b11111111_11111111, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_round_ps&expand=4788)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    imm8: i32,
    sae: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let src = src.as_f32x16();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscaleps(a, $imm8, src, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_round_ps&expand=4789)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_ps(
    k: __mmask16,
    a: __m512,
    imm8: i32,
    sae: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscaleps(a, $imm8, zero, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
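// Illustrative sketch (hypothetical helper, not upstream code): imm8[7:4]
// holds the number of fraction bits M and imm8[2:0] the rounding mode, so
// this rounds every lane to the nearest multiple of 2^-4 = 0.0625.
//
// unsafe fn demo_roundscale(a: __m512) -> __m512 {
//     _mm512_roundscale_round_ps(a, (4 << 4) | _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_CUR_DIRECTION)
// }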
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_roundscale_round_pd&expand=4787)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_roundscale_round_pd(a: __m512d, imm8: i32, sae: i32) -> __m512d {
    let a = a.as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscalepd(a, $imm8, zero, 0b11111111, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_roundscale_round_pd&expand=4785)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    imm8: i32,
    sae: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let src = src.as_f64x8();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscalepd(a, $imm8, src, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_roundscale_round_pd&expand=4786)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_pd(
    k: __mmask8,
    a: __m512d,
    imm8: i32,
    sae: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscalepd(a, $imm8, zero, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_round_ps&expand=4889)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefps(
                a.as_f32x16(),
                b.as_f32x16(),
                _mm512_setzero_ps().as_f32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_round_ps&expand=4887)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_scalef_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefps(a.as_f32x16(), b.as_f32x16(), src.as_f32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_round_ps&expand=4888)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_scalef_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefps(
                a.as_f32x16(),
                b.as_f32x16(),
                _mm512_setzero_ps().as_f32x16(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_scalef_round_pd&expand=4886)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefpd(
                a.as_f64x8(),
                b.as_f64x8(),
                _mm512_setzero_pd().as_f64x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_scalef_round_pd&expand=4884)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_scalef_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefpd(a.as_f64x8(), b.as_f64x8(), src.as_f64x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_scalef_round_pd&expand=4885)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_scalef_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    rounding: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefpd(
                a.as_f64x8(),
                b.as_f64x8(),
                _mm512_setzero_pd().as_f64x8(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
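// Scalar model of one scalef lane (hypothetical helper, not upstream code):
// each result is a * 2^floor(b).
//
// fn scalef_lane(a: f64, b: f64) -> f64 {
//     a * b.floor().exp2()
// }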
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_round_ps&expand=2505)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_fixupimm_round_ps(
    a: __m512,
    b: __m512,
    c: __m512i,
    imm8: i32,
    sae: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmps(a, b, c, $imm8, 0b11111111_11111111, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_round_ps&expand=2506)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_ps(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
    imm8: i32,
    sae: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmps(a, b, c, $imm8, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_round_ps&expand=2507)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_ps(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
    imm8: i32,
    sae: i32,
) -> __m512 {
    let a = a.as_f32x16();
    let b = b.as_f32x16();
    let c = c.as_i32x16();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmpsz(a, b, c, $imm8, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_fixupimm_round_pd&expand=2502)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_fixupimm_round_pd(
    a: __m512d,
    b: __m512d,
    c: __m512i,
    imm8: i32,
    sae: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmpd(a, b, c, $imm8, 0b11111111, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_fixupimm_round_pd&expand=2503)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_pd(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512i,
    imm8: i32,
    sae: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmpd(a, b, c, $imm8, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}

/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_fixupimm_round_pd&expand=2504)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_pd(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512i,
    imm8: i32,
    sae: i32,
) -> __m512d {
    let a = a.as_f64x8();
    let b = b.as_f64x8();
    let c = c.as_i64x8();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmpdz(a, b, c, $imm8, k, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
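// Illustrative call shape only (hypothetical helper, not upstream code): each
// 64-bit lane of `c` carries a per-lane token selecting the fixup applied to
// that lane, and imm8 chooses which special cases raise exception flags; see
// Intel's documentation for the token encoding.
//
// unsafe fn demo_fixupimm(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
//     _mm512_fixupimm_round_pd(a, b, c, 0, _MM_FROUND_CUR_DIRECTION)
// }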
6080 /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
6081 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
6082 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
6083 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
6084 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
6085 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
6086 /// The sign is determined by sc which can take the following values:\
6087 /// _MM_MANT_SIGN_src // sign = sign(src)\
6088 /// _MM_MANT_SIGN_zero // sign = 0\
6089 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
6090 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6092 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_round_ps&expand=2886)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_ps(
    a: __m512,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantps(
                a.as_f32x16(),
                $imm2 << 2 | $imm4_1,
                _mm512_setzero_ps().as_f32x16(),
                0b11111111_11111111,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_round_ps&expand=2887)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantps(
                a.as_f32x16(),
                $imm2 << 2 | $imm4_1,
                src.as_f32x16(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_round_ps&expand=2888)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_ps(
    k: __mmask16,
    a: __m512,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantps(
                a.as_f32x16(),
                $imm2 << 2 | $imm4_1,
                _mm512_setzero_ps().as_f32x16(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_getmant_round_pd&expand=2883)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_pd(
    a: __m512d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantpd(
                a.as_f64x8(),
                $imm2 << 2 | $imm4_1,
                _mm512_setzero_pd().as_f64x8(),
                0b11111111,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_getmant_round_pd&expand=2884)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantpd(
                a.as_f64x8(),
                $imm2 << 2 | $imm4_1,
                src.as_f64x8(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_getmant_round_pd&expand=2885)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_pd(
    k: __mmask8,
    a: __m512d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantpd(
                a.as_f64x8(),
                $imm2 << 2 | $imm4_1,
                _mm512_setzero_pd().as_f64x8(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvtps_epi32&expand=1737)
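///
/// # Example
///
/// A minimal sketch, not from the original source; rounding follows the
/// current MXCSR rounding mode, which defaults to round-to-nearest-even:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// // Under the default rounding mode, 1.5 rounds to the even integer 2.
/// let r = _mm512_cvtps_epi32(a);
/// ```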
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
    transmute(vcvtps2dq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_epi32&expand=1738)
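///
/// # Example
///
/// A minimal sketch, not from the original source, of the writemask pattern
/// used throughout this module: set mask bits select converted lanes, clear
/// bits keep the corresponding lane of `src`:
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// let src = _mm512_set1_epi32(-1);
/// // Convert only the even lanes; the odd lanes are copied from `src`.
/// let r = _mm512_mask_cvtps_epi32(src, 0b01010101_01010101, a);
/// ```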
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2dq(
        a.as_f32x16(),
        src.as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_epi32&expand=1739)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2dq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_epu32&expand=1755)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
    transmute(vcvtps2udq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_u32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_epu32&expand=1756)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2udq(
        a.as_f32x16(),
        src.as_u32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=maskz_cvt_roundps_epu32&expand=1343)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvtps2udq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_u32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_pd&expand=1769)
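///
/// # Example
///
/// A minimal sketch, not from the original source: eight f32 lanes widen to
/// eight f64 lanes, so the input is 256-bit and the result 512-bit. Widening
/// is exact, so no rounding occurs:
///
/// ```ignore
/// let a = _mm256_set1_ps(0.1);
/// // Each f64 lane holds the exact value of the f32 approximation of 0.1.
/// let r = _mm512_cvtps_pd(a);
/// ```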
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
    transmute(vcvtps2pd(
        a.as_f32x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_pd&expand=1770)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    transmute(vcvtps2pd(
        a.as_f32x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_pd&expand=1771)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
    transmute(vcvtps2pd(
        a.as_f32x8(),
        _mm512_setzero_pd().as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtpslo_pd&expand=1784)
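///
/// # Example
///
/// A minimal sketch, not from the original source: only the lower eight f32
/// lanes of the 512-bit input take part in the conversion:
///
/// ```ignore
/// let v2 = _mm512_set1_ps(2.0);
/// // dst holds eight f64 lanes converted from the low half of `v2`.
/// let r = _mm512_cvtpslo_pd(v2);
/// ```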
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
    transmute(vcvtps2pd(
        _mm512_castps512_ps256(v2).as_f32x8(),
        _mm512_setzero_pd().as_f64x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtpslo_pd&expand=1785)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
    transmute(vcvtps2pd(
        _mm512_castps512_ps256(v2).as_f32x8(),
        src.as_f64x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtpd_ps&expand=1712)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
    transmute(vcvtpd2ps(
        a.as_f64x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtpd_ps&expand=1713)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
    transmute(vcvtpd2ps(
        a.as_f64x8(),
        src.as_f32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtpd_ps&expand=1714)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
    transmute(vcvtpd2ps(
        a.as_f64x8(),
        _mm256_setzero_ps().as_f32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtpd_pslo&expand=1715)
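///
/// # Example
///
/// A minimal sketch, not from the original source: the eight converted f32
/// values land in the lower half of the 512-bit result and the upper half is
/// zeroed:
///
/// ```ignore
/// let v2 = _mm512_set1_pd(3.0);
/// // Lanes 0..8 of `r` hold 3.0f32; lanes 8..16 are 0.0.
/// let r = _mm512_cvtpd_pslo(v2);
/// ```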
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
    let r: f32x8 = vcvtpd2ps(
        v2.as_f64x8(),
        _mm256_setzero_ps().as_f32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    );
    transmute(simd_shuffle16(
        r,
        _mm256_setzero_ps().as_f32x8(),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
    ))
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtpd_pslo&expand=1716)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
    let r: f32x8 = vcvtpd2ps(
        v2.as_f64x8(),
        _mm512_castps512_ps256(src).as_f32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    );
    transmute(simd_shuffle16(
        r,
        _mm256_setzero_ps().as_f32x8(),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
    ))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi8_epi32&expand=1535)
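///
/// # Example
///
/// A minimal sketch, not from the original source, of sign extension: negative
/// 8-bit values keep their sign in the widened 32-bit lanes:
///
/// ```ignore
/// let a = _mm_set1_epi8(-1);
/// // Every i32 lane of `r` is -1, not 255: the sign bit is replicated.
/// let r = _mm512_cvtepi8_epi32(a);
/// ```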
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
    let a = a.as_i8x16();
    transmute::<i32x16, _>(simd_cast(a))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi8_epi32&expand=1536)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi8_epi32&expand=1537)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi8_epi64&expand=1544)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
    let a = a.as_i8x16();
    let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i64x8, _>(simd_cast(v64))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi8_epi64&expand=1545)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi8_epi64&expand=1546)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu8_epi32&expand=1621)
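///
/// # Example
///
/// A minimal sketch, not from the original source, contrasting zero extension
/// with the sign-extending `_mm512_cvtepi8_epi32` above:
///
/// ```ignore
/// let a = _mm_set1_epi8(-1);
/// // Every i32 lane of `r` is 255: the byte 0xFF is widened with zero bits.
/// let r = _mm512_cvtepu8_epi32(a);
/// ```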
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
    let a = a.as_u8x16();
    transmute::<i32x16, _>(simd_cast(a))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu8_epi32&expand=1622)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu8_epi32&expand=1623)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu8_epi64&expand=1630)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
    let a = a.as_u8x16();
    let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<i64x8, _>(simd_cast(v64))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu8_epi64&expand=1631)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu8_epi64&expand=1632)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi16_epi32&expand=1389)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
    let a = a.as_i16x16();
    transmute::<i32x16, _>(simd_cast(a))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi16_epi32&expand=1390)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi16_epi32&expand=1391)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi16_epi64&expand=1398)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
    let a = a.as_i16x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi16_epi64&expand=1399)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi16_epi64&expand=1400)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu16_epi32&expand=1553)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
    let a = a.as_u16x16();
    transmute::<i32x16, _>(simd_cast(a))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu16_epi32&expand=1554)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu16_epi32&expand=1555)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu16_epi64&expand=1562)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
    let a = a.as_u16x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu16_epi64&expand=1563)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu16_epi64&expand=1564)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_epi64&expand=1428)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
    let a = a.as_i32x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_epi64&expand=1429)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_epi64&expand=1430)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32_epi64&expand=1571)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
    let a = a.as_u32x8();
    transmute::<i64x8, _>(simd_cast(a))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32_epi64&expand=1572)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu32_epi64&expand=1573)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_ps&expand=1455)
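///
/// # Example
///
/// A minimal sketch, not from the original source; i32 values of large
/// magnitude may round, since f32 carries only 24 significand bits:
///
/// ```ignore
/// let a = _mm512_set1_epi32(16_777_217); // 2^24 + 1
/// // 2^24 + 1 is not representable in f32; each lane rounds to 16_777_216.0.
/// let r = _mm512_cvtepi32_ps(a);
/// ```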
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
    let a = a.as_i32x16();
    transmute::<f32x16, _>(simd_cast(a))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_ps&expand=1456)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    let convert = _mm512_cvtepi32_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_ps&expand=1457)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    let convert = _mm512_cvtepi32_ps(a).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_pd&expand=1446)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
    let a = a.as_i32x8();
    transmute::<f64x8, _>(simd_cast(a))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_pd&expand=1447)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    let convert = _mm512_cvtepi32_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_pd&expand=1448)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
    let convert = _mm512_cvtepi32_pd(a).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32_ps&expand=1583)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
    let a = a.as_u32x16();
    transmute::<f32x16, _>(simd_cast(a))
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32_ps&expand=1584)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    let convert = _mm512_cvtepu32_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu32_ps&expand=1585)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
    let convert = _mm512_cvtepu32_ps(a).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32_pd&expand=1580)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
    let a = a.as_u32x8();
    transmute::<f64x8, _>(simd_cast(a))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32_pd&expand=1581)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    let convert = _mm512_cvtepu32_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepu32_pd&expand=1582)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
    let convert = _mm512_cvtepu32_pd(a).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32lo_pd&expand=1464)
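///
/// # Example
///
/// A minimal sketch, not from the original source: only the low eight 32-bit
/// lanes of the 512-bit integer input are converted:
///
/// ```ignore
/// let v2 = _mm512_set1_epi32(7);
/// // dst holds eight f64 lanes, each 7.0, taken from the low half of `v2`.
/// let r = _mm512_cvtepi32lo_pd(v2);
/// ```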
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
    let v2 = v2.as_i32x16();
    let v256: i32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<f64x8, _>(simd_cast(v256))
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32lo_pd&expand=1465)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepu32lo_pd&expand=1586)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
    let v2 = v2.as_u32x16();
    let v256: u32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute::<f64x8, _>(simd_cast(v256))
}

/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepu32lo_pd&expand=1587)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
    transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_epi16&expand=1419)
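///
/// # Example
///
/// A minimal sketch, not from the original source: truncation drops the upper
/// 16 bits of each lane without saturating (contrast with
/// `_mm512_cvtsepi32_epi16` further below):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x0001_FFFF);
/// // Each i16 lane of `r` is -1 (0xFFFF); the high bits are simply discarded.
/// let r = _mm512_cvtepi32_epi16(a);
/// ```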
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
    let a = a.as_i32x16();
    transmute::<i16x16, _>(simd_cast(a))
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_epi16&expand=1420)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
    transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_epi16&expand=1421)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi32_epi8&expand=1437)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
    let a = a.as_i32x16();
    transmute::<i8x16, _>(simd_cast(a))
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi32_epi8&expand=1438)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
    transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi32_epi8&expand=1439)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi64_epi32&expand=1481)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
    let a = a.as_i64x8();
    transmute::<i32x8, _>(simd_cast(a))
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi64_epi32&expand=1482)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi64_epi32&expand=1483)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
    let zero = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi64_epi16&expand=1472)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
    let a = a.as_i64x8();
    transmute::<i16x8, _>(simd_cast(a))
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi64_epi16&expand=1473)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
    transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi64_epi16&expand=1474)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, convert, zero))
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtepi64_epi8&expand=1490)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
    transmute(vpmovqb(
        a.as_i64x8(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtepi64_epi8&expand=1491)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k))
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtepi64_epi8&expand=1492)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi32_epi16&expand=1819)
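///
/// # Example
///
/// A minimal sketch, not from the original source: saturation clamps
/// out-of-range values to the i16 limits instead of discarding high bits:
///
/// ```ignore
/// let a = _mm512_set1_epi32(100_000);
/// // 100_000 exceeds i16::MAX, so every i16 lane of `r` saturates to 32_767.
/// let r = _mm512_cvtsepi32_epi16(a);
/// ```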
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
    transmute(vpmovsdw(
        a.as_i32x16(),
        _mm256_setzero_si256().as_i16x16(),
        0b11111111_11111111,
    ))
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi32_epi16&expand=1820)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k))
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi32_epi16&expand=1821)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovsdw(
        a.as_i32x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi32_epi8&expand=1828)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
    transmute(vpmovsdb(
        a.as_i32x16(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111_11111111,
    ))
}
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi32_epi8&expand=1829)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k))
}
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi32_epi8&expand=1830)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k))
}
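// Illustrative sketch (not part of the original source): signed saturation
// clamps out-of-range lanes to the target type's extremes instead of
// truncating bits. Assumes AVX-512F support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm512_set1_epi32(i32::MAX);
//             let r = _mm512_maskz_cvtsepi32_epi8(0xffff, a);
//             // Every selected lane saturates to i8::MAX (127); plain
//             // truncation of 0x7fff_ffff would have produced -1 instead.
//         }
//     }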
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi64_epi32&expand=1852)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
    transmute(vpmovsqd(
        a.as_i64x8(),
        _mm256_setzero_si256().as_i32x8(),
        0b11111111,
    ))
}
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi64_epi32&expand=1853)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k))
}
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi64_epi32&expand=1854)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k))
}
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi64_epi16&expand=1843)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
    transmute(vpmovsqw(
        a.as_i64x8(),
        _mm_setzero_si128().as_i16x8(),
        0b11111111,
    ))
}
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi64_epi16&expand=1844)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k))
}
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi64_epi16&expand=1845)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k))
}
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtsepi64_epi8&expand=1861)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
    transmute(vpmovsqb(
        a.as_i64x8(),
        _mm_setzero_si128().as_i8x16(),
        0b11111111,
    ))
}
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtsepi64_epi8&expand=1862)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k))
}
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtsepi64_epi8&expand=1863)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi32_epi16&expand=2054)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
    transmute(vpmovusdw(
        a.as_u32x16(),
        _mm256_setzero_si256().as_u16x16(),
        0b11111111_11111111,
    ))
}
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi32_epi16&expand=2055)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k))
}
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi32_epi16&expand=2056)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    transmute(vpmovusdw(
        a.as_u32x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}
/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi32_epi8&expand=2063)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
    transmute(vpmovusdb(
        a.as_u32x16(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111_11111111,
    ))
}
/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi32_epi8&expand=2064)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k))
}
/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi32_epi8&expand=2065)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k))
}
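// Illustrative sketch (not part of the original source): the usepi forms read
// lanes as unsigned and clamp to the unsigned maximum of the narrower type.
// Assumes AVX-512F support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm512_set1_epi32(300); // exceeds u8::MAX
//             let r = _mm512_maskz_cvtusepi32_epi8(0xffff, a);
//             // Selected lanes clamp to 255 (u8::MAX); the signed-saturating
//             // _mm512_maskz_cvtsepi32_epi8 would clamp to 127 instead.
//         }
//     }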
/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi64_epi32&expand=2087)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
    transmute(vpmovusqd(
        a.as_u64x8(),
        _mm256_setzero_si256().as_u32x8(),
        0b11111111,
    ))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi64_epi32&expand=2088)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi64_epi32&expand=2089)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    transmute(vpmovusqd(
        a.as_u64x8(),
        _mm256_setzero_si256().as_u32x8(),
        k,
    ))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi64_epi16&expand=2078)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
    transmute(vpmovusqw(
        a.as_u64x8(),
        _mm_setzero_si128().as_u16x8(),
        0b11111111,
    ))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi64_epi16&expand=2079)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi64_epi16&expand=2080)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtusepi64_epi8&expand=2096)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
    transmute(vpmovusqb(
        a.as_u64x8(),
        _mm_setzero_si128().as_u8x16(),
        0b11111111,
    ))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtusepi64_epi8&expand=2097)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k))
}
/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtusepi64_epi8&expand=2098)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k))
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_epi32&expand=1335)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2dq(
                a.as_f32x16(),
                _mm512_setzero_si512().as_i32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_epi32&expand=1336)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512,
    rounding: i32,
) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_epi32&expand=1337)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
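// Illustrative sketch (not part of the original source): the rounding argument
// must be a compile-time constant and changes the result for values that fall
// between integers. Assumes AVX-512F support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm512_set1_ps(2.5);
//             let down = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
//             let up = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
//             // `down` holds 2 in every lane and `up` holds 3; round-to-nearest
//             // (even) would also give 2 for 2.5.
//         }
//     }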
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_epu32&expand=1341)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2udq(
                a.as_f32x16(),
                _mm512_setzero_si512().as_u32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_epu32&expand=1342)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epu32(
    src: __m512i,
    k: __mmask16,
    a: __m512,
    rounding: i32,
) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2udq(a.as_f32x16(), src.as_u32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_epu32&expand=1343)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2udq(a.as_f32x16(), _mm512_setzero_si512().as_u32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundps_pd&expand=1347)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2pd(
                a.as_f32x8(),
                _mm512_setzero_pd().as_f64x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_pd&expand=1348)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_pd(
    src: __m512d,
    k: __mmask8,
    a: __m256,
    sae: i32,
) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2pd(a.as_f32x8(), src.as_f64x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_pd&expand=1349)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2pd(a.as_f32x8(), _mm512_setzero_pd().as_f64x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
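// Illustrative sketch (not part of the original source): widening f32 to f64
// is exact, so the sae argument only decides whether floating-point exception
// flags may be raised; it never changes the numeric result. Assumes AVX-512F
// support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm256_set1_ps(0.1);
//             let quiet = _mm512_cvt_roundps_pd(a, _MM_FROUND_NO_EXC);
//             let current = _mm512_cvt_roundps_pd(a, _MM_FROUND_CUR_DIRECTION);
//             // Both results hold the same widened values; only exception
//             // reporting differs.
//         }
//     }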
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundpd_epi32&expand=1315)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2dq(
                a.as_f64x8(),
                _mm256_setzero_si256().as_i32x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundpd_epi32&expand=1316)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
    rounding: i32,
) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundpd_epi32&expand=1317)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundpd_epu32&expand=1321)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2udq(
                a.as_f64x8(),
                _mm256_setzero_si256().as_u32x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundpd_epu32&expand=1322)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_epu32(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
    rounding: i32,
) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2udq(a.as_f64x8(), src.as_u32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundpd_epu32&expand=1323)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_u32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundpd_ps&expand=1327)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2ps(
                a.as_f64x8(),
                _mm256_setzero_ps().as_f32x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundpd_ps&expand=1328)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_ps(
    src: __m256,
    k: __mmask8,
    a: __m512d,
    rounding: i32,
) -> __m256 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2ps(a.as_f64x8(), src.as_f32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundpd_ps&expand=1329)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32) -> __m256 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtpd2ps(a.as_f64x8(), _mm256_setzero_ps().as_f32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundepi32_ps&expand=1294)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtdq2ps(a.as_i32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundepi32_ps&expand=1295)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundepi32_ps(
    src: __m512,
    k: __mmask16,
    a: __m512i,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtdq2ps(a.as_i32x16(), $imm4)
        };
    }
    let r: f32x16 = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundepi32_ps&expand=1296)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtdq2ps(a.as_i32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
}
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundepu32_ps&expand=1303)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtudq2ps(a.as_u32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundepu32_ps&expand=1304)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundepu32_ps(
    src: __m512,
    k: __mmask16,
    a: __m512i,
    rounding: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtudq2ps(a.as_u32x16(), $imm4)
        };
    }
    let r: f32x16 = constify_imm4_round!(rounding, call);
    transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundepu32_ps&expand=1305)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtudq2ps(a.as_u32x16(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, r, zero))
}
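// Illustrative sketch (not part of the original source): the epu32 variants
// read lanes as unsigned, which matters for bit patterns with the top bit
// set. Assumes AVX-512F support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm512_set1_epi32(-1); // bit pattern 0xFFFF_FFFF
//             let s = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_CUR_DIRECTION);
//             let u = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_CUR_DIRECTION);
//             // `s` holds -1.0 per lane; `u` holds 4294967296.0, the nearest
//             // f32 to u32::MAX.
//         }
//     }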
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundps_ph&expand=1354)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2ph(
                a.as_f32x16(),
                $imm4,
                _mm256_setzero_si256().as_i16x16(),
                0b11111111_11111111,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundps_ph&expand=1355)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_ph(
    src: __m256i,
    k: __mmask16,
    a: __m512,
    sae: i32,
) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2ph(a.as_f32x16(), $imm4, src.as_i16x16(), k)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundps_ph&expand=1356)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2ph(a.as_f32x16(), $imm4, _mm256_setzero_si256().as_i16x16(), k)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtps_ph&expand=1778)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2ph(
                a.as_f32x16(),
                $imm4,
                _mm256_setzero_si256().as_i16x16(),
                0b11111111_11111111,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtps_ph&expand=1779)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2ph(a.as_f32x16(), $imm4, src.as_i16x16(), k)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtps_ph&expand=1780)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtps2ph(a.as_f32x16(), $imm4, _mm256_setzero_si256().as_i16x16(), k)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
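// Illustrative sketch (not part of the original source): for vcvtps2ph the
// constant selects the rounding used while narrowing f32 to f16, and the
// packed half-precision results come back as raw 16-bit lanes in a __m256i.
// Assumes AVX-512F support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm512_set1_ps(1.0 / 3.0);
//             let h = _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//             // Each 16-bit lane of `h` holds the nearest f16 to 0.3333...,
//             // with roughly three decimal digits of precision.
//         }
//     }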
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvt_roundph_ps&expand=1332)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtph2ps(
                a.as_i16x16(),
                _mm512_setzero_ps().as_f32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvt_roundph_ps&expand=1333)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundph_ps(
    src: __m512,
    k: __mmask16,
    a: __m256i,
    sae: i32,
) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtph2ps(a.as_i16x16(), src.as_f32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvt_roundph_ps&expand=1334)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i, sae: i32) -> __m512 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtph2ps(a.as_i16x16(), _mm512_setzero_ps().as_f32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtph_ps&expand=1723)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
    transmute(vcvtph2ps(
        a.as_i16x16(),
        _mm512_setzero_ps().as_f32x16(),
        0b11111111_11111111,
        _MM_FROUND_NO_EXC,
    ))
}
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtph_ps&expand=1724)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    transmute(vcvtph2ps(
        a.as_i16x16(),
        src.as_f32x16(),
        k,
        _MM_FROUND_NO_EXC,
    ))
}
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtph_ps&expand=1725)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    transmute(vcvtph2ps(
        a.as_i16x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
        _MM_FROUND_NO_EXC,
    ))
}
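// Illustrative round-trip sketch (not part of the original source): narrowing
// to f16 and widening back makes the precision loss concrete. Assumes
// AVX-512F support at runtime.
//
//     if is_x86_feature_detected!("avx512f") {
//         unsafe {
//             let a = _mm512_set1_ps(0.1);
//             let h = _mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
//             let back = _mm512_cvtph_ps(h);
//             // `back` holds about 0.0999756 in every lane, the nearest f16
//             // to 0.1.
//         }
//     }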
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundps_epi32&expand=1916)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttps2dq(
                a.as_f32x16(),
                _mm512_setzero_si512().as_i32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundps_epi32&expand=1917)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundps_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512,
    sae: i32,
) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttps2dq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundps_epi32&expand=1918)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttps2dq(a.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundps_epu32&expand=1922)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttps2udq(
                a.as_f32x16(),
                _mm512_setzero_si512().as_i32x16(),
                0b11111111_11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundps_epu32&expand=1923)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
    src: __m512i,
    k: __mmask16,
    a: __m512,
    sae: i32,
) -> __m512i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttps2udq(a.as_f32x16(), src.as_i32x16(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
8582 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8583 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
8585 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundps_epu32&expand=1924)
8587 #[target_feature(enable = "avx512f")]
8588 #[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
8589 #[rustc_args_required_const(2)]
8590 pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k
: __mmask16
, a
: __m512
, sae
: i32) -> __m512i
{
8593 vcvttps2udq(a
.as_f32x16(), _mm512_setzero_si512().as_i32x16(), k
, $imm4
)
8596 let r
= constify_imm4_sae
!(sae
, call
);
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundpd_epi32&expand=1904)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttpd2dq(
                a.as_f64x8(),
                _mm256_setzero_si256().as_i32x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundpd_epi32&expand=1905)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundpd_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
    sae: i32,
) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttpd2dq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundpd_epi32&expand=1906)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttpd2dq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvtt_roundpd_epu32&expand=1910)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttpd2udq(
                a.as_f64x8(),
                _mm256_setzero_si256().as_i32x8(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvtt_roundpd_epu32&expand=1911)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundpd_epu32(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
    sae: i32,
) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttpd2udq(a.as_f64x8(), src.as_i32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttps_epi32&expand=1984)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
    transmute(vcvttps2dq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

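// Illustrative sketch (assumes AVX512F at runtime and an `unsafe` caller):
// truncation always rounds toward zero, independent of the current rounding
// mode:
//
//     let v = _mm512_set4_ps(1.9, -1.9, 2.5, -2.5);
//     let t = _mm512_cvttps_epi32(v);
//     // t contains only the values 1, -1, 2 and -2 (truncated toward zero)
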
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttps_epi32&expand=1985)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvttps2dq(
        a.as_f32x16(),
        src.as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttps_epi32&expand=1986)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvttps2dq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttps_epu32&expand=2002)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
    transmute(vcvttps2udq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        0b11111111_11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttps_epu32&expand=2003)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvttps2udq(
        a.as_f32x16(),
        src.as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttps_epu32&expand=2004)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
    transmute(vcvttps2udq(
        a.as_f32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

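// Sketch of a guarded unsigned conversion (illustrative; the guard below is an
// assumed usage pattern, with `v: __m512` supplied by the caller): the
// zeromask form can skip lanes whose values would be out of range for an
// unsigned conversion:
//
//     let k: __mmask16 = 0b00000000_11111111; // convert the low 8 lanes only
//     let u = _mm512_maskz_cvttps_epu32(k, v); // the high 8 lanes become 0
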
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvtt_roundpd_epu32&expand=1912)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
    macro_rules! call {
        ($imm4:expr) => {
            vcvttpd2udq(a.as_f64x8(), _mm256_setzero_si256().as_i32x8(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttpd_epi32&expand=1947)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
    transmute(vcvttpd2dq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_i32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttpd_epi32&expand=1948)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvttpd2dq(
        a.as_f64x8(),
        src.as_i32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttpd_epi32&expand=1949)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvttpd2dq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_i32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cvttpd_epu32&expand=1965)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
    transmute(vcvttpd2udq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_i32x8(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cvttpd_epu32&expand=1966)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvttpd2udq(
        a.as_f64x8(),
        src.as_i32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_cvttpd_epu32&expand=1967)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    transmute(vcvttpd2udq(
        a.as_f64x8(),
        _mm256_setzero_si256().as_i32x8(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

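// Note the shape change in the `pd` conversions above: the eight 64-bit lanes
// of a `__m512d` narrow into the eight 32-bit lanes of a `__m256i`. A rough
// sketch (illustrative values, AVX512F assumed):
//
//     let d = _mm512_set1_pd(-7.9);
//     let i = _mm512_cvttpd_epi32(d); // __m256i with every lane == -7
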
/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_pd() -> __m512d {
    // All-0 is a properly initialized __m512d
    mem::zeroed()
}

/// Returns vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_ps() -> __m512 {
    // All-0 is a properly initialized __m512
    mem::zeroed()
}

/// Return vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setzero&expand=5014)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero() -> __m512 {
    // All-0 is a properly initialized __m512
    mem::zeroed()
}

/// Returns vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_si512() -> __m512i {
    // All-0 is a properly initialized __m512i
    mem::zeroed()
}

/// Return vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setzero_epi32&expand=5015)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_epi32() -> __m512i {
    // All-0 is a properly initialized __m512i
    mem::zeroed()
}

/// Sets packed 32-bit integers in `dst` with the supplied values in
/// reverse order.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi32(
    e15: i32, e14: i32, e13: i32, e12: i32, e11: i32, e10: i32, e9: i32, e8: i32,
    e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32, e0: i32,
) -> __m512i {
    let r = i32x16::new(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    );
    transmute(r)
}

/// Set packed 8-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set_epi8&expand=4915)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi8(
    e63: i8, e62: i8, e61: i8, e60: i8, e59: i8, e58: i8, e57: i8, e56: i8,
    e55: i8, e54: i8, e53: i8, e52: i8, e51: i8, e50: i8, e49: i8, e48: i8,
    e47: i8, e46: i8, e45: i8, e44: i8, e43: i8, e42: i8, e41: i8, e40: i8,
    e39: i8, e38: i8, e37: i8, e36: i8, e35: i8, e34: i8, e33: i8, e32: i8,
    e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8, e24: i8,
    e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8,
    e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8,
    e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8,
) -> __m512i {
    let r = i8x64::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        e32, e33, e34, e35, e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47,
        e48, e49, e50, e51, e52, e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
    );
    transmute(r)
}

/// Set packed 16-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set_epi16&expand=4905)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi16(
    e31: i16, e30: i16, e29: i16, e28: i16, e27: i16, e26: i16, e25: i16, e24: i16,
    e23: i16, e22: i16, e21: i16, e20: i16, e19: i16, e18: i16, e17: i16, e16: i16,
    e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16, e9: i16, e8: i16,
    e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16,
) -> __m512i {
    let r = i16x32::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
    );
    transmute(r)
}

/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_epi32&expand=4982)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}

/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_ps&expand=4985)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_pd&expand=4984)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
}

/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_epi32&expand=5009)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}

/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_ps&expand=5012)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_pd&expand=5011)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
}

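// Lane-ordering sketch for the `set4`/`setr4` helpers above (illustrative;
// lane 0 is the least significant element):
//
//     let s = _mm512_set4_epi32(4, 3, 2, 1);  // lane 0 == 1, lane 1 == 2, ...
//     let r = _mm512_setr4_epi32(4, 3, 2, 1); // lane 0 == 4, lane 1 == 3, ...
//
// `setr4` stores its arguments in the order given, while `set4` stores them
// reversed, mirroring the `_mm512_set_epi32`/`_mm512_setr_epi32` pair.
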
/// Set packed 64-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set_epi64&expand=4910)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi64(
    e0: i64, e1: i64, e2: i64, e3: i64, e4: i64, e5: i64, e6: i64, e7: i64,
) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Set packed 64-bit integers in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr_epi64&expand=4993)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi64(
    e0: i64, e1: i64, e2: i64, e3: i64, e4: i64, e5: i64, e6: i64, e7: i64,
) -> __m512i {
    let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_pd&expand=3002)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d {
    let zero = _mm512_setzero_pd().as_f64x8();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherdpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

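// Gather usage sketch (illustrative; `data` is a caller-provided buffer and
// AVX512F is assumed). `scale` must be a constant; lane j loads from
// `base_addr + offsets[j] * scale`, so scale = 8 indexes an `f64` array:
//
//     let data: [f64; 8] = [0., 1., 2., 3., 4., 5., 6., 7.];
//     let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
//     let g = _mm512_i32gather_pd(idx, data.as_ptr() as *const u8, 8);
//     // g holds 7.0, 6.0, ..., 0.0
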
/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_pd&expand=3003)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_pd(
    src: __m512d,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const u8,
    scale: i32,
) -> __m512d {
    let src = src.as_f64x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherdpd(src, slice, offsets, mask as i8, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_pd&expand=3092)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d {
    let zero = _mm512_setzero_pd().as_f64x8();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherqpd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_pd&expand=3093)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_pd(
    src: __m512d,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const u8,
    scale: i32,
) -> __m512d {
    let src = src.as_f64x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherqpd(src, slice, offsets, mask as i8, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_ps&expand=3100)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 {
    let zero = _mm256_setzero_ps().as_f32x8();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherqps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_ps&expand=3101)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_ps(
    src: __m256,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const u8,
    scale: i32,
) -> __m256 {
    let src = src.as_f32x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherqps(src, slice, offsets, mask as i8, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_ps&expand=3010)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 {
    let zero = _mm512_setzero_ps().as_f32x16();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherdps(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_ps&expand=3011)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_ps(
    src: __m512,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const u8,
    scale: i32,
) -> __m512 {
    let src = src.as_f32x16();
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vgatherdps(src, slice, offsets, mask as i16, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_epi32&expand=2986)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
    let zero = _mm512_setzero_si512().as_i32x16();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdd(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_epi32&expand=2987)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_epi32(
    src: __m512i,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const u8,
    scale: i32,
) -> __m512i {
    let src = src.as_i32x16();
    let mask = mask as i16;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

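// Masked-gather sketch (illustrative; `idx` and `base` stand for a
// caller-provided index vector and buffer pointer): lanes whose bit in `k` is
// clear are not loaded at all and keep their value from `src`, so the mask can
// double as a bounds guard:
//
//     let src = _mm512_set1_epi32(-1);        // fallback for unselected lanes
//     let k: __mmask16 = 0b00000000_00001111; // only lanes 0..=3 are in bounds
//     let g = _mm512_mask_i32gather_epi32(src, k, idx, base, 4);
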
/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32gather_epi64&expand=2994)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i {
    let zero = _mm512_setzero_si512().as_i64x8();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32gather_epi64&expand=2995)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_epi64(
    src: __m512i,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const u8,
    scale: i32,
) -> __m512i {
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherdq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_epi64&expand=3084)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
    let zero = _mm512_setzero_si512().as_i64x8();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqq(zero, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_epi64&expand=3085)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_epi64(
    src: __m512i,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const u8,
    scale: i32,
) -> __m512i {
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqq(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64gather_epi32&expand=3074)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i {
    let zeros = _mm256_setzero_si256().as_i32x8();
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqd(zeros, slice, offsets, neg_one, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64gather_epi32&expand=3075)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_epi32(
    src: __m256i,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const u8,
    scale: i32,
) -> __m256i {
    let src = src.as_i32x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpgatherqd(src, slice, offsets, mask, $imm8)
        };
    }
    let r = constify_imm8_gather!(scale, call);
    transmute(r)
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_pd&expand=3044)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
    let src = src.as_f64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterdpd(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_pd&expand=3045)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_pd(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m256i,
    src: __m512d,
    scale: i32,
) {
    let src = src.as_f64x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterdpd(slice, mask as i8, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

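// Scatter usage sketch (illustrative; assumes AVX512F and an `unsafe` caller).
// Element j of `src` is stored to `base_addr + offsets[j] * scale`; in the
// masked variant above, lanes whose bit in the mask is clear write nothing:
//
//     let mut out = [0.0f64; 16];
//     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
//     let src = _mm512_set1_pd(1.5);
//     _mm512_i32scatter_pd(out.as_mut_ptr() as *mut u8, idx, src, 8);
//     // out[0], out[2], ..., out[14] are now 1.5
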
/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_pd&expand=3122)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) {
    let src = src.as_f64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterqpd(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_pd&expand=3123)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_pd(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m512i,
    src: __m512d,
    scale: i32,
) {
    let src = src.as_f64x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterqpd(slice, mask as i8, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_ps&expand=3050)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) {
    let src = src.as_f32x16();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterdps(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_ps&expand=3051)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_ps(
    slice: *mut u8,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512,
    scale: i32,
) {
    let src = src.as_f32x16();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterdps(slice, mask as i16, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_ps&expand=3128)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) {
    let src = src.as_f32x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterqps(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_ps&expand=3129)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_ps(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256,
    scale: i32,
) {
    let src = src.as_f32x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vscatterqps(slice, mask as i8, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_epi64&expand=3038)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) {
    let src = src.as_i64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterdq(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_epi64&expand=3039)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_epi64(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m256i,
    src: __m512i,
    scale: i32,
) {
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterdq(slice, mask, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_epi64&expand=3116)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
    let src = src.as_i64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterqq(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_epi64&expand=3117)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_epi64(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m512i,
    src: __m512i,
    scale: i32,
) {
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterqq(slice, mask, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i32scatter_epi32&expand=3032)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
    let src = src.as_i32x16();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterdd(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i32scatter_epi32&expand=3033)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32(
    slice: *mut u8,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512i,
    scale: i32,
) {
    let src = src.as_i32x16();
    let mask = mask as i16;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterdd(slice, mask, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_i64scatter_epi32&expand=3108)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) {
    let src = src.as_i32x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterqd(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_i64scatter_epi32&expand=3109)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256i,
    scale: i32,
) {
    let src = src.as_i32x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpscatterqd(slice, mask, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_epi32&expand=1198)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k))
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_epi32&expand=1199)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpcompressd(
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
    ))
}

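// Compress sketch (illustrative values; AVX512F assumed): selected elements
// are packed contiguously into the low lanes of the result:
//
//     let a = _mm512_setr_epi32(10, 11, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
//     let c = _mm512_maskz_compress_epi32(0b00000000_00000101, a);
//     // lanes of c: 10, 12, 0, 0, ... (lanes 0 and 2 of `a`, then zeros)
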
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_epi64&expand=1204)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k))
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_epi64&expand=1205)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpcompressq(
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
        k,
    ))
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_ps&expand=1222)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k))
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_ps&expand=1223)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vcompressps(
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
        k,
    ))
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_compress_pd&expand=1216)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k))
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_compress_pd&expand=1217)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    transmute(vcompresspd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_epi32&expand=2316)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k))
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_epi32&expand=2317)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    transmute(vpexpandd(
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
        k,
    ))
}

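// Expand is the inverse operation (illustrative values): consecutive low lanes
// of `a` are placed at the positions whose mask bit is set:
//
//     let a = _mm512_setr_epi32(10, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
//     let e = _mm512_maskz_expand_epi32(0b00000000_00000101, a);
//     // lanes of e: 10, 0, 12, 0, 0, ... (zero wherever the mask bit is clear)
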
/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_epi64&expand=2322)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k))
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_epi64&expand=2323)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    transmute(vpexpandq(
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
        k,
    ))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_ps&expand=2340)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_ps&expand=2341)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    transmute(vexpandps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}

10112 /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10114 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_expand_pd&expand=2334)
10116 #[target_feature(enable = "avx512f")]
10117 #[cfg_attr(test, assert_instr(vexpandpd))]
10118 pub unsafe fn _mm512_mask_expand_pd(src
: __m512d
, k
: __mmask8
, a
: __m512d
) -> __m512d
{
10119 transmute(vexpandpd(a
.as_f64x8(), src
.as_f64x8(), k
))
10122 /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10124 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_expand_pd&expand=2335)
10126 #[target_feature(enable = "avx512f")]
10127 #[cfg_attr(test, assert_instr(vexpandpd))]
10128 pub unsafe fn _mm512_maskz_expand_pd(k
: __mmask8
, a
: __m512d
) -> __m512d
{
10129 transmute(vexpandpd(a
.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k
))
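// Illustrative sketch (not part of the original module): with every other mask
// bit set, compress packs the selected lanes to the front while expand performs
// the inverse scatter, so the pair round-trips the active lanes.
//
//     unsafe fn compress_expand_sketch() {
//         let a = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.); // lane i holds i
//         let k: __mmask8 = 0b0101_0101; // keep lanes 0, 2, 4, 6
//         let packed = _mm512_maskz_compress_pd(k, a); // [0., 2., 4., 6., 0., 0., 0., 0.]
//         let spread = _mm512_maskz_expand_pd(k, packed); // [0., 0., 2., 0., 4., 0., 6., 0.]
//         let _ = (packed, spread);
//     }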
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vprold(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vprold(a, $imm8)
        };
    }
    let rol = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vprold(a, $imm8)
        };
    }
    let rol = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, rol, zero))
}
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vprord(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vprord(a, $imm8)
        };
    }
    let ror = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vprord(a, $imm8)
        };
    }
    let ror = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, ror, zero))
}
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vprolq(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vprolq(a, $imm8)
        };
    }
    let rol = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vprolq(a, $imm8)
        };
    }
    let rol = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, rol, zero))
}
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vprorq(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vprorq(a, $imm8)
        };
    }
    let ror = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vprorq(a, $imm8)
        };
    }
    let ror = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, ror, zero))
}
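// Illustrative sketch (not part of the original module): unlike the shifts
// further below, the rotates lose no bits, so rotating left and then right by
// the same immediate restores the input in every lane.
//
//     unsafe fn rotate_roundtrip(a: __m512i) -> __m512i {
//         let r = _mm512_rol_epi32(a, 8);
//         _mm512_ror_epi32(r, 8) // identical to `a` lane for lane
//     }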
/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsllid(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsllid(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsllid(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrlid(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrlid(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrlid(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpslliq(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpslliq(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpslliq(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrliq(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrliq(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpsrliq(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
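// Illustrative sketch (not part of the original module): the `slli`/`srli`
// family shifts every lane by the same compile-time immediate, so a lane-wise
// multiply by 8 is a single shift.
//
//     unsafe fn times_eight(a: __m512i) -> __m512i {
//         _mm512_slli_epi32(a, 3) // a << 3 per lane; shifted-out bits are lost
//     }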
/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_mask_sll_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sll_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sll_epi32(a, count).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_mask_srl_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_srl_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_srl_epi32(a, count).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_mask_sll_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sll_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sll_epi64(a, count).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_mask_srl_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_srl_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_srl_epi64(a, count).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
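// Illustrative sketch (not part of the original module): `sll`/`srl` read the
// shift count from the low 64 bits of an `__m128i`, so the amount can be chosen
// at run time. A hypothetical helper:
//
//     unsafe fn shl_each_lane_by(a: __m512i, n: i64) -> __m512i {
//         let count = _mm_set_epi64x(0, n); // count lives in the low quadword
//         _mm512_sll_epi32(a, count)
//     }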
/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_mask_sra_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sra_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sra_epi32(a, count).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_mask_sra_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m128i,
) -> __m512i {
    let shf = _mm512_sra_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    let shf = _mm512_sra_epi64(a, count).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsraid(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsraid(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpsraid(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpsraiq(a, $imm8)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpsraiq(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpsraiq(a, $imm8)
        };
    }
    let shf = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
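// Illustrative sketch (not part of the original module): the arithmetic right
// shifts (`sra*`/`srai*`) replicate the sign bit instead of pulling in zeros,
// so negative lanes stay negative.
//
//     unsafe fn halve_signed(a: __m512i) -> __m512i {
//         _mm512_srai_epi32(a, 1) // -4 becomes -2; `srli` would give a large positive value
//     }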
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_mask_srav_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srav_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srav_epi32(a, count).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_mask_srav_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srav_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srav_epi64(a, count).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
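// Illustrative sketch (not part of the original module): the `srav` variants
// take a per-lane count, e.g. shifting lane i right by i bits in one call:
//
//     unsafe fn staircase_sra(a: __m512i) -> __m512i {
//         let counts = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//         _mm512_srav_epi32(a, counts)
//     }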
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_mask_rolv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let rol = _mm512_rolv_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let rol = _mm512_rolv_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, rol, zero))
}
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_mask_rorv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let ror = _mm512_rorv_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let ror = _mm512_rorv_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, ror, zero))
}
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let rol = _mm512_rolv_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let rol = _mm512_rolv_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, rol, zero))
}
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let ror = _mm512_rorv_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let ror = _mm512_rorv_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, ror, zero))
}
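// Illustrative sketch (not part of the original module): `rolv`/`rorv` are the
// variable-count counterparts of the immediate rotates above; each lane of `b`
// supplies its own rotate count.
//
//     unsafe fn rotate_per_lane(a: __m512i) -> __m512i {
//         let counts = _mm512_set1_epi32(13); // any per-lane mix works equally well
//         _mm512_rolv_epi32(a, counts)
//     }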
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_mask_sllv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_sllv_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_sllv_epi32(a, count).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_mask_srlv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_mask_sllv_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi64&expand=5561)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_mask_srlv_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shf, zero))
}
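// Illustrative sketch (not part of the original module): the variable logical
// shifts can emulate a per-lane rotate, shown only to contrast with `rolv`
// above; this assumes `_mm512_sub_epi32` and `_mm512_or_si512` are available.
//
//     unsafe fn rotl_via_shifts(a: __m512i, n: __m512i) -> __m512i {
//         let hi = _mm512_sllv_epi32(a, n);
//         let lo = _mm512_srlv_epi32(a, _mm512_sub_epi32(_mm512_set1_epi32(32), n));
//         // counts >= 32 shift in all zeros, so n == 0 still yields `a`
//         _mm512_or_si512(hi, lo)
//     }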
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permute_ps&expand=4170)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permute_ps(a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpermilps(a, _mm512_set1_epi32($imm8).as_i32x16())
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permute_ps&expand=4168)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permute_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpermilps(a, _mm512_set1_epi32($imm8).as_i32x16())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permute_ps&expand=4169)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
    let a = a.as_f32x16();
    macro_rules! call {
        ($imm8:expr) => {
            vpermilps(a, _mm512_set1_epi32($imm8).as_i32x16())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
11335 /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
11337 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permute_pd&expand=4161)
11339 #[target_feature(enable = "avx512f")]
11340 #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
11341 #[rustc_args_required_const(1)]
11342 pub unsafe fn _mm512_permute_pd(a
: __m512d
, imm8
: i32) -> __m512d
{
11343 let a
= a
.as_f64x8();
11344 macro_rules
! call
{
11346 vpermilpd(a
, _mm512_set1_epi64($imm8
).as_i64x8())
11349 let r
= constify_imm8_sae
!(imm8
, call
);
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permute_pd&expand=4159)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permute_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermilpd(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permute_pd&expand=4160)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermilpd(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex_epi64&expand=4208)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))]
//should be vpermq, but generates vpermpd; with a writemask it generates vpermq. Asserting vbroadcast instead because of CI on Windows.
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permutex_epi64(a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermq(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex_epi64&expand=4206)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast, imm8 = 0b11111111))] //should be vpermq; asserting vpbroadcast because of CI on Windows
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permutex_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    imm8: i32,
) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermq(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
}
/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex_epi64&expand=4207)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast, imm8 = 0b11111111))] //should be vpermq; asserting vpbroadcast because of CI on Windows
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
    let a = a.as_i64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermq(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex_pd&expand=4214)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; asserting vbroadcast because of CI on Windows
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permutex_pd(a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermpd(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex_pd&expand=4212)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; asserting vbroadcast because of CI on Windows
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permutex_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermpd(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex_pd&expand=4213)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast, imm8 = 0b11111111))] //should be vpermpd; asserting vbroadcast because of CI on Windows
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
    let a = a.as_f64x8();
    macro_rules! call {
        ($imm8:expr) => {
            vpermpd(a, _mm512_set1_epi64($imm8).as_i64x8())
        };
    }
    let permute = constify_imm8_sae!(imm8, call);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
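
// Illustrative sketch of the permutex family above: unlike `permute`, the
// 2-bit fields of imm8 index within each 256-bit half, not each 128-bit lane.
// Hypothetical helper, assumes an AVX512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_permutex_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    // 0b00_01_10_11 reverses the four f64 elements inside each 256-bit half.
    let r: [f64; 8] = transmute(_mm512_permutex_pd(a, 0b00_01_10_11));
    assert_eq!(r, [3., 2., 1., 0., 7., 6., 5., 4.]);
}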
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_epi32&expand=4182)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd, but generates vpermps; with a writemask it generates vpermd
pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_epi32&expand=4181)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutevar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
    transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_ps&expand=4200)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_ps&expand=4198)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_mask_permutevar_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512i,
) -> __m512 {
    let permute = _mm512_permutevar_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutevar_ps&expand=4199)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
    let permute = _mm512_permutevar_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutevar_pd&expand=4191)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutevar_pd&expand=4189)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_mask_permutevar_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512i,
) -> __m512d {
    let permute = _mm512_permutevar_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutevar_pd&expand=4190)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
    let permute = _mm512_permutevar_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
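
// Illustrative sketch of the permutevar family above: the control lives in a
// vector, one selector per element, still confined to 128-bit lanes. For the
// `_pd` variants, bit 1 of each 64-bit control element does the selecting.
// Hypothetical helper, assumes an AVX512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_permutevar_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    // 0b10 in every control element picks the high element of each 128-bit lane.
    let b = _mm512_set1_epi64(0b10);
    let r: [f64; 8] = transmute(_mm512_permutevar_pd(a, b));
    assert_eq!(r, [1., 1., 3., 3., 5., 5., 7., 7.]);
}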
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_epi32&expand=4301)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermd, but generates vpermps; with a writemask it generates vpermd
pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_epi32&expand=4299)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutexvar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
    transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_epi32&expand=4300)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
    let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_epi64&expand=4307)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermq, but generates vpermpd; with a writemask it generates vpermq
pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
}
/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_epi64&expand=4305)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_mask_permutexvar_epi64(
    src: __m512i,
    k: __mmask8,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
    transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
}
/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_epi64&expand=4306)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
    let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_ps&expand=4200)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_ps&expand=4326)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_mask_permutexvar_ps(
    src: __m512,
    k: __mmask16,
    idx: __m512i,
    a: __m512,
) -> __m512 {
    let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
    transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_ps&expand=4327)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
    let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutexvar_pd&expand=4322)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutexvar_pd&expand=4320)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_mask_permutexvar_pd(
    src: __m512d,
    k: __mmask8,
    idx: __m512i,
    a: __m512d,
) -> __m512d {
    let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
    transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutexvar_pd&expand=4321)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
    let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
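
// Illustrative sketch of the permutexvar family above: indices reach across
// the entire 512-bit vector instead of staying inside a lane. Hypothetical
// helper, assumes an AVX512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_permutexvar_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    // Full cross-lane reversal, which `permute`/`permutevar` cannot express.
    let r: [f64; 8] = transmute(_mm512_permutexvar_pd(idx, a));
    assert_eq!(r, [7., 6., 5., 4., 3., 2., 1., 0.]);
}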
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_epi32&expand=4238)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
}
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_epi32&expand=4235)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub unsafe fn _mm512_mask_permutex2var_epi32(
    a: __m512i,
    k: __mmask16,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
    transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
}
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_epi32&expand=4237)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
pub unsafe fn _mm512_maskz_permutex2var_epi32(
    k: __mmask16,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_epi32&expand=4236)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub unsafe fn _mm512_mask2_permutex2var_epi32(
    a: __m512i,
    idx: __m512i,
    k: __mmask16,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
    transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
}
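
// Illustrative sketch of permutex2var: `a` and `b` form one 32-entry table,
// and index values 16..=31 fall through to `b`. Hypothetical helper, assumes
// an AVX512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_permutex2var_epi32() {
    let a = _mm512_set1_epi32(10);
    let b = _mm512_set1_epi32(20);
    // Even results read from `a` (indices 0..=15), odd ones from `b` (16..=31).
    let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    let r: [i32; 16] = transmute(_mm512_permutex2var_epi32(a, idx, b));
    assert_eq!(r, [10, 20, 10, 20, 10, 20, 10, 20, 10, 20, 10, 20, 10, 20, 10, 20]);
}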
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_epi64&expand=4250)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
}
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_epi64&expand=4247)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub unsafe fn _mm512_mask_permutex2var_epi64(
    a: __m512i,
    k: __mmask8,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
    transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
}
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_epi64&expand=4249)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
pub unsafe fn _mm512_maskz_permutex2var_epi64(
    k: __mmask8,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_epi64&expand=4248)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub unsafe fn _mm512_mask2_permutex2var_epi64(
    a: __m512i,
    idx: __m512i,
    k: __mmask8,
    b: __m512i,
) -> __m512i {
    let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
    transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_ps&expand=4286)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_ps&expand=4283)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub unsafe fn _mm512_mask_permutex2var_ps(
    a: __m512,
    k: __mmask16,
    idx: __m512i,
    b: __m512,
) -> __m512 {
    let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
    transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_ps&expand=4285)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
pub unsafe fn _mm512_maskz_permutex2var_ps(
    k: __mmask16,
    a: __m512,
    idx: __m512i,
    b: __m512,
) -> __m512 {
    let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_ps&expand=4284)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it generates vpermt2ps
pub unsafe fn _mm512_mask2_permutex2var_ps(
    a: __m512,
    idx: __m512i,
    k: __mmask16,
    b: __m512,
) -> __m512 {
    let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
    // Per Intel's documentation, unselected elements are copied from `idx`
    // (reinterpreted as floats), not zeroed.
    let idx = _mm512_castsi512_ps(idx).as_f32x16();
    transmute(simd_select_bitmask(k, permute, idx))
}
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_permutex2var_pd&expand=4274)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_permutex2var_pd&expand=4271)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub unsafe fn _mm512_mask_permutex2var_pd(
    a: __m512d,
    k: __mmask8,
    idx: __m512i,
    b: __m512d,
) -> __m512d {
    let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
    transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_permutex2var_pd&expand=4273)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
pub unsafe fn _mm512_maskz_permutex2var_pd(
    k: __mmask8,
    a: __m512d,
    idx: __m512i,
    b: __m512d,
) -> __m512d {
    let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, permute, zero))
}
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2_permutex2var_pd&expand=4272)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it generates vpermt2pd
pub unsafe fn _mm512_mask2_permutex2var_pd(
    a: __m512d,
    idx: __m512i,
    k: __mmask8,
    b: __m512d,
) -> __m512d {
    let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
    // Per Intel's documentation, unselected elements are copied from `idx`
    // (reinterpreted as doubles), not zeroed.
    let idx = _mm512_castsi512_pd(idx).as_f64x8();
    transmute(simd_select_bitmask(k, permute, idx))
}
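
// Illustrative sketch of the mask2 variants above: lanes whose mask bit is
// clear are filled from `idx`, not from `a` or zero. Hypothetical helper,
// assumes an AVX512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask2_permutex2var_epi64() {
    let a = _mm512_set1_epi64(1);
    let b = _mm512_set1_epi64(2);
    let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
    // Low four lanes take the interleaved a/b values; the high four copy `idx`.
    let r: [i64; 8] = transmute(_mm512_mask2_permutex2var_epi64(a, idx, 0b0000_1111, b));
    assert_eq!(r, [1, 2, 1, 2, 2, 10, 3, 11]);
}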
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_epi32&expand=5150)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] //should be vpshufd, but generates vpermilps
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x16();
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, a, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
                1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
                2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
                _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
                1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
                2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
                _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
                1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
                2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
                _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
            }
        };
    }
    let r: i32x16 = match imm8 & 0x3 {
        0 => shuffle1!(0, 4, 8, 12),
        1 => shuffle1!(1, 5, 9, 13),
        2 => shuffle1!(2, 6, 10, 14),
        _ => shuffle1!(3, 7, 11, 15),
    };
    transmute(r)
}
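
// Illustrative sketch: `_mm512_shuffle_epi32` uses the same per-lane 2-bit
// encoding as `_mm512_permute_ps`. Passing a raw literal assumes
// `_MM_PERM_ENUM` is the plain `i32` alias used elsewhere in this module; the
// `_MM_PERM_*` constants spell out the same values. Hypothetical helper,
// assumes an AVX512F-capable CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_epi32() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // 0b00_01_10_11 reverses the four i32 elements of every 128-bit lane.
    let r: [i32; 16] = transmute(_mm512_shuffle_epi32(a, 0b00_01_10_11));
    assert_eq!(r, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]);
}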
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_epi32&expand=5148)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_shuffle_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    imm8: _MM_PERM_ENUM,
) -> __m512i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x16();
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, a, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
                1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
                2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
                _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
                1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
                2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
                _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
                1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
                2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
                _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
            }
        };
    }
    let shuffle: i32x16 = match imm8 & 0x3 {
        0 => shuffle1!(0, 4, 8, 12),
        1 => shuffle1!(1, 5, 9, 13),
        2 => shuffle1!(2, 6, 10, 14),
        _ => shuffle1!(3, 7, 11, 15),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
}
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_epi32&expand=5149)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x16();
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, a, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
                1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
                2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
                _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
                1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
                2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
                _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
                1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
                2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
                _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
            }
        };
    }
    let shuffle: i32x16 = match imm8 & 0x3 {
        0 => shuffle1!(0, 4, 8, 12),
        1 => shuffle1!(1, 5, 9, 13),
        2 => shuffle1!(2, 6, 10, 14),
        _ => shuffle1!(3, 7, 11, 15),
    };
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_ps&expand=5203)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
                1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
                2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
                _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
                1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
                2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
                _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
                1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
                2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
                _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
            }
        };
    }
    match imm8 & 0x3 {
        0 => shuffle1!(0, 4, 8, 12),
        1 => shuffle1!(1, 5, 9, 13),
        2 => shuffle1!(2, 6, 10, 14),
        _ => shuffle1!(3, 7, 11, 15),
    }
}
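
// Illustrative sketch of the vshufps element routing: per 128-bit lane, the
// two low results come from `a` (imm8 bits 0..=3) and the two high results
// from `b` (bits 4..=7). Hypothetical helper, assumes an AVX512F-capable CPU
// at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_ps() {
    let a = _mm512_set1_ps(1.);
    let b = _mm512_set1_ps(2.);
    let r: [f32; 16] = transmute(_mm512_shuffle_ps(a, b, 0b00_00_00_00));
    // Every lane is [a, a, b, b] regardless of which element the fields select.
    assert_eq!(r, [1., 1., 2., 2., 1., 1., 2., 2., 1., 1., 2., 2., 1., 1., 2., 2.]);
}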
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_ps&expand=5201)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_ps(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    imm8: i32,
) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
                1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
                2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
                _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
                1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
                2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
                _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
                1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
                2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
                _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 4, 8, 12),
        1 => shuffle1!(1, 5, 9, 13),
        2 => shuffle1!(2, 6, 10, 14),
        _ => shuffle1!(3, 7, 11, 15),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
}
/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_ps&expand=5202)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
                1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
                2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
                _ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
                1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
                2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
                _ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
                1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
                2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
                _ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 4, 8, 12),
        1 => shuffle1!(1, 5, 9, 13),
        2 => shuffle1!(2, 6, 10, 14),
        _ => shuffle1!(3, 7, 11, 15),
    };
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_pd&expand=5192)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle8 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle7 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 7) & 0x1 {
                0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
                _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
            }
        };
    }
    macro_rules! shuffle6 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
            match (imm8 >> 6) & 0x1 {
                0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
                _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
            }
        };
    }
    macro_rules! shuffle5 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
            match (imm8 >> 5) & 0x1 {
                0 => shuffle6!($a, $b, $c, $d, $e, 12),
                _ => shuffle6!($a, $b, $c, $d, $e, 13),
            }
        };
    }
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            match (imm8 >> 4) & 0x1 {
                0 => shuffle5!($a, $b, $c, $d, 4),
                _ => shuffle5!($a, $b, $c, $d, 5),
            }
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr) => {
            match (imm8 >> 3) & 0x1 {
                0 => shuffle4!($a, $b, $c, 10),
                _ => shuffle4!($a, $b, $c, 11),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0x1 {
                0 => shuffle3!($a, $b, 2),
                _ => shuffle3!($a, $b, 3),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr) => {
            match (imm8 >> 1) & 0x1 {
                0 => shuffle2!($a, 8),
                _ => shuffle2!($a, 9),
            }
        };
    }
    match imm8 & 0x1 {
        0 => shuffle1!(0),
        _ => shuffle1!(1),
    }
}
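
// Illustrative sketch of the vshufpd encoding: imm8 holds one selector bit
// per result element, picking the low (0) or high (1) f64 of the interleaved
// a/b pair it belongs to. Hypothetical helper, assumes an AVX512F-capable CPU
// at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
    // All bits set: every result element takes the high half of its pair.
    let r: [f64; 8] = transmute(_mm512_shuffle_pd(a, b, 0b1111_1111));
    assert_eq!(r, [1., 11., 3., 13., 5., 15., 7., 17.]);
}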
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_pd&expand=5190)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    imm8: i32,
) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle8 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle7 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 7) & 0x1 {
                0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
                _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
            }
        };
    }
    macro_rules! shuffle6 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
            match (imm8 >> 6) & 0x1 {
                0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
                _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
            }
        };
    }
    macro_rules! shuffle5 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
            match (imm8 >> 5) & 0x1 {
                0 => shuffle6!($a, $b, $c, $d, $e, 12),
                _ => shuffle6!($a, $b, $c, $d, $e, 13),
            }
        };
    }
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            match (imm8 >> 4) & 0x1 {
                0 => shuffle5!($a, $b, $c, $d, 4),
                _ => shuffle5!($a, $b, $c, $d, 5),
            }
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr) => {
            match (imm8 >> 3) & 0x1 {
                0 => shuffle4!($a, $b, $c, 10),
                _ => shuffle4!($a, $b, $c, 11),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0x1 {
                0 => shuffle3!($a, $b, 2),
                _ => shuffle3!($a, $b, 3),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr) => {
            match (imm8 >> 1) & 0x1 {
                0 => shuffle2!($a, 8),
                _ => shuffle2!($a, 9),
            }
        };
    }
    let shuffle = match imm8 & 0x1 {
        0 => shuffle1!(0),
        _ => shuffle1!(1),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
}
/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_pd&expand=5191)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle8 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle7 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 7) & 0x1 {
                0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
                _ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
            }
        };
    }
    macro_rules! shuffle6 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
            match (imm8 >> 6) & 0x1 {
                0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
                _ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
            }
        };
    }
    macro_rules! shuffle5 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
            match (imm8 >> 5) & 0x1 {
                0 => shuffle6!($a, $b, $c, $d, $e, 12),
                _ => shuffle6!($a, $b, $c, $d, $e, 13),
            }
        };
    }
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr) => {
            match (imm8 >> 4) & 0x1 {
                0 => shuffle5!($a, $b, $c, $d, 4),
                _ => shuffle5!($a, $b, $c, $d, 5),
            }
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr) => {
            match (imm8 >> 3) & 0x1 {
                0 => shuffle4!($a, $b, $c, 10),
                _ => shuffle4!($a, $b, $c, 11),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr) => {
            match (imm8 >> 2) & 0x1 {
                0 => shuffle3!($a, $b, 2),
                _ => shuffle3!($a, $b, 3),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr) => {
            match (imm8 >> 1) & 0x1 {
                0 => shuffle2!($a, 8),
                _ => shuffle2!($a, 9),
            }
        };
    }
    let shuffle = match imm8 & 0x1 {
        0 => shuffle1!(0),
        _ => shuffle1!(1),
    };
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_i32x4&expand=5177)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))] //should be vshufi32x4, but generates vshufi64x2
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
                1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
                2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
                _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
                1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
                2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
                _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
            }
        };
    }
    let r: i32x16 = match imm8 & 0x3 {
        0 => shuffle1!(0, 1, 2, 3),
        1 => shuffle1!(4, 5, 6, 7),
        2 => shuffle1!(8, 9, 10, 11),
        _ => shuffle1!(12, 13, 14, 15),
    };
    transmute(r)
}
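
// Illustrative sketch of the vshufi32x4 encoding: each 2-bit field of imm8
// moves a whole 128-bit block, the low two destination blocks coming from `a`
// and the high two from `b`. Hypothetical helper, assumes an AVX512F-capable
// CPU at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_shuffle_i32x4() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(42);
    // 0b01_00_01_00: blocks 0 and 1 of `a`, then blocks 0 and 1 of `b`.
    let r: [i32; 16] = transmute(_mm512_shuffle_i32x4(a, b, 0b01_00_01_00));
    assert_eq!(r, [0, 1, 2, 3, 4, 5, 6, 7, 42, 42, 42, 42, 42, 42, 42, 42]);
}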
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_i32x4&expand=5175)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_i32x4(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    imm8: i32,
) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
                1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
                2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
                _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
                1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
                2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
                _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1, 2, 3),
        1 => shuffle1!(4, 5, 6, 7),
        2 => shuffle1!(8, 9, 10, 11),
        _ => shuffle1!(12, 13, 14, 15),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_i32x16()))
}
/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_i32&expand=5176)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_i32x4(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr,
         $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr,
         $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
                1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
                2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
                _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
                1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
                2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
                _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1, 2, 3),
        1 => shuffle1!(4, 5, 6, 7),
        2 => shuffle1!(8, 9, 10, 11),
        _ => shuffle1!(12, 13, 14, 15),
    };
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
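
// A sketch of the zeromask behaviour above, not from the original source
// (same test-helper assumptions as the earlier sketch): with k = 0xFF00 only
// the upper eight shuffled elements survive; the lower eight are zeroed
// because their mask bits are clear.
#[cfg(test)]
mod maskz_shuffle_i32x4_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn zeroes_unselected_elements() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_maskz_shuffle_i32x4(0b11111111_00000000, a, b, 0b1110_0100);
        let e = _mm512_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0, 24, 25, 26, 27, 28, 29, 30, 31);
        assert_eq_m512i(r, e);
    }
}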
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_i64x2&expand=5183)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, 8, 9),
                1 => shuffle3!($a, $b, $e, $f, 10, 11),
                2 => shuffle3!($a, $b, $e, $f, 12, 13),
                _ => shuffle3!($a, $b, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, 0, 1),
                1 => shuffle2!($a, $e, 2, 3),
                2 => shuffle2!($a, $e, 4, 5),
                _ => shuffle2!($a, $e, 6, 7),
            }
        };
    }
    match imm8 & 0x3 {
        0 => shuffle1!(0, 1),
        1 => shuffle1!(2, 3),
        2 => shuffle1!(4, 5),
        _ => shuffle1!(6, 7),
    }
}
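
// Illustrative sketch, not from the original source (same test-helper
// assumptions as the earlier sketches): for 64-bit elements each selected
// 128-bit lane carries two integers, so imm8 = 0b01_00_11_10 moves a's upper
// two lanes into the low half and b's lower two lanes into the high half.
#[cfg(test)]
mod shuffle_i64x2_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn selects_two_element_lanes() {
        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_shuffle_i64x2(a, b, 0b0100_1110);
        let e = _mm512_setr_epi64(4, 5, 6, 7, 8, 9, 10, 11);
        assert_eq_m512i(r, e);
    }
}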
/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_i64x&expand=5181)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_i64x2(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    imm8: i32,
) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, 8, 9),
                1 => shuffle3!($a, $b, $e, $f, 10, 11),
                2 => shuffle3!($a, $b, $e, $f, 12, 13),
                _ => shuffle3!($a, $b, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, 0, 1),
                1 => shuffle2!($a, $e, 2, 3),
                2 => shuffle2!($a, $e, 4, 5),
                _ => shuffle2!($a, $e, 6, 7),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1),
        1 => shuffle1!(2, 3),
        2 => shuffle1!(4, 5),
        _ => shuffle1!(6, 7),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_i64x8()))
}

/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_i64&expand=5182)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_i64x2(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, 8, 9),
                1 => shuffle3!($a, $b, $e, $f, 10, 11),
                2 => shuffle3!($a, $b, $e, $f, 12, 13),
                _ => shuffle3!($a, $b, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, 0, 1),
                1 => shuffle2!($a, $e, 2, 3),
                2 => shuffle2!($a, $e, 4, 5),
                _ => shuffle2!($a, $e, 6, 7),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1),
        1 => shuffle1!(2, 3),
        2 => shuffle1!(4, 5),
        _ => shuffle1!(6, 7),
    };
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_f32x4&expand=5165)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] // should be vshuff32x4, but generates vshuff64x2
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr,
         $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr,
         $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
                1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
                2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
                _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
                1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
                2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
                _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
            }
        };
    }
    match imm8 & 0x3 {
        0 => shuffle1!(0, 1, 2, 3),
        1 => shuffle1!(4, 5, 6, 7),
        2 => shuffle1!(8, 9, 10, 11),
        _ => shuffle1!(12, 13, 14, 15),
    }
}
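
// Float analogue of the integer sketch earlier, not from the original source
// (same test-helper assumptions, with `assert_eq_m512` for `__m512` values).
#[cfg(test)]
mod shuffle_f32x4_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn picks_float_lanes() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31.,
        );
        let r = _mm512_shuffle_f32x4(a, b, 0b1110_0100);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 24., 25., 26., 27., 28., 29., 30., 31.,
        );
        assert_eq_m512(r, e);
    }
}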
/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_f32&expand=5163)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_f32x4(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
    imm8: i32,
) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr,
         $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr,
         $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
                1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
                2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
                _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
                1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
                2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
                _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1, 2, 3),
        1 => shuffle1!(4, 5, 6, 7),
        2 => shuffle1!(8, 9, 10, 11),
        _ => shuffle1!(12, 13, 14, 15),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_f32x16()))
}

/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_f32&expand=5164)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr,
         $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr) => {
            simd_shuffle16(a, b, [$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr,
         $m:expr, $n:expr, $o:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
                1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
                2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
                _ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr, $i:expr, $m:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
                1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
                2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
                _ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1, 2, 3),
        1 => shuffle1!(4, 5, 6, 7),
        2 => shuffle1!(8, 9, 10, 11),
        _ => shuffle1!(12, 13, 14, 15),
    };
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_shuffle_f64x2&expand=5171)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, 8, 9),
                1 => shuffle3!($a, $b, $e, $f, 10, 11),
                2 => shuffle3!($a, $b, $e, $f, 12, 13),
                _ => shuffle3!($a, $b, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, 0, 1),
                1 => shuffle2!($a, $e, 2, 3),
                2 => shuffle2!($a, $e, 4, 5),
                _ => shuffle2!($a, $e, 6, 7),
            }
        };
    }
    match imm8 & 0x3 {
        0 => shuffle1!(0, 1),
        1 => shuffle1!(2, 3),
        2 => shuffle1!(4, 5),
        _ => shuffle1!(6, 7),
    }
}
/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_shuffle_f64x2&expand=5169)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_f64x2(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    imm8: i32,
) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, 8, 9),
                1 => shuffle3!($a, $b, $e, $f, 10, 11),
                2 => shuffle3!($a, $b, $e, $f, 12, 13),
                _ => shuffle3!($a, $b, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, 0, 1),
                1 => shuffle2!($a, $e, 2, 3),
                2 => shuffle2!($a, $e, 4, 5),
                _ => shuffle2!($a, $e, 6, 7),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1),
        1 => shuffle1!(2, 3),
        2 => shuffle1!(4, 5),
        _ => shuffle1!(6, 7),
    };
    transmute(simd_select_bitmask(k, shuffle, src.as_f64x8()))
}

/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_shuffle_f64x2&expand=5170)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_f64x2(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8 = (imm8 & 0xFF) as u8;
    macro_rules! shuffle4 {
        ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
            simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
        };
    }
    macro_rules! shuffle3 {
        ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
            match (imm8 >> 6) & 0x3 {
                0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
                1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
                2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
                _ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
            }
        };
    }
    macro_rules! shuffle2 {
        ($a:expr, $b:expr, $e:expr, $f:expr) => {
            match (imm8 >> 4) & 0x3 {
                0 => shuffle3!($a, $b, $e, $f, 8, 9),
                1 => shuffle3!($a, $b, $e, $f, 10, 11),
                2 => shuffle3!($a, $b, $e, $f, 12, 13),
                _ => shuffle3!($a, $b, $e, $f, 14, 15),
            }
        };
    }
    macro_rules! shuffle1 {
        ($a:expr, $e:expr) => {
            match (imm8 >> 2) & 0x3 {
                0 => shuffle2!($a, $e, 0, 1),
                1 => shuffle2!($a, $e, 2, 3),
                2 => shuffle2!($a, $e, 4, 5),
                _ => shuffle2!($a, $e, 6, 7),
            }
        };
    }
    let shuffle = match imm8 & 0x3 {
        0 => shuffle1!(0, 1),
        1 => shuffle1!(2, 3),
        2 => shuffle1!(4, 5),
        _ => shuffle1!(6, 7),
    };
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, shuffle, zero))
}
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extractf32x4_ps&expand=2442)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extractf32x4_ps(a: __m512, imm8: i32) -> __m128 {
    assert!(imm8 >= 0 && imm8 <= 3);
    match imm8 & 0x3 {
        0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
        1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
        2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
        _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
    }
}
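
// Illustrative sketch, not from the original source (assumes `assert_eq_m128`
// from the x86 test support module): imm8 = 3 selects the highest 128-bit
// lane, i.e. elements 12..=15.
#[cfg(test)]
mod extractf32x4_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn extracts_highest_lane() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_extractf32x4_ps(a, 3);
        assert_eq_m128(r, _mm_setr_ps(12., 13., 14., 15.));
    }
}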
/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extractf32x4_ps&expand=2443)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extractf32x4_ps(
    src: __m128,
    k: __mmask8,
    a: __m512,
    imm8: i32,
) -> __m128 {
    assert!(imm8 >= 0 && imm8 <= 3);
    let extract: __m128 = match imm8 & 0x3 {
        0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
        1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
        2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
        _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
    };
    transmute(simd_select_bitmask(k, extract.as_f32x4(), src.as_f32x4()))
}

/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extractf32x4_ps&expand=2444)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512, imm8: i32) -> __m128 {
    assert!(imm8 >= 0 && imm8 <= 3);
    let extract: __m128 = match imm8 & 0x3 {
        0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
        1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
        2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
        _ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
    };
    let zero = _mm_setzero_ps().as_f32x4();
    transmute(simd_select_bitmask(k, extract.as_f32x4(), zero))
}
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extracti64x4_epi64&expand=2473)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf64x4, imm8 = 1) // should be vextracti64x4
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i, imm8: i32) -> __m256i {
    assert!(imm8 >= 0 && imm8 <= 1);
    match imm8 & 0x1 {
        0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
        _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
    }
}
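
// Illustrative sketch, not from the original source (assumes
// `assert_eq_m256i` from the x86 test support module): imm8 selects one of
// the two 256-bit halves; 1 takes the upper four integers.
#[cfg(test)]
mod extracti64x4_epi64_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn extracts_upper_half() {
        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm512_extracti64x4_epi64(a, 1);
        assert_eq_m256i(r, _mm256_setr_epi64x(4, 5, 6, 7));
    }
}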
/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extracti64x4_epi64&expand=2474)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextracti64x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extracti64x4_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
    imm8: i32,
) -> __m256i {
    assert!(imm8 >= 0 && imm8 <= 1);
    let extract = match imm8 & 0x1 {
        0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
        _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
    };
    transmute(simd_select_bitmask(k, extract, src.as_i64x4()))
}

/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extracti64x4_epi64&expand=2475)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextracti64x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m256i {
    assert!(imm8 >= 0 && imm8 <= 1);
    let extract: __m256i = match imm8 & 0x1 {
        0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
        _ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
    };
    let zero = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, extract.as_i64x4(), zero))
}
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extractf64x4_pd&expand=2454)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extractf64x4_pd(a: __m512d, imm8: i32) -> __m256d {
    assert!(imm8 >= 0 && imm8 <= 1);
    match imm8 & 0x1 {
        0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
        _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
    }
}
/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extractf64x4_pd&expand=2455)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extractf64x4_pd(
    src: __m256d,
    k: __mmask8,
    a: __m512d,
    imm8: i32,
) -> __m256d {
    assert!(imm8 >= 0 && imm8 <= 1);
    let extract = match imm8 & 0x1 {
        0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
        _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
    };
    transmute(simd_select_bitmask(k, extract, src))
}

/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extractf64x4_pd&expand=2456)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m256d {
    assert!(imm8 >= 0 && imm8 <= 1);
    let extract = match imm8 & 0x1 {
        0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
        _ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
    };
    let zero = _mm256_setzero_pd();
    transmute(simd_select_bitmask(k, extract, zero))
}
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_extracti32x4_epi32&expand=2461)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextractf32x4, imm8 = 3) // should be vextracti32x4
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i {
    assert!(imm8 >= 0 && imm8 <= 3);
    let a = a.as_i32x16();
    let undefined = _mm512_undefined_epi32().as_i32x16();
    let extract: i32x4 = match imm8 & 0x3 {
        0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
        1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
        2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
        _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
    };
    transmute(extract)
}
/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_extracti32x4_epi32&expand=2462)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextracti32x4, imm8 = 3)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extracti32x4_epi32(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
    imm8: i32,
) -> __m128i {
    assert!(imm8 >= 0 && imm8 <= 3);
    let a = a.as_i32x16();
    let undefined = _mm512_undefined_epi32().as_i32x16();
    let extract: i32x4 = match imm8 & 0x3 {
        0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
        1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
        2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
        _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
    };
    transmute(simd_select_bitmask(k, extract, src.as_i32x4()))
}

/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_extracti32x4_epi32&expand=2463)
#[target_feature(enable = "avx512f")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(vextracti32x4, imm8 = 3)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i, imm8: i32) -> __m128i {
    assert!(imm8 >= 0 && imm8 <= 3);
    let a = a.as_i32x16();
    let undefined = _mm512_undefined_epi32().as_i32x16();
    let extract: i32x4 = match imm8 & 0x3 {
        0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
        1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
        2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
        _ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
    };
    let zero = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, extract, zero))
}
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_moveldup_ps&expand=3862)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
    let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
    transmute(r)
}
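
// Illustrative sketch, not from the original source (same test-helper
// assumptions as the earlier sketches): every even-indexed element is copied
// into the odd slot above it.
#[cfg(test)]
mod moveldup_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn duplicates_even_elements() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_moveldup_ps(a);
        let e = _mm512_setr_ps(
            0., 0., 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14.,
        );
        assert_eq_m512(r, e);
    }
}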
/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_moveldup_ps&expand=3860)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}

/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_moveldup_ps&expand=3861)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
    let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_movehdup_ps&expand=3852)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
    let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
    transmute(r)
}
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_movehdup&expand=3850)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
    transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_moveh&expand=3851)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
    let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, mov, zero))
}
/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_movedup_pd&expand=3843)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
    let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
    transmute(r)
}
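
// Illustrative sketch, not from the original source (assumes
// `assert_eq_m512d` from the x86 test support module): the double-precision
// variant duplicates each even-indexed element.
#[cfg(test)]
mod movedup_pd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn duplicates_even_doubles() {
        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm512_movedup_pd(a);
        assert_eq_m512d(r, _mm512_setr_pd(0., 0., 2., 2., 4., 4., 6., 6.));
    }
}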
/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_movedup_pd&expand=3841)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
    transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_movedup_pd&expand=3842)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
    let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, mov, zero))
}
/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_inserti32x4&expand=3174)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))] // should be vinserti32x4
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 3);
    let a = a.as_i32x16();
    let b = _mm512_castsi128_si512(b).as_i32x16();
    let ret: i32x16 = match imm8 & 0b11 {
        0 => simd_shuffle16(
            a,
            b,
            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        1 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        2 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
        ),
        _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
    };
    transmute(ret)
}
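
// Illustrative sketch, not from the original source (same test-helper
// assumptions as the earlier sketches): imm8 = 2 overwrites the third 128-bit
// lane of `a` with `b`, leaving the other lanes untouched.
#[cfg(test)]
mod inserti32x4_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn replaces_selected_lane() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi32(16, 17, 18, 19);
        let r = _mm512_inserti32x4(a, b, 2);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
}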
/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_inserti32x4&expand=3175)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_inserti32x4(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m128i,
    imm8: i32,
) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 3);
    let a = a.as_i32x16();
    let b = _mm512_castsi128_si512(b).as_i32x16();
    let insert: i32x16 = match imm8 & 0b11 {
        0 => simd_shuffle16(
            a,
            b,
            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        1 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        2 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
        ),
        _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
    };
    transmute(simd_select_bitmask(k, insert, src.as_i32x16()))
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_inserti32x4&expand=3176)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_inserti32x4(k: __mmask16, a: __m512i, b: __m128i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 3);
    let a = a.as_i32x16();
    let b = _mm512_castsi128_si512(b).as_i32x16();
    let insert = match imm8 & 0b11 {
        0 => simd_shuffle16(
            a,
            b,
            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        1 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        2 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
        ),
        _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
    };
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, insert, zero))
}
/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_inserti64x4&expand=3186)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))] // should be vinserti64x4
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 1);
    let b = _mm512_castsi256_si512(b);
    match imm8 & 0b1 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
    }
}
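
// Illustrative sketch, not from the original source (same test-helper
// assumptions as the earlier sketches): imm8 = 1 replaces the upper 256-bit
// half of `a` with `b`.
#[cfg(test)]
mod inserti64x4_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn replaces_upper_half() {
        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_setr_epi64x(8, 9, 10, 11);
        let r = _mm512_inserti64x4(a, b, 1);
        assert_eq_m512i(r, _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11));
    }
}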
/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_inserti64x4&expand=3187)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_inserti64x4(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m256i,
    imm8: i32,
) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 1);
    let b = _mm512_castsi256_si512(b);
    let insert = match imm8 & 0b1 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
    };
    transmute(simd_select_bitmask(k, insert, src.as_i64x8()))
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_inserti64x4&expand=3188)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_inserti64x4(k: __mmask8, a: __m512i, b: __m256i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 1);
    let b = _mm512_castsi256_si512(b);
    let insert = match imm8 & 0b1 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
    };
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, insert, zero))
}
/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_insertf32x4&expand=3155)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128, imm8: i32) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 3);
    let b = _mm512_castps128_ps512(b);
    match imm8 & 0b11 {
        0 => simd_shuffle16(
            a,
            b,
            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        1 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        2 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
        ),
        _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
    }
}
/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_insertf32x4&expand=3156)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_insertf32x4(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m128,
    imm8: i32,
) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 3);
    let b = _mm512_castps128_ps512(b);
    let insert = match imm8 & 0b11 {
        0 => simd_shuffle16(
            a,
            b,
            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        1 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        2 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
        ),
        _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
    };
    transmute(simd_select_bitmask(k, insert, src.as_f32x16()))
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_insertf32x4&expand=3157)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_insertf32x4(k: __mmask16, a: __m512, b: __m128, imm8: i32) -> __m512 {
    assert!(imm8 >= 0 && imm8 <= 3);
    let b = _mm512_castps128_ps512(b);
    let insert = match imm8 & 0b11 {
        0 => simd_shuffle16(
            a,
            b,
            [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        1 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
        ),
        2 => simd_shuffle16(
            a,
            b,
            [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
        ),
        _ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
    };
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, insert, zero))
}
/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_insertf64x4&expand=3167)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d, imm8: i32) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 1);
    let b = _mm512_castpd256_pd512(b);
    match imm8 & 0b1 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
    }
}
/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_insertf64x4&expand=3168)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_insertf64x4(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m256d,
    imm8: i32,
) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 1);
    let b = _mm512_castpd256_pd512(b);
    let insert = match imm8 & 0b1 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
    };
    transmute(simd_select_bitmask(k, insert, src.as_f64x8()))
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_insertf64x4&expand=3169)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_insertf64x4(k: __mmask8, a: __m512d, b: __m256d, imm8: i32) -> __m512d {
    assert!(imm8 >= 0 && imm8 <= 1);
    let b = _mm512_castpd256_pd512(b);
    let insert = match imm8 & 0b1 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
        _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
    };
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, insert, zero))
}
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_epi32&expand=6021)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))] // should be vpunpckhdq
pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let r: i32x16 = simd_shuffle16(
        a,
        b,
        [
            2, 18, 3, 19, 2 + 4, 18 + 4, 3 + 4, 19 + 4,
            2 + 8, 18 + 8, 3 + 8, 19 + 8, 2 + 12, 18 + 12, 3 + 12, 19 + 12,
        ],
    );
    transmute(r)
}
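
// Illustrative sketch, not from the original source (same test-helper
// assumptions as the earlier sketches): within every 128-bit lane the two
// high integers of `a` and `b` are interleaved, a-element first.
#[cfg(test)]
mod unpackhi_epi32_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn interleaves_high_halves() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
        let r = _mm512_unpackhi_epi32(a, b);
        let e = _mm512_setr_epi32(2, 18, 3, 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31);
        assert_eq_m512i(r, e);
    }
}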
/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_epi32&expand=6019)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm512_mask_unpackhi_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_epi32&expand=6020)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_epi64&expand=6030)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))] // should be vpunpckhqdq
pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
}
/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_epi64&expand=6028)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm512_mask_unpackhi_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_epi64&expand=6029)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_ps&expand=6060)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    simd_shuffle16(
        a,
        b,
        [
            2, 18, 3, 19, 2 + 4, 18 + 4, 3 + 4, 19 + 4,
            2 + 8, 18 + 8, 3 + 8, 19 + 8, 2 + 12, 18 + 12, 3 + 12, 19 + 12,
        ],
    )
}
/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_ps&expand=6058)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_ps&expand=6059)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpackhi_pd&expand=6048)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
    simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
}

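// Illustrative worked example (not part of the original source): with
// a = [a0, a1, a2, a3, a4, a5, a6, a7] and b likewise, the shuffle above
// yields [a1, b1, a3, b3, a5, b5, a7, b7], i.e. the high (odd) element of
// each 128-bit lane of `a` interleaved with the matching element of `b`.
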
/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpackhi_pd&expand=6046)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_mask_unpackhi_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpackhi_pd&expand=6047)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, unpackhi, zero))
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_epi32&expand=6078)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let r: i32x16 = simd_shuffle16(
        a,
        b,
        [
            0, 16, 1, 17, 0 + 4, 16 + 4, 1 + 4, 17 + 4,
            0 + 8, 16 + 8, 1 + 8, 17 + 8, 0 + 12, 16 + 12, 1 + 12, 17 + 12,
        ],
    );
    transmute(r)
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_epi32&expand=6076)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm512_mask_unpacklo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_epi32&expand=6077)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_epi64&expand=6087)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
    simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_epi64&expand=6085)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm512_mask_unpacklo_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_epi64&expand=6086)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_ps&expand=6117)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
    simd_shuffle16(
        a,
        b,
        [
            0, 16, 1, 17, 0 + 4, 16 + 4, 1 + 4, 17 + 4,
            0 + 8, 16 + 8, 1 + 8, 17 + 8, 0 + 12, 16 + 12, 1 + 12, 17 + 12,
        ],
    )
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_ps&expand=6115)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_ps&expand=6116)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_unpacklo_pd&expand=6105)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
    simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_unpacklo_pd&expand=6103)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_mask_unpacklo_pd(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
    transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_unpacklo_pd&expand=6104)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, unpacklo, zero))
}

/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps128_ps512&expand=621)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    simd_shuffle16(
        a,
        _mm_set1_ps(-1.),
        [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
    )
}

/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    simd_shuffle16(
        a,
        _mm256_set1_ps(-1.),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
    )
}

/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextps128_ps512&expand=6196)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
    simd_shuffle16(
        a,
        _mm_set1_ps(0.),
        [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
    )
}

/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextps256_ps512&expand=6197)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
    simd_shuffle16(
        a,
        _mm256_set1_ps(0.),
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
    )
}

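// Hedged sketch (illustrative, not from the original source): `cast` leaves
// the upper bits unspecified while `zext` guarantees zeros, so only the
// latter is safe to feed into a full 512-bit comparison:
//
//     let wide  = _mm512_zextps128_ps512(small); // upper 384 bits are 0.0
//     let undef = _mm512_castps128_ps512(small); // upper 384 bits unspecified
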
/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    simd_shuffle4(a, a, [0, 1, 2, 3])
}

/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps_pd&expand=616)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d {
    transmute(a.as_m512())
}

/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castps_si512&expand=619)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
    transmute(a.as_m512())
}

/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    simd_shuffle8(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
}

/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    simd_shuffle8(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
}

/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextpd128_pd512&expand=6193)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
    simd_shuffle8(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
}

/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextpd256_pd512&expand=6194)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
    simd_shuffle8(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
}

/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    simd_shuffle2(a, a, [0, 1])
}

/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    simd_shuffle4(a, a, [0, 1, 2, 3])
}

/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd_ps&expand=604)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    transmute(a.as_m512d())
}

/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castpd_si512&expand=607)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    transmute(a.as_m512d())
}

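// Illustrative round-trip (not from the original source): these casts are
// pure bit reinterpretations, so converting to another 512-bit type and back
// is an identity on the bit pattern:
//
//     let bits = _mm512_castpd_si512(x);    // f64 lanes viewed as integers
//     let back = _mm512_castsi512_pd(bits); // same bits, f64 view again
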
/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    simd_shuffle8(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
}

/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    simd_shuffle8(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
}

/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextsi128_si512&expand=6199)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    simd_shuffle8(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
}

/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    simd_shuffle8(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
}

/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    simd_shuffle2(a, a, [0, 1])
}

/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    simd_shuffle4(a, a, [0, 1, 2, 3])
}

/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_ps&expand=635)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    transmute(a)
}

/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_castsi512_pd&expand=634)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    transmute(a)
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastd_epi32&expand=545)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
    let a = _mm512_castsi128_si512(a).as_i32x16();
    let ret: i32x16 = simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    transmute(ret)
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastd_epi32&expand=546)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastd_epi32&expand=547)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

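// Hedged sketch (mask value illustrative, not from the original source):
// broadcast the low 32-bit element of `a` everywhere, then keep it only in
// the low four result elements; the rest come from `src`:
//
//     let r = _mm512_mask_broadcastd_epi32(src, 0b0000_0000_0000_1111, a);
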
/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastq_epi64&expand=560)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastq_epi64&expand=561)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastq_epi64&expand=562)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastss_ps&expand=578)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
    simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastss_ps&expand=579)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastss_ps&expand=580)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
    let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcastsd_pd&expand=567)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
    simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcastsd_pd&expand=568)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcastsd_pd&expand=569)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
    let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_i32x4&expand=510)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
    let a = _mm512_castsi128_si512(a).as_i32x16();
    let ret: i32x16 = simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
    transmute(ret)
}

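// Illustrative layout note (not from the original source): if the low
// 128 bits of `a` hold [x0, x1, x2, x3], the result repeats that block four
// times: [x0, x1, x2, x3, x0, x1, x2, x3, x0, x1, x2, x3, x0, x1, x2, x3].
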
/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_i32x4&expand=511)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_i32x4&expand=512)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
    let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_i64x4&expand=522)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
    simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_i64x4&expand=523)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_i64x4&expand=524)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
    let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_f32x4&expand=483)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
    simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_f32x4&expand=484)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_f32x4&expand=485)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
    let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
    let zero = _mm512_setzero_ps().as_f32x16();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_broadcast_f64x4&expand=495)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
    simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_broadcast_f64x4&expand=496)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
    let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_broadcast_f64x4&expand=497)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
    let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
    let zero = _mm512_setzero_pd().as_f64x8();
    transmute(simd_select_bitmask(k, broadcast, zero))
}

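// Hedged sketch (mask value illustrative, not from the original source):
// broadcast a 256-bit block of four doubles and keep only the copy in the
// lower half of the result, zeroing the upper half:
//
//     let r = _mm512_maskz_broadcast_f64x4(0b0000_1111, quad);
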
/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_epi32&expand=435)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16()))
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_epi64&expand=438)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8()))
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_ps&expand=451)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16()))
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_blend_pd&expand=446)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8()))
}

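// Hedged sketch (mask value illustrative, not from the original source):
// blend selects from `b` where a mask bit is 1 and from `a` where it is 0,
// so k = 0b1111_0000 takes the upper four doubles from `b` and the lower
// four from `a`:
//
//     let r = _mm512_mask_blend_pd(0b1111_0000, a, b);
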
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_alignr_epi32&expand=245)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_alignr_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let imm8: i32 = imm8 % 16;
    let r: i32x16 = match imm8 {
        0 => simd_shuffle16(a, b, [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]),
        1 => simd_shuffle16(a, b, [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0]),
        2 => simd_shuffle16(a, b, [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1]),
        3 => simd_shuffle16(a, b, [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2]),
        4 => simd_shuffle16(a, b, [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3]),
        5 => simd_shuffle16(a, b, [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4]),
        6 => simd_shuffle16(a, b, [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5]),
        7 => simd_shuffle16(a, b, [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6]),
        8 => simd_shuffle16(a, b, [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7]),
        9 => simd_shuffle16(a, b, [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
        10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
        12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
        13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
        14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
        _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
    };
    transmute(r)
}

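// Worked example (hedged, not from the original source): with imm8 = 1 the
// 32-element concatenation a:b is shifted right by one 32-bit element, so
// the result is [b1, b2, ..., b15, a0], matching the `1 =>` arm above
// (shuffle indices 16..=31 select from `b`, 0..=15 from `a`).
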
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_alignr_epi32&expand=246)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_alignr_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    imm8: i32,
) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let imm8: i32 = imm8 % 16;
    let r: i32x16 = match imm8 {
        0 => simd_shuffle16(a, b, [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]),
        1 => simd_shuffle16(a, b, [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0]),
        2 => simd_shuffle16(a, b, [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1]),
        3 => simd_shuffle16(a, b, [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2]),
        4 => simd_shuffle16(a, b, [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3]),
        5 => simd_shuffle16(a, b, [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4]),
        6 => simd_shuffle16(a, b, [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5]),
        7 => simd_shuffle16(a, b, [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6]),
        8 => simd_shuffle16(a, b, [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7]),
        9 => simd_shuffle16(a, b, [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
        10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
        12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
        13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
        14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
        _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
    };
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_alignr_epi32&expand=247)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_alignr_epi32(k: __mmask16, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let a = a.as_i32x16();
    let b = b.as_i32x16();
    let imm8: i32 = imm8 % 16;
    let r: i32x16 = match imm8 {
        0 => simd_shuffle16(a, b, [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]),
        1 => simd_shuffle16(a, b, [17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0]),
        2 => simd_shuffle16(a, b, [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1]),
        3 => simd_shuffle16(a, b, [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2]),
        4 => simd_shuffle16(a, b, [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3]),
        5 => simd_shuffle16(a, b, [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4]),
        6 => simd_shuffle16(a, b, [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5]),
        7 => simd_shuffle16(a, b, [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6]),
        8 => simd_shuffle16(a, b, [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7]),
        9 => simd_shuffle16(a, b, [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8]),
        10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
        12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
        13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
        14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
        _ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
    };
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_alignr_epi64&expand=254)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_alignr_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8: i32 = imm8 % 8;
    let r: i64x8 = match imm8 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
        1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
        2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
        3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
        4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
        5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
        6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
        _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
    };
    transmute(r)
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_alignr_epi64&expand=255)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_alignr_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    imm8: i32,
) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8: i32 = imm8 % 8;
    let r: i64x8 = match imm8 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
        1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
        2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
        3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
        4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
        5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
        6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
        _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
    };
    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_alignr_epi64&expand=256)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_alignr_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
    assert!(imm8 >= 0 && imm8 <= 255);
    let imm8: i32 = imm8 % 8;
    let r: i64x8 = match imm8 {
        0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
        1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
        2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
        3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
        4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
        5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
        6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
        _ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
    };
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let and = _mm512_and_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, and, src.as_i32x16()))
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let and = _mm512_and_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, and, zero))
}

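// Hedged sketch (mask value illustrative, not from the original source):
// AND two vectors lane-wise, then zero everything except the low element:
//
//     let r = _mm512_maskz_and_epi32(0b0000_0000_0000_0001, a, b);
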
/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let and = _mm512_and_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, and, src.as_i64x8()))
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_Epi32&expand=274)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let and = _mm512_and_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, and, zero))
}

/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let or = _mm512_or_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, or, src.as_i32x16()))
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let or = _mm512_or_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, or, zero))
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let or = _mm512_or_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, or, src.as_i64x8()))
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let or = _mm512_or_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, or, zero))
}

/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let xor = _mm512_xor_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let xor = _mm512_xor_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, xor, zero))
}

15716 /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
15718 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
15720 #[target_feature(enable = "avx512f")]
15721 #[cfg_attr(test, assert_instr(vpxorq))]
15722 pub unsafe fn _mm512_xor_epi64(a
: __m512i
, b
: __m512i
) -> __m512i
{
15723 transmute(simd_xor(a
.as_i64x8(), b
.as_i64x8()))
15726 /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15728 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
15730 #[target_feature(enable = "avx512f")]
15731 #[cfg_attr(test, assert_instr(vpxorq))]
15732 pub unsafe fn _mm512_mask_xor_epi64(src
: __m512i
, k
: __mmask8
, a
: __m512i
, b
: __m512i
) -> __m512i
{
15733 let xor
= _mm512_xor_epi64(a
, b
).as_i64x8();
15734 transmute(simd_select_bitmask(k
, xor
, src
.as_i64x8()))
15737 /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15739 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
15741 #[target_feature(enable = "avx512f")]
15742 #[cfg_attr(test, assert_instr(vpxorq))]
15743 pub unsafe fn _mm512_maskz_xor_epi64(k
: __mmask8
, a
: __m512i
, b
: __m512i
) -> __m512i
{
15744 let xor
= _mm512_xor_epi64(a
, b
).as_i64x8();
15745 let zero
= _mm512_setzero_si512().as_i64x8();
15746 transmute(simd_select_bitmask(k
, xor
, zero
))
15749 /// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
15751 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
15753 #[target_feature(enable = "avx512f")]
15754 #[cfg_attr(test, assert_instr(vpxorq))]
15755 pub unsafe fn _mm512_xor_si512(a
: __m512i
, b
: __m512i
) -> __m512i
{
15756 transmute(simd_xor(a
.as_i32x16(), b
.as_i32x16()))

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_andnot_epi32&expand=310)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))] // should be vpandnd
pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
}
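
// Note on the implementation above (added for clarity, not original
// documentation): `!a` is obtained by XORing `a` with an all-ones vector,
// since `x ^ !0 == !x` bitwise, so the expression computes `(!a) & b`:
//
//     let a = _mm512_set1_epi32(0b1100);
//     let b = _mm512_set1_epi32(0b1010);
//     // every lane of r is (!0b1100) & 0b1010 = 0b0010
//     let r = _mm512_andnot_epi32(a, b);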

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_andnot_epi32&expand=311)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm512_mask_andnot_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
    transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_andnot_epi32&expand=312)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, andnot, zero))
}

/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_andnot_epi64&expand=317)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_andnot_epi64&expand=318)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_mask_andnot_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
    transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_andnot_epi64&expand=319)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, andnot, zero))
}

/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_andnot_si512&expand=340)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    transmute(a & b)
}

/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    transmute(a & b)
}

/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    transmute(a | b)
}

/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    transmute(a | b)
}

/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    transmute(a ^ b)
}

/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    transmute(a ^ b)
}

/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=knot_mask16&expand=3233)
#[target_feature(enable = "avx512f")]
pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
    transmute(a ^ 0b11111111_11111111)
}
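
// Illustrative sketch (not from the original source): XOR against an all-ones
// 16-bit pattern flips every mask bit, which is exactly bitwise NOT.
//
//     let k: __mmask16 = 0b00000000_11111111;
//     let nk = _knot_mask16(k);
//     // nk == 0b11111111_00000000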

/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_knot&expand=3231)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
    transmute(a ^ 0b11111111_11111111)
}

/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kandn_mask16&expand=3218)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kandn&expand=3216)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxnor_mask16&expand=3285)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxnor&expand=3283)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}

/// Copy 16-bit mask a to k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm512_kmov&expand=3228)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
    let r: u16 = a;
    transmute(r)
}

/// Converts integer mask into bitmask, storing the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_int2mask&expand=3189)
#[target_feature(enable = "avx512f")] // generate normal mov code instead of kmovw
pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 {
    let r: u16 = mask as u16;
    transmute(r)
}

/// Converts bit mask k1 into an integer value, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask2int&expand=3544)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 {
    let r: i32 = k1 as i32;
    transmute(r)
}

/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kunpackb&expand=3280)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw
pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
    let a = a & 0b00000000_11111111;
    let b = b & 0b11111111_00000000;
    transmute(a | b)
}
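
// Illustrative sketch (not from the original source): as written above, the
// low byte of the result comes from `a` and the high byte from `b`.
//
//     let a: __mmask16 = 0b11111111_00001111;
//     let b: __mmask16 = 0b00110000_11111111;
//     let k = _mm512_kunpackb(a, b);
//     // k == 0b00110000_00001111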

/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kortestc&expand=3247)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
    let r = a | b;
    if r == 0b11111111_11111111 {
        1
    } else {
        0
    }
}
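
// Usage sketch (illustrative, not from the original source): kortestc is a
// cheap "do the two masks together cover all bits?" test, returning 1 only
// when the OR of the masks is all ones.
//
//     let done = _mm512_kortestc(0b11110000_11111111, 0b00001111_00000000);
//     // done == 1, since the two masks together cover all 16 bits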

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_test_epi32_mask&expand=5890)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
}
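
// Illustrative sketch (not from the original source; assumes a
// `_mm512_setr_epi32` constructor is available elsewhere in this module):
// vptestmd behaves like a per-lane "(a & b) != 0" test, handy for flag bits.
//
//     let flags = _mm512_set1_epi32(0b100);
//     let data = _mm512_setr_epi32(0, 0b100, 0b111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
//     let k = _mm512_test_epi32_mask(data, flags);
//     // k == 0b0000000000000110: only lanes 1 and 2 have bit 2 set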

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_test_epi32_mask&expand=5889)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_test_epi64_mask&expand=5896)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_test_epi64_mask&expand=5895)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_testn_epi32_mask&expand=5921)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_testn_epi32_mask&expand=5920)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_testn_epi64_mask&expand=5927)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_testn_epi64_mask&expand=5926)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_stream_ps&expand=5671)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    intrinsics::nontemporal_store(mem_addr as *mut __m512, a);
}
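
// Usage sketch (illustrative, not from the original source): the destination
// must be 64-byte aligned; a `#[repr(align(64))]` wrapper is one way to
// guarantee that from safe Rust before calling into this intrinsic.
//
//     #[repr(align(64))]
//     struct Aligned([f32; 16]);
//
//     let mut out = Aligned([0.0; 16]);
//     _mm512_stream_ps(out.0.as_mut_ptr(), _mm512_set1_ps(1.0));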

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_stream_pd&expand=5667)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))] // should be vmovntpd
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m512d, a);
}

/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_stream_si512&expand=5675)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))] // should be vmovntdq
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut i64, a: __m512i) {
    intrinsics::nontemporal_store(mem_addr as *mut __m512i, a);
}

/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_ps(
    e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32,
    e8: f32, e9: f32, e10: f32, e11: f32, e12: f32, e13: f32, e14: f32, e15: f32,
) -> __m512 {
    _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    )
}

/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_ps(
    e0: f32, e1: f32, e2: f32, e3: f32, e4: f32, e5: f32, e6: f32, e7: f32,
    e8: f32, e9: f32, e10: f32, e11: f32, e12: f32, e13: f32, e14: f32, e15: f32,
) -> __m512 {
    let r = f32x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    );
    transmute(r)
}
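
// Note on argument order (added for clarity, not original documentation):
// `_mm512_set_ps` lists elements from the highest lane down, while
// `_mm512_setr_ps` ("r" for reversed) lists them from lane 0 up, so
//
//     _mm512_set_ps(15., 14., /* ... */ 1., 0.)
//
// and
//
//     _mm512_setr_ps(0., 1., /* ... */ 14., 15.)
//
// produce the same vector, with lane i holding the value i.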

/// Broadcast 64-bit float `a` to all elements of `dst`.
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
    transmute(f64x8::splat(a))
}

/// Broadcast 32-bit float `a` to all elements of `dst`.
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
    transmute(f32x16::splat(a))
}

/// Sets packed 32-bit integers in `dst` with the supplied values.
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi32(
    e15: i32, e14: i32, e13: i32, e12: i32, e11: i32, e10: i32, e9: i32, e8: i32,
    e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32, e0: i32,
) -> __m512i {
    _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Broadcast 8-bit integer a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set1_epi8&expand=4972)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
    transmute(i8x64::splat(a))
}

/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set1_epi16&expand=4944)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i {
    transmute(i16x32::splat(a))
}

/// Broadcast 32-bit integer `a` to all elements of `dst`.
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
    transmute(i32x16::splat(a))
}

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_set1_epi32&expand=4951)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
    let r = _mm512_set1_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
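
// Illustrative sketch (not from the original source): a writemask broadcast
// overwrites only the lanes selected by `k` and keeps `src` elsewhere.
//
//     let src = _mm512_set1_epi32(-1);
//     // even-numbered lanes become 7, odd-numbered lanes stay -1
//     let r = _mm512_mask_set1_epi32(src, 0b01010101_01010101, 7);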

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_set1_epi32&expand=4952)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
    let r = _mm512_set1_epi32(a).as_i32x16();
    let zero = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Broadcast 64-bit integer `a` to all elements of `dst`.
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
    transmute(i64x8::splat(a))
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_set1_epi64&expand=4959)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
    let r = _mm512_set1_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_set1_epi64&expand=4960)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
    let r = _mm512_set1_epi64(a).as_i64x8();
    let zero = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, r, zero))
}

/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_set4_epi64&expand=4983)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    let r = i64x8::new(d, c, b, a, d, c, b, a);
    transmute(r)
}
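
// Illustrative sketch (not from the original source): the four values repeat
// twice across the eight 64-bit lanes, in the order the code builds them.
//
//     let v = _mm512_set4_epi64(4, 3, 2, 1);
//     // lanes (low to high): 4, 3, 2, 1, 4, 3, 2, 1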

/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_setr4_epi64&expand=5010)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    let r = i64x8::new(a, b, c, d, a, b, c, d);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmplt_ps_mask&expand=1074)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmplt_ps_mask&expand=1075)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LT_OS)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpnlt_ps_mask&expand=1154)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpnlt_ps_mask&expand=1155)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLT_US)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmple_ps_mask&expand=1013)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmple_ps_mask&expand=1014)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LE_OS)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpnle_ps_mask&expand=1146)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpnle_ps_mask&expand=1147)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLE_US)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpeq_ps_mask&expand=828)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpeq_ps_mask&expand=829)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_EQ_OQ)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmpneq_ps_mask&expand=1130)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmpneq_ps_mask&expand=1131)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NEQ_UQ)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmp_ps_mask&expand=749)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, imm8: i32) -> __mmask16 {
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr) => {
            vcmpps(
                a.as_f32x16(),
                b.as_f32x16(),
                $imm5,
                neg_one,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(imm8, call);
    transmute(r)
}
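
// Usage sketch (illustrative, not from the original source): the fixed-predicate
// wrappers above are thin shims over this intrinsic, so for any __m512 values
// `a` and `b` these two calls compute the same mask.
//
//     let lt1 = _mm512_cmplt_ps_mask(a, b);
//     let lt2 = _mm512_cmp_ps_mask(a, b, _CMP_LT_OS);
//     // lt1 == lt2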

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmp_ps_mask&expand=750)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_ps_mask(k1: __mmask16, a: __m512, b: __m512, imm8: i32) -> __mmask16 {
    macro_rules! call {
        ($imm5:expr) => {
            vcmpps(
                a.as_f32x16(),
                b.as_f32x16(),
                $imm5,
                k1 as i16,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(imm8, call);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_cmp_round_ps_mask&expand=753)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, imm8: i32, sae: i32) -> __mmask16 {
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_cmp_round_ps_mask&expand=754)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_ps_mask(
    m: __mmask16,
    a: __m512,
    b: __m512,
    imm8: i32,
    sae: i32,
) -> __mmask16 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpord_ps_mask&expand=1162)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_ORD_Q)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpunord_ps_mask&expand=1170)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask(k1, a, b, _CMP_UNORD_Q)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_pd_mask&expand=1071)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_pd_mask&expand=1072)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LT_OS)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnlt_pd_mask&expand=1151)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_pd_mask&expand=1010)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_pd_mask&expand=1011)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LE_OS)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnle_pd_mask&expand=1143)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NLE_US)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_pd_mask&expand=822)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_pd_mask&expand=823)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_EQ_OQ)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_pd_mask&expand=1127)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NEQ_UQ)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_pd_mask&expand=741)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr) => {
            vcmppd(
                a.as_f64x8(),
                b.as_f64x8(),
                $imm5,
                neg_one,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(imm8, call);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_pd_mask&expand=742)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_pd_mask(k1: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr) => {
            vcmppd(
                a.as_f64x8(),
                b.as_f64x8(),
                $imm5,
                k1 as i8,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm5!(imm8, call);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_pd_mask&expand=751)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, imm8: i32, sae: i32) -> __mmask8 {
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_pd_mask&expand=752)
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_pd_mask(
    k1: __mmask8,
    a: __m512d,
    b: __m512d,
    imm8: i32,
    sae: i32,
) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, k1 as i8, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpord_pd_mask&expand=1159)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpord_pd_mask&expand=1160)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_ORD_Q)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpunord_pd_mask&expand=1167)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask(k1, a, b, _CMP_UNORD_Q)
}
16847 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
16849 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=763)
16851 #[target_feature(enable = "avx512f")]
16852 #[rustc_args_required_const(2)]
16853 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16854 pub unsafe fn _mm_cmp_ss_mask(a
: __m128
, b
: __m128
, imm8
: i32) -> __mmask8
{
16856 macro_rules
! call
{
16858 vcmpss(a
, b
, $imm5
, neg_one
, _MM_FROUND_CUR_DIRECTION
)
16861 let r
= constify_imm5
!(imm8
, call
);
16865 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
16867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=764)
16869 #[target_feature(enable = "avx512f")]
16870 #[rustc_args_required_const(3)]
16871 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16872 pub unsafe fn _mm_mask_cmp_ss_mask(k1
: __mmask8
, a
: __m128
, b
: __m128
, imm8
: i32) -> __mmask8
{
16873 macro_rules
! call
{
16875 vcmpss(a
, b
, $imm5
, k1
as i8, _MM_FROUND_CUR_DIRECTION
)
16878 let r
= constify_imm5
!(imm8
, call
);
16882 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
16883 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16885 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=757)
16887 #[target_feature(enable = "avx512f")]
16888 #[rustc_args_required_const(2, 3)]
16889 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16890 pub unsafe fn _mm_cmp_round_ss_mask(a
: __m128
, b
: __m128
, imm8
: i32, sae
: i32) -> __mmask8
{
16892 macro_rules
! call
{
16893 ($imm5
:expr
, $imm4
:expr
) => {
16894 vcmpss(a
, b
, $imm5
, neg_one
, $imm4
)
16897 let r
= constify_imm5_sae
!(imm8
, sae
, call
);
16901 /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not seti).\
16902 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16904 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=758)
16906 #[target_feature(enable = "avx512f")]
16907 #[rustc_args_required_const(3, 4)]
16908 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
16909 pub unsafe fn _mm_mask_cmp_round_ss_mask(
16916 macro_rules
! call
{
16917 ($imm5
:expr
, $imm4
:expr
) => {
16918 vcmpss(a
, b
, $imm5
, k1
as i8, $imm4
)
16921 let r
= constify_imm5_sae
!(imm8
, sae
, call
);
16925 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
16927 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=760)
16929 #[target_feature(enable = "avx512f")]
16930 #[rustc_args_required_const(2)]
16931 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16932 pub unsafe fn _mm_cmp_sd_mask(a
: __m128d
, b
: __m128d
, imm8
: i32) -> __mmask8
{
16934 macro_rules
! call
{
16936 vcmpsd(a
, b
, $imm5
, neg_one
, _MM_FROUND_CUR_DIRECTION
)
16939 let r
= constify_imm5
!(imm8
, call
);
16943 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
16945 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=761)
16947 #[target_feature(enable = "avx512f")]
16948 #[rustc_args_required_const(3)]
16949 #[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
16950 pub unsafe fn _mm_mask_cmp_sd_mask(k1
: __mmask8
, a
: __m128d
, b
: __m128d
, imm8
: i32) -> __mmask8
{
16951 macro_rules
! call
{
16953 vcmpsd(a
, b
, $imm5
, k1
as i8, _MM_FROUND_CUR_DIRECTION
)
16956 let r
= constify_imm5
!(imm8
, call
);
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=755)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __mmask8 {
    let neg_one = -1;
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpsd(a, b, $imm5, neg_one, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=756)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_sd_mask(
    k1: __mmask8,
    a: __m128d,
    b: __m128d,
    imm8: i32,
    sae: i32,
) -> __mmask8 {
    macro_rules! call {
        ($imm5:expr, $imm4:expr) => {
            vcmpsd(a, b, $imm5, k1 as i8, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

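// Illustrative sketch (not part of the API above): how the scalar
// compare-with-SAE intrinsics are typically driven. Predicate 1 is assumed to
// be _CMP_LT_OS ("less-than, ordered, signaling"); the `_example_*` helper is
// hypothetical and assumes the caller has verified AVX-512F support at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cmp_round_sd_mask() {
    let a = _mm_set_sd(1.0);
    let b = _mm_set_sd(2.0);
    // 1.0 < 2.0 holds in the low lane, so bit 0 of the mask is set;
    // _MM_FROUND_NO_EXC suppresses floating-point exceptions.
    let m = _mm_cmp_round_sd_mask(a, b, 1, _MM_FROUND_NO_EXC);
    assert_eq!(m, 1);
}
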
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu32_mask&expand=1056)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmplt_epu32_mask(a, b) & k1
}

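// Sketch of the signed/unsigned distinction (hypothetical test helper): -1
// stored in a lane reads back as u32::MAX under the epu32 comparisons, so the
// same bit pattern orders differently than under the epi32 variants below.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cmplt_epu32_mask() {
    let a = _mm512_set1_epi32(-1); // every lane is 0xFFFF_FFFF = u32::MAX
    let b = _mm512_set1_epi32(1);
    assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0); // u32::MAX < 1 is false
    assert_eq!(_mm512_cmplt_epu32_mask(b, a), 0xFFFF); // 1 < u32::MAX in all lanes
    // The masked form simply ANDs the unmasked result with k1:
    assert_eq!(_mm512_mask_cmplt_epu32_mask(0b11, b, a), 0b11);
}
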
/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu32_mask&expand=933)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpgt_epu32_mask(a, b) & k1
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu32_mask&expand=995)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu32_mask&expand=996)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmple_epu32_mask(a, b) & k1
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu32_mask&expand=873)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu32_mask&expand=874)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpge_epu32_mask(a, b) & k1
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu32_mask&expand=807)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpeq_epu32_mask(a, b) & k1
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu32_mask&expand=1112)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpneq_epu32_mask(a, b) & k1
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu32_mask&expand=721)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
    let neg_one = -1;
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu32_mask&expand=722)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu32_mask(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
    imm8: _MM_CMPINT_ENUM,
) -> __mmask16 {
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, k1 as i16)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

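// Sketch (hypothetical test helper): the imm8 operand of _mm512_cmp_epu32_mask
// selects one of eight predicates; _MM_CMPINT_LE is assumed to be this
// module's usual encoding 2 ("less than or equal").
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cmp_epu32_mask() {
    let a = _mm512_set1_epi32(5);
    let b = _mm512_set1_epi32(5);
    assert_eq!(_mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LE), 0xFFFF);
    // Restricting with k1 clears the lanes whose mask bit is 0:
    assert_eq!(_mm512_mask_cmp_epu32_mask(0x00FF, a, b, _MM_CMPINT_LE), 0x00FF);
}
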
/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi32_mask&expand=1029)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmplt_epi32_mask(a, b) & k1
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi32_mask&expand=905)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpgt_epi32_mask(a, b) & k1
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi32_mask&expand=971)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi32_mask&expand=972)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmple_epi32_mask(a, b) & k1
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi32_mask&expand=849)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi32_mask&expand=850)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpge_epi32_mask(a, b) & k1
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi32_mask&expand=779)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpeq_epi32_mask(a, b) & k1
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi32_mask&expand=1088)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_cmpneq_epi32_mask(a, b) & k1
}

/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi32_mask&expand=697)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
    let neg_one = -1;
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi32_mask&expand=698)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi32_mask(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
    imm8: _MM_CMPINT_ENUM,
) -> __mmask16 {
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, k1 as i16)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

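// Sketch (hypothetical test helper): _MM_CMPINT_NLT ("not less than", assumed
// encoding 5) yields the lane-wise complement of _MM_CMPINT_LT.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_cmp_epi32_mask() {
    let a = _mm512_set1_epi32(-2);
    let b = _mm512_set1_epi32(3);
    let lt = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
    let nlt = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_NLT);
    assert_eq!(lt, !nlt); // the two predicates partition all 16 lanes
}
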
/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu64_mask&expand=1062)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmplt_epu64_mask(a, b) & k1
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu64_mask&expand=939)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmpgt_epu64_mask(a, b) & k1
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu64_mask&expand=1001)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu64_mask&expand=1002)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmple_epu64_mask(a, b) & k1
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu64_mask&expand=879)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu64_mask&expand=880)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Note the argument order: this must compute a >= b, not b >= a.
    _mm512_cmpge_epu64_mask(a, b) & k1
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu64_mask&expand=813)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmpeq_epu64_mask(a, b) & k1
}

/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu64_mask&expand=1118)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
}

/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmpneq_epu64_mask(a, b) & k1
}

/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu64_mask&expand=727)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
    let neg_one = -1;
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu64_mask&expand=728)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu64_mask(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
    imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, k1 as i8)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi64_mask&expand=1037)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmplt_epi64_mask(a, b) & k1
}

/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi64_mask&expand=913)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmpgt_epi64_mask(a, b) & k1
}

/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi64_mask&expand=977)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi64_mask&expand=978)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmple_epi64_mask(a, b) & k1
}

/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi64_mask&expand=855)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi64_mask&expand=856)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    // Note the argument order: this must compute a >= b, not b >= a.
    _mm512_cmpge_epi64_mask(a, b) & k1
}

/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi64_mask&expand=787)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmpeq_epi64_mask(a, b) & k1
}

/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi64_mask&expand=1094)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
}

/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    _mm512_cmpneq_epi64_mask(a, b) & k1
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi64_mask&expand=703)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
    let neg_one = -1;
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi64_mask&expand=704)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi64_mask(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
    imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
    macro_rules! call {
        ($imm3:expr) => {
            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, k1 as i8)
        };
    }
    let r = constify_imm3!(imm8, call);
    transmute(r)
}

/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_epi32&expand=4556)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
    simd_reduce_add_unordered(a.as_i32x16())
}

/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_epi32&expand=4555)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
    ))
}

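// Sketch (hypothetical test helper): inactive lanes are replaced by 0, the
// additive identity, before the horizontal sum, so only lanes selected by k
// contribute to the result.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_reduce_add_epi32() {
    let a = _mm512_set1_epi32(10);
    assert_eq!(_mm512_reduce_add_epi32(a), 160); // all 16 lanes
    assert_eq!(_mm512_mask_reduce_add_epi32(0b11, a), 20); // lanes 0 and 1 only
}
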
/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_epi64&expand=4558)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
    simd_reduce_add_unordered(a.as_i64x8())
}

/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_epi64&expand=4557)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
    ))
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_ps&expand=4562)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    simd_reduce_add_unordered(a.as_f32x16())
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_ps&expand=4561)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_setzero_ps().as_f32x16(),
    ))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_add_pd&expand=4560)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
    simd_reduce_add_unordered(a.as_f64x8())
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_add_pd&expand=4559)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    simd_reduce_add_unordered(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_setzero_pd().as_f64x8(),
    ))
}

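// Sketch (hypothetical test helper): the float reductions are unordered, so
// the association order of the sum is unspecified; with values that sum
// exactly in binary floating point the result is still deterministic.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_reduce_add_ps() {
    let a = _mm512_set1_ps(0.5);
    assert_eq!(_mm512_reduce_add_ps(a), 8.0); // 16 * 0.5
    assert_eq!(_mm512_mask_reduce_add_ps(0x000F, a), 2.0); // four active lanes
}
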
/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_epi32&expand=4600)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
    simd_reduce_mul_unordered(a.as_i32x16())
}

/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_epi32&expand=4599)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
    simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_set1_epi32(1).as_i32x16(),
    ))
}

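// Sketch (hypothetical test helper): for the masked product the inactive-lane
// filler is 1, the multiplicative identity, mirroring the 0 used for the
// masked sums above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_reduce_mul_epi32() {
    let a = _mm512_set1_epi32(2);
    assert_eq!(_mm512_mask_reduce_mul_epi32(0b111, a), 8); // 2 * 2 * 2
}
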
/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_epi64&expand=4602)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
    simd_reduce_mul_unordered(a.as_i64x8())
}

/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_epi64&expand=4601)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
    simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(1).as_i64x8(),
    ))
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_ps&expand=4606)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
    simd_reduce_mul_unordered(a.as_f32x16())
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_ps&expand=4605)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_set1_ps(1.).as_f32x16(),
    ))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_mul_pd&expand=4604)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
    simd_reduce_mul_unordered(a.as_f64x8())
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_mul_pd&expand=4603)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    simd_reduce_mul_unordered(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_set1_pd(1.).as_f64x8(),
    ))
}

/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epi32&expand=4576)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
    simd_reduce_max(a.as_i32x16())
}

/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epi32&expand=4575)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
    // Inactive elements are filled with i32::MIN, the identity for signed max,
    // so they cannot affect the result.
    simd_reduce_max(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_set1_epi32(i32::MIN).as_i32x16(),
    ))
}

/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epi64&expand=4578)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
    simd_reduce_max(a.as_i64x8())
}

/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epi64&expand=4577)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    // Inactive elements are filled with i64::MIN, the identity for signed max;
    // a zero filler would be wrong when all active elements are negative.
    simd_reduce_max(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(i64::MIN).as_i64x8(),
    ))
}

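// Sketch (hypothetical test helper): with i64::MIN as the inactive-lane
// filler, a masked maximum over all-negative inputs still returns an active
// element rather than a spurious 0.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_reduce_max_epi64() {
    let a = _mm512_set1_epi64(-7);
    assert_eq!(_mm512_mask_reduce_max_epi64(0b1111, a), -7);
}
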
/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epu32&expand=4580)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
    simd_reduce_max(a.as_u32x16())
}

/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epu32&expand=4579)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    // Inactive elements are filled with 0, the identity for unsigned max.
    simd_reduce_max(simd_select_bitmask(
        k,
        a.as_u32x16(),
        _mm512_setzero_si512().as_u32x16(),
    ))
}

/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_epu64&expand=4582)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
    simd_reduce_max(a.as_u64x8())
}

/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_epu64&expand=4581)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    // Inactive elements are filled with 0, the identity for unsigned max.
    simd_reduce_max(simd_select_bitmask(
        k,
        a.as_u64x8(),
        _mm512_set1_epi64(0).as_u64x8(),
    ))
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_ps&expand=4586)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 {
    simd_reduce_max(a.as_f32x16())
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_ps&expand=4585)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    // Inactive elements are filled with f32::MIN (the smallest finite f32),
    // which acts as the identity for max over finite inputs.
    simd_reduce_max(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_set1_ps(f32::MIN).as_f32x16(),
    ))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_max_pd&expand=4584)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
    simd_reduce_max(a.as_f64x8())
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_max_pd&expand=4583)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    // Inactive elements are filled with f64::MIN (the smallest finite f64),
    // which acts as the identity for max over finite inputs.
    simd_reduce_max(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_set1_pd(f64::MIN).as_f64x8(),
    ))
}

/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epi32&expand=4588)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
    simd_reduce_min(a.as_i32x16())
}

/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epi32&expand=4587)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
    // Inactive elements are filled with i32::MAX, the identity for signed min.
    simd_reduce_min(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_set1_epi32(i32::MAX).as_i32x16(),
    ))
}

/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epi64&expand=4590)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
    simd_reduce_min(a.as_i64x8())
}

/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epi64&expand=4589)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    // Inactive elements are filled with i64::MAX, the identity for signed min.
    simd_reduce_min(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(i64::MAX).as_i64x8(),
    ))
}

/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epu32&expand=4592)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
    simd_reduce_min(a.as_u32x16())
}

/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epu32&expand=4591)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
    // Inactive elements are filled with all-ones (u32::MAX), the identity for
    // unsigned min.
    simd_reduce_min(simd_select_bitmask(
        k,
        a.as_u32x16(),
        _mm512_set1_epi32(-1).as_u32x16(),
    ))
}

/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_epu64&expand=4594)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
    simd_reduce_min(a.as_u64x8())
}

/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_epu64&expand=4593)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    // Inactive elements are filled with all-ones (u64::MAX), the identity for
    // unsigned min.
    simd_reduce_min(simd_select_bitmask(
        k,
        a.as_u64x8(),
        _mm512_set1_epi64(-1).as_u64x8(),
    ))
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_ps&expand=4598)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 {
    simd_reduce_min(a.as_f32x16())
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_ps&expand=4597)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    // Inactive elements are filled with f32::MAX (the largest finite f32),
    // which acts as the identity for min over finite inputs.
    simd_reduce_min(simd_select_bitmask(
        k,
        a.as_f32x16(),
        _mm512_set1_ps(f32::MAX).as_f32x16(),
    ))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_min_pd&expand=4596)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
    simd_reduce_min(a.as_f64x8())
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_min_pd&expand=4595)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    // Inactive elements are filled with f64::MAX (the largest finite f64),
    // which acts as the identity for min over finite inputs.
    simd_reduce_min(simd_select_bitmask(
        k,
        a.as_f64x8(),
        _mm512_set1_pd(f64::MAX).as_f64x8(),
    ))
}

/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_and_epi32&expand=4564)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
    simd_reduce_and(a.as_i32x16())
}

/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_and_epi32&expand=4563)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    // Inactive elements are filled with -1 (all bits set), the identity for
    // bitwise AND, so masked-off lanes cannot clear any result bits.
    simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_set1_epi32(-1).as_i32x16(),
    ))
}

/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_and_epi64&expand=4566)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
    simd_reduce_and(a.as_i64x8())
}

/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_and_epi64&expand=4565)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    // Inactive elements are filled with -1 (all bits set), the identity for
    // bitwise AND, so masked-off lanes cannot clear any result bits.
    simd_reduce_and(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_set1_epi64(-1).as_i64x8(),
    ))
}

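// Sketch (hypothetical test helper): because the AND filler is all-ones, a
// masked AND reduction returns exactly the AND of the active lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_reduce_and_epi64() {
    let a = _mm512_set1_epi64(0b1010);
    assert_eq!(_mm512_mask_reduce_and_epi64(0b0011, a), 0b1010);
}
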
/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_or_epi32&expand=4608)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
    simd_reduce_or(a.as_i32x16())
}

/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_or_epi32&expand=4607)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    // Inactive elements are filled with 0, the identity for bitwise OR.
    simd_reduce_or(simd_select_bitmask(
        k,
        a.as_i32x16(),
        _mm512_setzero_si512().as_i32x16(),
    ))
}

/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_reduce_or_epi64&expand=4610)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
    simd_reduce_or(a.as_i64x8())
}

/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_reduce_or_epi64&expand=4609)
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    // Inactive elements are filled with 0, the identity for bitwise OR.
    simd_reduce_or(simd_select_bitmask(
        k,
        a.as_i64x8(),
        _mm512_setzero_si512().as_i64x8(),
    ))
}

/// Returns vector of type `__m512d` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_pd() -> __m512d {
    _mm512_set1_pd(0.0)
}

/// Returns vector of type `__m512` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_ps() -> __m512 {
    _mm512_set1_ps(0.0)
}

/// Returns vector of type `__m512i` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_undefined_epi32&expand=5995)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_epi32() -> __m512i {
    _mm512_set1_epi32(0)
}

/// Returns vector of type `__m512` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_undefined&expand=5994)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined() -> __m512 {
    _mm512_set1_ps(0.0)
}

18230 /// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
18232 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_loadu_epi32&expand=3377)
18234 #[target_feature(enable = "avx512f")]
18235 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
18236 pub unsafe fn _mm512_loadu_epi32(mem_addr
: *const i32) -> __m512i
{
18237 ptr
::read_unaligned(mem_addr
as *const __m512i
)
/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_storeu_epi32&expand=5628)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

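// Illustrative sketch, not from the original source: round-trip sixteen i32s
// through a __m512i using the unaligned load/store pair above.
// `_example_roundtrip_epi32` is a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_roundtrip_epi32(data: &mut [i32; 16]) {
    // No alignment requirement: a plain array is enough.
    let v = _mm512_loadu_epi32(data.as_ptr());
    _mm512_storeu_epi32(data.as_mut_ptr(), v);
}
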
/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_loadu_epi64&expand=3386)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_storeu_epi64&expand=5634)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu64
pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_loadu_si512&expand=3420)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_storeu_si512&expand=5657)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
pub unsafe fn _mm512_storeu_si512(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
}

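// Illustrative sketch, not from the original source: scale a float buffer
// one 16-lane chunk at a time via the unaligned ps load/store pair above.
// `_example_scale_f32x16` is a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_scale_f32x16(buf: &mut [f32; 16], factor: f32) {
    let v = _mm512_loadu_ps(buf.as_ptr());
    let scaled = _mm512_mul_ps(v, _mm512_set1_ps(factor));
    _mm512_storeu_ps(buf.as_mut_ptr(), scaled);
}
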
/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_si512&expand=3345)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovdqa32
pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_si512&expand=5598)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovdqa32
pub unsafe fn _mm512_store_si512(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

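// Illustrative sketch, not from the original source: the aligned variants
// require a 64-byte boundary, which `#[repr(align(64))]` guarantees here.
// `Align64` and `_example_aligned_roundtrip` are hypothetical names.
#[allow(dead_code)]
#[repr(align(64))]
struct Align64([i32; 16]);

#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_aligned_roundtrip(data: &mut Align64) {
    // Safe to use the aligned load/store because of the repr(align) above.
    let v = _mm512_load_si512(data.0.as_ptr());
    _mm512_store_si512(data.0.as_mut_ptr(), v);
}
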
/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_epi32&expand=3304)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovdqa32
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_epi32&expand=5569)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovdqa32
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_epi64&expand=3313)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovdqa64
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_epi64&expand=5575)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovdqa64
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_ps&expand=3336)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_ps&expand=5592)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
}

/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_load_pd&expand=3326)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovapd
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_store_pd&expand=5585)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] // should be vmovapd
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_pd&expand=5002)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_pd(
    e0: f64,
    e1: f64,
    e2: f64,
    e3: f64,
    e4: f64,
    e5: f64,
    e6: f64,
    e7: f64,
) -> __m512d {
    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
    transmute(r)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_pd&expand=4924)
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_pd(
    e0: f64,
    e1: f64,
    e2: f64,
    e3: f64,
    e4: f64,
    e5: f64,
    e6: f64,
    e7: f64,
) -> __m512d {
    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}

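// Illustrative sketch, not from the original source: _mm512_set_pd takes its
// arguments highest-lane first, so reversing them reproduces _mm512_setr_pd.
// `_example_set_vs_setr` is a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_set_vs_setr() -> (__m512d, __m512d) {
    // Both calls produce the vector whose lane i holds i as an f64.
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
    (a, b)
}
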
/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_move_ss&expand=3832)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovss))]
pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let extractsrc: f32 = simd_extract(src, 0);
    let mut mov: f32 = extractsrc;
    if (k & 0b00000001) != 0 {
        mov = simd_extract(b, 0);
    }
    let r = simd_insert(a, 0, mov);
    transmute(r)
}

/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_move_ss&expand=3833)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovss))]
pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let mut mov: f32 = 0.;
    if (k & 0b00000001) != 0 {
        mov = simd_extract(b, 0);
    }
    let r = simd_insert(a, 0, mov);
    transmute(r)
}

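// Illustrative sketch, not from the original source: with mask bit 0 clear,
// _mm_maskz_move_ss zeroes lane 0 while lanes 1..=3 still come from `a`.
// `_example_maskz_move` is a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_maskz_move(a: __m128, b: __m128) -> (__m128, __m128) {
    let kept = _mm_maskz_move_ss(0b1, a, b); // lane 0 = b[0]
    let zeroed = _mm_maskz_move_ss(0b0, a, b); // lane 0 = 0.0
    (kept, zeroed)
}
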
/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_move_sd&expand=3829)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let extractsrc: f64 = simd_extract(src, 0);
    let mut mov: f64 = extractsrc;
    if (k & 0b00000001) != 0 {
        mov = simd_extract(b, 0);
    }
    let r = simd_insert(a, 0, mov);
    transmute(r)
}

/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_move_sd&expand=3830)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let mut mov: f64 = 0.;
    if (k & 0b00000001) != 0 {
        mov = simd_extract(b, 0);
    }
    let r = simd_insert(a, 0, mov);
    transmute(r)
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_ss&expand=159)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss))]
pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let extractsrc: f32 = simd_extract(src, 0);
    let mut add: f32 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        add = extracta + extractb;
    }
    let r = simd_insert(a, 0, add);
    transmute(r)
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_add_ss&expand=160)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss))]
pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let mut add: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        add = extracta + extractb;
    }
    let r = simd_insert(a, 0, add);
    transmute(r)
}

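// Illustrative sketch, not from the original source: when mask bit 0 is
// clear, _mm_mask_add_ss leaves lane 0 equal to src's lane 0 instead of
// computing a[0] + b[0]. `_example_mask_add_ss` is a hypothetical name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_add_ss() -> (f32, f32) {
    let src = _mm_set_ss(100.0);
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    let active = _mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)); // 3.0
    let inactive = _mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)); // 100.0
    (active, inactive)
}
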
/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_sd&expand=155)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let extractsrc: f64 = simd_extract(src, 0);
    let mut add: f64 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        add = extracta + extractb;
    }
    let r = simd_insert(a, 0, add);
    transmute(r)
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_add_sd&expand=156)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let mut add: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        add = extracta + extractb;
    }
    let r = simd_insert(a, 0, add);
    transmute(r)
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_ss&expand=5750)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss))]
pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let extractsrc: f32 = simd_extract(src, 0);
    let mut sub: f32 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        sub = extracta - extractb;
    }
    let r = simd_insert(a, 0, sub);
    transmute(r)
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_ss&expand=5751)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss))]
pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let mut sub: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        sub = extracta - extractb;
    }
    let r = simd_insert(a, 0, sub);
    transmute(r)
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_sd&expand=5746)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let extractsrc: f64 = simd_extract(src, 0);
    let mut sub: f64 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        sub = extracta - extractb;
    }
    let r = simd_insert(a, 0, sub);
    transmute(r)
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_sd&expand=5747)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let mut sub: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        sub = extracta - extractb;
    }
    let r = simd_insert(a, 0, sub);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_ss&expand=3950)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss))]
pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let extractsrc: f32 = simd_extract(src, 0);
    let mut mul: f32 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        mul = extracta * extractb;
    }
    let r = simd_insert(a, 0, mul);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_ss&expand=3951)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss))]
pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let mut mul: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        mul = extracta * extractb;
    }
    let r = simd_insert(a, 0, mul);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_sd&expand=3947)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let extractsrc: f64 = simd_extract(src, 0);
    let mut mul: f64 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        mul = extracta * extractb;
    }
    let r = simd_insert(a, 0, mul);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_sd&expand=3948)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let mut mul: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        mul = extracta * extractb;
    }
    let r = simd_insert(a, 0, mul);
    transmute(r)
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_ss&expand=2181)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss))]
pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let extractsrc: f32 = simd_extract(src, 0);
    let mut div: f32 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        div = extracta / extractb;
    }
    let r = simd_insert(a, 0, div);
    transmute(r)
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_ss&expand=2182)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss))]
pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    let mut div: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        div = extracta / extractb;
    }
    let r = simd_insert(a, 0, div);
    transmute(r)
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_sd&expand=2178)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let extractsrc: f64 = simd_extract(src, 0);
    let mut div: f64 = extractsrc;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        div = extracta / extractb;
    }
    let r = simd_insert(a, 0, div);
    transmute(r)
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_sd&expand=2179)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    let mut div: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        div = extracta / extractb;
    }
    let r = simd_insert(a, 0, div);
    transmute(r)
}

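// Illustrative sketch, not from the original source: the zeromask variant
// never performs the division when mask bit 0 is clear, so no spurious
// divide is attempted for the inactive lane. `_example_maskz_div_sd` is a
// hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_maskz_div_sd(a: __m128d, b: __m128d, k: __mmask8) -> f64 {
    // Lane 0 is a[0] / b[0] when bit 0 of k is set, and 0.0 otherwise.
    _mm_cvtsd_f64(_mm_maskz_div_sd(k, a, b))
}
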
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_ss&expand=3672)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vmaxss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_ss&expand=3673)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vmaxss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_sd&expand=3669)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vmaxsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_sd&expand=3670)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vmaxsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_ss&expand=3786)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss))]
pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vminss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_ss&expand=3787)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss))]
pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vminss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_sd&expand=3783)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd))]
pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vminsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_sd&expand=3784)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd))]
pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vminsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

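// Illustrative sketch, not from the original source: clamp the lower lane of
// `x` into [lo, hi] with the masked min/max above, keeping the upper lane of
// `x`. `_example_clamp_sd` is a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_clamp_sd(x: __m128d, lo: __m128d, hi: __m128d) -> __m128d {
    let floored = _mm_maskz_max_sd(0b1, x, lo); // lane 0 = max(x[0], lo[0])
    _mm_maskz_min_sd(0b1, floored, hi) // lane 0 = min(max(x[0], lo[0]), hi[0])
}
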
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_ss&expand=5387)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vsqrtss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_ss&expand=5388)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vsqrtss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_sd&expand=5384)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vsqrtsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_sd&expand=5385)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vsqrtsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rsqrt14_ss&expand=4825)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vrsqrt14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b1,
    ))
}

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rsqrt14_ss&expand=4823)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}

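// Illustrative sketch, not from the original source: vrsqrt14 guarantees a
// relative error below 2^-14, so rsqrt14(4.0) lands close to 0.5.
// `_example_rsqrt14` is a hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_rsqrt14(pass_through: __m128) -> f32 {
    let b = _mm_set_ss(4.0);
    // Lane 0 approximates 1.0 / sqrt(4.0) = 0.5; the upper lanes are copied
    // from `pass_through`.
    _mm_cvtss_f32(_mm_rsqrt14_ss(pass_through, b))
}
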
/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rsqrt14_ss&expand=4824)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrsqrt14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
    ))
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rsqrt14_sd&expand=4822)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vrsqrt14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b1,
    ))
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rsqrt14_sd&expand=4820)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rsqrt14_sd&expand=4821)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrsqrt14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
    ))
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rcp14_ss&expand=4508)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vrcp14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b1,
    ))
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rcp14_ss&expand=4506)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rcp14_ss&expand=4507)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vrcp14ss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
    ))
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_rcp14_sd&expand=4505)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vrcp14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b1,
    ))
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_rcp14_sd&expand=4503)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_rcp14_sd&expand=4504)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vrcp14sd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
    ))
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_ss&expand=2862)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vgetexpss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b1,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

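// Illustrative sketch, not from the original source: getexp extracts
// floor(log2(|x|)) as a float, so 8.0 maps to 3.0. `_example_getexp` is a
// hypothetical helper name.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_getexp(pass_through: __m128) -> f32 {
    let b = _mm_set_ss(8.0);
    // Lane 0 becomes 3.0; the upper lanes are copied from `pass_through`.
    _mm_cvtss_f32(_mm_getexp_ss(pass_through, b))
}
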
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_ss&expand=2863)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vgetexpss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_ss&expand=2864)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vgetexpss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_sd&expand=2859)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vgetexpsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b1,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_sd&expand=2860)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vgetexpsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_sd&expand=2861)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vgetexpsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_ss&expand=2898)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_getmant_ss(
    a: __m128,
    b: __m128,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr) => {
            vgetmantss(
                a.as_f32x4(),
                b.as_f32x4(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_ps().as_f32x4(),
                0b1,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}

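// Illustrative sketch, not from the original source: normalize 12.0 into
// [1, 2) keeping the source sign; 12.0 = 1.5 * 2^3, so lane 0 becomes 1.5.
// `_example_getmant` is a hypothetical helper name; the norm/sign arguments
// must be compile-time constants because of rustc_args_required_const.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_getmant(pass_through: __m128) -> f32 {
    let b = _mm_set_ss(12.0);
    _mm_cvtss_f32(_mm_getmant_ss(
        pass_through,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_src,
    ))
}
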
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_ss&expand=2899)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_getmant_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr) => {
            vgetmantss(
                a.as_f32x4(),
                b.as_f32x4(),
                $imm2 << 2 | $imm4_1,
                src.as_f32x4(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_ss&expand=2900)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_getmant_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr) => {
            vgetmantss(
                a.as_f32x4(),
                b.as_f32x4(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_ps().as_f32x4(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_sd&expand=2895)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_getmant_sd(
    a: __m128d,
    b: __m128d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr) => {
            vgetmantsd(
                a.as_f64x2(),
                b.as_f64x2(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_pd().as_f64x2(),
                0b1,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_sd&expand=2896)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_getmant_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr) => {
            vgetmantsd(
                a.as_f64x2(),
                b.as_f64x2(),
                $imm2 << 2 | $imm4_1,
                src.as_f64x2(),
                k,
                _MM_FROUND_CUR_DIRECTION,
            )
        };
    }
    let r = constify_imm4_mantissas!(norm, sign, call);
    transmute(r)
}

19466 /// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
19467 /// The mantissa is normalized to the interval specified by interv, which can take the following values:\
19468 /// _MM_MANT_NORM_1_2 // interval [1, 2)\
19469 /// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
19470 /// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
19471 /// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
19472 /// The sign is determined by sc which can take the following values:\
19473 /// _MM_MANT_SIGN_src // sign = sign(src)\
19474 /// _MM_MANT_SIGN_zero // sign = 0\
19475 /// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
19476 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
19478 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_sd&expand=2897)
19480 #[target_feature(enable = "avx512f")]
19481 #[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
19482 #[rustc_args_required_const(3, 4)]
19483 pub unsafe fn _mm_maskz_getmant_sd(
19487 norm
: _MM_MANTISSA_NORM_ENUM
,
19488 sign
: _MM_MANTISSA_SIGN_ENUM
,
19490 macro_rules
! call
{
19491 ($imm4_1
:expr
, $imm2
:expr
) => {
19495 $imm2
<< 2 | $imm4_1
,
19496 _mm_setzero_pd().as_f64x2(),
19498 _MM_FROUND_CUR_DIRECTION
,
19502 let r
= constify_imm4_mantissas
!(norm
, sign
, call
);
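// Sketch of the immediate packing used by the call! macros above: the sign
// control occupies imm8[3:2] and the normalization interval imm8[1:0], hence
// `$imm2 << 2 | $imm4_1`. The constant names assume std::arch's uppercase
// spellings of the Intel values; this is an illustration, not module code.
#[cfg(test)]
fn getmant_imm_packing_sketch() {
    let norm = _MM_MANT_NORM_P75_1P5; // 0b11: interval [0.75, 1.5)
    let sign = _MM_MANT_SIGN_NAN; // 0b10: dst = NaN if sign(src) = 1
    assert_eq!(sign << 2 | norm, 0b1011);
}
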
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_ss&expand=4802)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 255))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128, imm8: i32) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let zero = _mm_setzero_ps().as_f32x4();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscaless(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

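// Illustrative usage sketch, not part of the original module: imm8[7:4] selects
// how many fraction bits to keep and imm8[2:0] the rounding mode, so imm8 = 0
// rounds the low lane of b to the nearest integer while the upper lanes keep
// a's values. Values below are illustrative; requires AVX512F at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn roundscale_ss_sketch() {
    let a = _mm_set1_ps(2.2);
    let b = _mm_set1_ps(1.1);
    let r = _mm_roundscale_ss(a, b, 0);
    assert_eq!(_mm_cvtss_f32(r), 1.0); // round(1.1) == 1.0 in the low lane
}
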
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_ss&expand=4800)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_roundscale_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    imm8: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let src = src.as_f32x4();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscaless(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_ss&expand=4801)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_roundscale_ss(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let zero = _mm_setzero_ps().as_f32x4();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscaless(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

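// Behavior sketch for the zeromask variant (illustrative, not module code):
// with mask bit 0 clear the low lane is zeroed rather than taken from a source
// operand. Requires AVX512F at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_roundscale_ss_sketch() {
    let a = _mm_set1_ps(2.2);
    let b = _mm_set1_ps(1.1);
    let r = _mm_maskz_roundscale_ss(0b0, a, b, 0);
    assert_eq!(_mm_cvtss_f32(r), 0.0); // masked off: zeroed
    let r = _mm_maskz_roundscale_ss(0b1, a, b, 0);
    assert_eq!(_mm_cvtss_f32(r), 1.0); // mask bit set: round(1.1)
}
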
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_sd&expand=4799)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 255))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let zero = _mm_setzero_pd().as_f64x2();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscalesd(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_sd&expand=4797)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_roundscale_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    imm8: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let src = src.as_f64x2();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscalesd(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_sd&expand=4798)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_roundscale_sd(k: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let zero = _mm_setzero_pd().as_f64x2();
    macro_rules! call {
        ($imm8:expr) => {
            vrndscalesd(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let r = constify_imm8_sae!(imm8, call);
    transmute(r)
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_ss&expand=4901)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
    transmute(vscalefss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

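// Illustrative usage sketch, not part of the original module: scalef computes
// a * 2^floor(b) on the low lane, so 1.5 scaled by b = 3.0 yields 12.0.
// Requires AVX512F at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn scalef_ss_sketch() {
    let a = _mm_set1_ps(1.5);
    let b = _mm_set1_ps(3.0);
    let r = _mm_scalef_ss(a, b);
    assert_eq!(_mm_cvtss_f32(r), 12.0); // 1.5 * 2^3
}
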
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_ss&expand=4899)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vscalefss(
        a.as_f32x4(),
        b.as_f32x4(),
        src.as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_ss&expand=4900)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    transmute(vscalefss(
        a.as_f32x4(),
        b.as_f32x4(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_sd&expand=4898)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        0b11111111,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_sd&expand=4896)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefsd(
        a.as_f64x2(),
        b.as_f64x2(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_sd&expand=4897)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    transmute(vscalefsd(
        a.as_f64x2(),
        b.as_f64x2(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_ss&expand=2582)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let mut fmadd: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}

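// Behavior sketch for the writemask variant (illustrative, not module code):
// with mask bit 0 clear the low lane of a passes through unchanged; with it set
// the low lane becomes a*b + c. Requires AVX512F at runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_fmadd_ss_sketch() {
    let a = _mm_set1_ps(2.);
    let b = _mm_set1_ps(3.);
    let c = _mm_set1_ps(4.);
    let r = _mm_mask_fmadd_ss(a, 0b0, b, c);
    assert_eq!(_mm_cvtss_f32(r), 2.); // masked off: copied from a
    let r = _mm_mask_fmadd_ss(a, 0b1, b, c);
    assert_eq!(_mm_cvtss_f32(r), 10.); // 2*3 + 4
}
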
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_ss&expand=2584)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let mut fmadd: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_ss&expand=2583)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let mut fmadd: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fmadd);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_sd&expand=2578)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let mut fmadd: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_sd&expand=2580)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let mut fmadd: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_sd&expand=2579)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let mut fmadd: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fmadd);
    transmute(r)
}

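// Behavior sketch for the mask3 variant (illustrative, not module code): both
// the masked-off fallback and the upper lane come from c. Requires AVX512F at
// runtime.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask3_fmadd_sd_sketch() {
    let a = _mm_set1_pd(2.);
    let b = _mm_set1_pd(3.);
    let c = _mm_set1_pd(4.);
    let r = _mm_mask3_fmadd_sd(a, b, c, 0b0);
    assert_eq!(_mm_cvtsd_f64(r), 4.); // masked off: copied from c
    let r = _mm_mask3_fmadd_sd(a, b, c, 0b1);
    assert_eq!(_mm_cvtsd_f64(r), 10.); // 2*3 + 4
}
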
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_ss&expand=2668)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let mut fmsub: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}

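// The fmsub bodies above reuse the fused-add helper with a negated addend,
// i.e. a*b - c == fma(a, b, -c); this scalar sketch (not module code) checks
// that identity with plain f32 arithmetic.
#[cfg(test)]
fn fmsub_identity_sketch() {
    let (a, b, c) = (2.0f32, 3.0f32, 4.0f32);
    assert_eq!(a.mul_add(b, -c), a * b - c);
}
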
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_ss&expand=2670)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let mut fmsub: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_ss&expand=2669)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let mut fmsub: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        let extractc = -fmsub;
        fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fmsub);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_sd&expand=2664)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let mut fmsub: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_sd&expand=2666)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let mut fmsub: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_sd&expand=2665)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let mut fmsub: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        let extractc = -fmsub;
        fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fmsub);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_ss&expand=2748)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let mut fnmadd: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmadd;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}

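// The fnmadd bodies negate the multiplicand instead of the addend, i.e.
// -(a*b) + c == fma(-a, b, c); a scalar sketch (not module code) of the
// identity used above.
#[cfg(test)]
fn fnmadd_identity_sketch() {
    let (a, b, c) = (2.0f64, 3.0f64, 4.0f64);
    assert_eq!((-a).mul_add(b, c), -(a * b) + c);
}
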
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_ss&expand=2750)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let mut fnmadd: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_ss&expand=2749)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let mut fnmadd: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fnmadd);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_sd&expand=2744)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let mut fnmadd: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmadd;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_sd&expand=2746)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let mut fnmadd: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_sd&expand=2745)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let mut fnmadd: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fnmadd);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_ss&expand=2796)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    let mut fnmsub: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmsub;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}

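// The fnmsub bodies negate both the multiplicand and the addend, i.e.
// -(a*b) - c == fma(-a, b, -c); a scalar sketch (not module code) of that
// identity.
#[cfg(test)]
fn fnmsub_identity_sketch() {
    let (a, b, c) = (2.0f32, 3.0f32, 4.0f32);
    assert_eq!((-a).mul_add(b, -c), -(a * b) - c);
}
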
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_ss&expand=2798)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    let mut fnmsub: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_ss&expand=2797)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    let mut fnmsub: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        let extractc = -fnmsub;
        fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fnmsub);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_sd&expand=2792)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    let mut fnmsub: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmsub;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_sd&expand=2794)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    let mut fnmsub: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_sd&expand=2793)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    let mut fnmsub: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        let extractc = -fnmsub;
        fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
    }
    let r = simd_insert(c, 0, fnmsub);
    transmute(r)
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_ss&expand=151)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vaddss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b11111111,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

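// Illustrative usage sketch, not part of the original module: the rounding
// argument must be one of the listed constants; here the low lanes are added
// with round-to-nearest and exceptions suppressed (illustrative values; the
// sum is exact, so the mode does not change the result). Requires AVX512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn add_round_ss_sketch() {
    let a = _mm_set1_ps(1.5);
    let b = _mm_set1_ps(2.25);
    let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtss_f32(r), 3.75);
}
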
/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_ss&expand=152)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_add_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vaddss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

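// Fallback sketch for the writemask variant (illustrative, not module code):
// when mask bit 0 is clear the low lane comes from src, not from the addition.
// _MM_FROUND_CUR_DIRECTION is a valid rounding constant per the list above.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_add_round_ss_sketch() {
    let src = _mm_set1_ps(10.);
    let a = _mm_set1_ps(1.5);
    let b = _mm_set1_ps(2.25);
    let r = _mm_mask_add_round_ss(src, 0b0, a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtss_f32(r), 10.); // masked off: copied from src
}
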
/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_ss&expand=153)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vaddss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sd&expand=148)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vaddsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b11111111,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_add_round_Sd&expand=149)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_add_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vaddsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sd&expand=150)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_add_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vaddsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_ss&expand=5745)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vsubss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b11111111,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

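// Illustrative usage sketch, not part of the original module: subtract the low
// lanes with an explicit rounding mode; the upper lanes are carried over from a.
// Values are illustrative and the difference is exact. Requires AVX512F.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn sub_round_ss_sketch() {
    let a = _mm_set1_ps(5.5);
    let b = _mm_set1_ps(2.25);
    let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtss_f32(r), 3.25);
}
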
/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_ss&expand=5743)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sub_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vsubss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_ss&expand=5744)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vsubss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sub_round_sd&expand=5742)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vsubsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b11111111,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sub_round_sd&expand=5740)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sub_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vsubsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sub_round_sd&expand=5741)
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sub_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vsubsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}

20568 /// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
20570 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
20571 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
20572 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
20573 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
20574 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
20575 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
20577 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_ss&expand=3946)
20579 #[target_feature(enable = "avx512f")]
20580 #[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
20581 #[rustc_args_required_const(2)]
20582 pub unsafe fn _mm_mul_round_ss(a
: __m128
, b
: __m128
, rounding
: i32) -> __m128
{
20583 macro_rules
! call
{
20588 _mm_setzero_ps().as_f32x4(),
20594 transmute(constify_imm4_round
!(rounding
, call
))
/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_ss&expand=3944)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_mul_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vmulss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_ss&expand=3945)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vmulss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mul_round_sd&expand=3943)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vmulsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_mul_round_sd&expand=3941)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_mul_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vmulsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_mul_round_sd&expand=3942)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_mul_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vmulsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_ss&expand=2174)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vdivss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
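
// NOTE(editor): illustrative sketch, not part of the original source.
// Directed rounding is observable here: 1.0 / 3.0 is inexact in f32, so
// round-down and round-up bracket the true quotient.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_div_round_ss() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(3.0);
    let down = _mm_div_round_ss(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    let up = _mm_div_round_ss(a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    // The two modes differ by one ulp around the unrepresentable 1/3.
    assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
}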
/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_ss&expand=2175)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_div_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vdivss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_ss&expand=2176)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vdivss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_div_round_sd&expand=2171)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vdivsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_div_round_sd&expand=2172)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_div_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vdivsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_div_round_sd&expand=2173)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_div_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vdivsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_ss&expand=3668)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
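
// NOTE(editor): illustrative sketch, not part of the original source. For
// the max/min family `sae` does not select a rounding mode; it is either
// _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC and only controls whether
// floating-point exceptions are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_max_round_ss() {
    let a = _mm_set_ss(-1.5);
    let b = _mm_set_ss(2.5);
    let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtss_f32(r), 2.5);
}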
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_ss&expand=3672)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_max_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    sae: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_round_ss&expand=3667)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_max_round_sd&expand=3665)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_max_round_sd&expand=3663)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_max_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_max_sd&expand=3670)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vmaxsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_ss&expand=3782)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vminss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_Ss&expand=3780)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_min_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    sae: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vminss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
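
// NOTE(editor): illustrative sketch, not part of the original source. The
// writemask variant falls back to `src` (not zero) for an unselected lane 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_min_round_ss() {
    let src = _mm_set_ss(42.0);
    let a = _mm_set_ss(-1.5);
    let b = _mm_set_ss(2.5);
    // Mask bit 0 is clear, so lane 0 is taken from `src`, not min(a, b).
    let r = _mm_mask_min_round_ss(src, 0b0, a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtss_f32(r), 42.0);
}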
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_ss&expand=3781)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vminss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_min_round_sd&expand=3779)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vminsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_min_round_sd&expand=3777)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_min_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vminsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_min_round_Sd&expand=3778)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vminsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_sae!(sae, call))
}
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_ss&expand=5383)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
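
// NOTE(editor): illustrative sketch, not part of the original source. Note
// that the square root is taken of lane 0 of `b`; `a` only contributes the
// upper lanes of the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_sqrt_round_ss() {
    let a = _mm_set_ps(3.0, 2.0, 1.0, 0.0);
    let b = _mm_set_ss(9.0);
    let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}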
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_ss&expand=5381)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sqrt_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_ss&expand=5382)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_sqrt_round_sd&expand=5380)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_sqrt_round_sd&expand=5378)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sqrt_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_sqrt_round_sd&expand=5379)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sqrt_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vsqrtsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    transmute(constify_imm4_round!(rounding, call))
}
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_ss&expand=2856)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                0b1,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
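
// NOTE(editor): illustrative sketch, not part of the original source.
// getexp returns floor(log2(|x|)) of lane 0 of `b` as a float: 10.0 lies in
// [2^3, 2^4), so the extracted exponent is 3.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getexp_round_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(10.0);
    let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtss_f32(r), 3.0);
}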
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_ss&expand=2857)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_getexp_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    sae: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_ss&expand=2858)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getexp_round_sd&expand=2853)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getexp_round_sd&expand=2854)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_getexp_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getexp_round_sd&expand=2855)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vgetexpsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_round_ss&expand=2892)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm_getmant_round_ss(
    a: __m128,
    b: __m128,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantss(
                a.as_f32x4(),
                b.as_f32x4(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_ps().as_f32x4(),
                0b1,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}
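
// NOTE(editor): illustrative sketch, not part of the original source.
// getmant factors x as mantissa * 2^k; with _MM_MANT_NORM_1_2 the mantissa
// is normalized into [1, 2), so 10.0 = 1.25 * 2^3 yields 1.25. Internally
// the two enums are packed into a single immediate as `sign << 2 | norm`,
// which is what the `$imm2 << 2 | $imm4_1` expression above computes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_getmant_round_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(10.0);
    let r = _mm_getmant_round_ss(
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    assert_eq!(_mm_cvtss_f32(r), 1.25);
}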
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_round_ss&expand=2893)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(4, 5, 6)]
pub unsafe fn _mm_mask_getmant_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantss(
                a.as_f32x4(),
                b.as_f32x4(),
                $imm2 << 2 | $imm4_1,
                src.as_f32x4(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}
/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_round_ss&expand=2894)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm_maskz_getmant_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantss(
                a.as_f32x4(),
                b.as_f32x4(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}
/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_getmant_round_sd&expand=2889)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm_getmant_round_sd(
    a: __m128d,
    b: __m128d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantsd(
                a.as_f64x2(),
                b.as_f64x2(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_pd().as_f64x2(),
                0b1,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}
/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_getmant_round_sd&expand=2890)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(4, 5, 6)]
pub unsafe fn _mm_mask_getmant_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantsd(
                a.as_f64x2(),
                b.as_f64x2(),
                $imm2 << 2 | $imm4_1,
                src.as_f64x2(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}
/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_getmant_round_sd&expand=2891)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm_maskz_getmant_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    norm: _MM_MANTISSA_NORM_ENUM,
    sign: _MM_MANTISSA_SIGN_ENUM,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
            vgetmantsd(
                a.as_f64x2(),
                b.as_f64x2(),
                $imm2 << 2 | $imm4_1,
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4_2,
            )
        };
    }
    let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
    transmute(r)
}
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_round_ss&expand=4796)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_roundscale_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let zero = _mm_setzero_ps().as_f32x4();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscaless(a, b, zero, 0b11111111, $imm8, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
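
// NOTE(editor): illustrative sketch, not part of the original source.
// imm8[7:4] selects how many fraction bits survive: with imm8 = 0 the value
// is rounded to an integer, while imm8 = 0x20 keeps two fraction bits
// (multiples of 0.25), leaving 2.25 unchanged.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_roundscale_round_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(2.25);
    let whole = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let quarters = _mm_roundscale_round_ss(a, b, 0x20, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(_mm_cvtss_f32(whole), 2.0);
    assert_eq!(_mm_cvtss_f32(quarters), 2.25);
}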
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_round_ss&expand=4794)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_roundscale_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    imm8: i32,
    sae: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let src = src.as_f32x4();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscaless(a, b, src, k, $imm8, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_round_ss&expand=4795)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_roundscale_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    imm8: i32,
    sae: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let zero = _mm_setzero_ps().as_f32x4();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscaless(a, b, zero, k, $imm8, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_roundscale_round_sd&expand=4793)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_roundscale_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let zero = _mm_setzero_pd().as_f64x2();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscalesd(a, b, zero, 0b11111111, $imm8, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_roundscale_round_sd&expand=4791)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_roundscale_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    imm8: i32,
    sae: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let src = src.as_f64x2();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscalesd(a, b, src, k, $imm8, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
/// _MM_FROUND_TO_NEG_INF // round down\
/// _MM_FROUND_TO_POS_INF // round up\
/// _MM_FROUND_TO_ZERO // truncate\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_roundscale_round_sd&expand=4792)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_roundscale_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    imm8: i32,
    sae: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let zero = _mm_setzero_pd().as_f64x2();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vrndscalesd(a, b, zero, k, $imm8, $imm4)
        };
    }
    let r = constify_imm8_roundscale!(imm8, sae, call);
    transmute(r)
}
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_ss&expand=4895)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let zero = _mm_setzero_ps().as_f32x4();
    macro_rules! call {
        ($imm4:expr) => {
            vscalefss(a, b, zero, 0b11111111, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_ss&expand=4893)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_scalef_round_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_ss&expand=4894)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_scalef_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefss(
                a.as_f32x4(),
                b.as_f32x4(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
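
// Editor's sketch (hypothetical helper, not part of the upstream source):
// scalef computes a[0] * 2^floor(b[0]) on the low lane, so 3.0 with an
// exponent argument of 2.0 yields 12.0. The rounding constant mirrors the
// combinations listed in the doc comments above.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_scalef_round_ss() -> f32 {
    let a = _mm_set_ss(3.0);
    let b = _mm_set_ss(2.0);
    let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtss_f32(r) // 12.0
}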
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_scalef_round_sd&expand=4892)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_scalef_round_sd&expand=4890)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_scalef_round_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefsd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_scalef_round_sd&expand=4891)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_scalef_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    rounding: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vscalefsd(
                a.as_f64x2(),
                b.as_f64x2(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
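
// Editor's sketch (hypothetical helper): the zero-masked f64 variant. With
// mask bit 0 clear the low lane is forced to zero, regardless of a and b.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_maskz_scalef_round_sd() -> f64 {
    let a = _mm_set_sd(3.0);
    let b = _mm_set_sd(4.0);
    let r = _mm_maskz_scalef_round_sd(0b0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtsd_f64(r) // 0.0, because mask bit 0 is not set
}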
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_ss&expand=2573)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
    let extracta: f32 = simd_extract(a, 0);
    let extractb: f32 = simd_extract(b, 0);
    let extractc: f32 = simd_extract(c, 0);
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ss(extracta, extractb, extractc, $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_ss&expand=2574)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmadd_round_ss(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fmadd: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(fmadd, extractb, extractc, $imm4)
            };
        }
        fmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_ss&expand=2576)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmadd_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fmadd: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_ss&expand=2575)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmadd_round_ss(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
    rounding: i32,
) -> __m128 {
    let mut fmadd: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, fmadd, $imm4)
            };
        }
        fmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fmadd);
    transmute(r)
}
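
// Editor's sketch (hypothetical helper): the four fmadd_round_ss variants
// differ only in where the low lane comes from when mask bit 0 is clear
// (a, zero, or c). The unmasked form always computes a[0] * b[0] + c[0].
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_fmadd_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtss_f32(r) // 7.0 = 2.0 * 3.0 + 1.0
}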
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmadd_round_sd&expand=2569)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
    let extracta: f64 = simd_extract(a, 0);
    let extractb: f64 = simd_extract(b, 0);
    let extractc: f64 = simd_extract(c, 0);
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132sd(extracta, extractb, extractc, $imm4)
        };
    }
    let fmadd = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmadd_round_sd&expand=2570)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmadd_round_sd(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fmadd: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(fmadd, extractb, extractc, $imm4)
            };
        }
        fmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmadd_round_sd&expand=2572)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmadd_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fmadd: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmadd);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmadd_round_Sd&expand=2571)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmadd_round_sd(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
    rounding: i32,
) -> __m128d {
    let mut fmadd: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, fmadd, $imm4)
            };
        }
        fmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fmadd);
    transmute(r)
}
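
// Editor's sketch (hypothetical helper): mask3 falls back to c and also takes
// its upper lane from c, so an all-clear mask simply returns c's low lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_mask3_fmadd_round_sd() -> f64 {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    let r = _mm_mask3_fmadd_round_sd(a, b, c, 0b0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtsd_f64(r) // 1.0, copied from c because mask bit 0 is not set
}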
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_ss&expand=2659)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
    let extracta: f32 = simd_extract(a, 0);
    let extractb: f32 = simd_extract(b, 0);
    let extractc: f32 = simd_extract(c, 0);
    let extractc = -extractc;
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ss(extracta, extractb, extractc, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_ss&expand=2660)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmsub_round_ss(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fmsub: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(fmsub, extractb, extractc, $imm4)
            };
        }
        fmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_ss&expand=2662)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmsub_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fmsub: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_ss&expand=2661)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmsub_round_ss(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
    rounding: i32,
) -> __m128 {
    let mut fmsub: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extractb: f32 = simd_extract(b, 0);
        let extractc = -fmsub;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fmsub);
    transmute(r)
}
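
// Editor's sketch (hypothetical helper): fmsub negates c before the fused
// multiply-add, so the low lane is a[0] * b[0] - c[0].
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_fmsub_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtss_f32(r) // 5.0 = 2.0 * 3.0 - 1.0
}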
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fmsub_round_sd&expand=2655)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
    let extracta: f64 = simd_extract(a, 0);
    let extractb: f64 = simd_extract(b, 0);
    let extractc: f64 = simd_extract(c, 0);
    let extractc = -extractc;
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132sd(extracta, extractb, extractc, $imm4)
        };
    }
    let fmsub = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fmsub_round_sd&expand=2656)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmsub_round_sd(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fmsub: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(fmsub, extractb, extractc, $imm4)
            };
        }
        fmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fmsub_round_sd&expand=2658)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmsub_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fmsub: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fmsub);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fmsub_round_sd&expand=2657)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmsub_round_sd(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
    rounding: i32,
) -> __m128d {
    let mut fmsub: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extractb: f64 = simd_extract(b, 0);
        let extractc = -fmsub;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fmsub);
    transmute(r)
}
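
// Editor's sketch (hypothetical helper): the write-masked f64 fmsub with mask
// bit 0 set computes normally; with it clear the low lane would stay a[0].
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_mask_fmsub_round_sd() -> f64 {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    let r = _mm_mask_fmsub_round_sd(a, 0b1, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtsd_f64(r) // 5.0 = 2.0 * 3.0 - 1.0
}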
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_ss&expand=2739)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
    let extracta: f32 = simd_extract(a, 0);
    let extracta = -extracta;
    let extractb: f32 = simd_extract(b, 0);
    let extractc: f32 = simd_extract(c, 0);
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ss(extracta, extractb, extractc, $imm4)
        };
    }
    let fnmadd = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_ss&expand=2740)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmadd_round_ss(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fnmadd: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmadd;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fnmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_ss&expand=2742)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmadd_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fnmadd: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fnmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_ss&expand=2741)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmadd_round_ss(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
    rounding: i32,
) -> __m128 {
    let mut fnmadd: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, fnmadd, $imm4)
            };
        }
        fnmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fnmadd);
    transmute(r)
}
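
// Editor's sketch (hypothetical helper): fnmadd negates the product, giving
// -(a[0] * b[0]) + c[0] in the low lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_fnmadd_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtss_f32(r) // -5.0 = -(2.0 * 3.0) + 1.0
}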
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmadd_round_sd&expand=2735)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
    let extracta: f64 = simd_extract(a, 0);
    let extracta = -extracta;
    let extractb: f64 = simd_extract(b, 0);
    let extractc: f64 = simd_extract(c, 0);
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132sd(extracta, extractb, extractc, $imm4)
        };
    }
    let fnmadd = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmadd_round_sd&expand=2736)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmadd_round_sd(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fnmadd: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmadd;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fnmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmadd_round_sd&expand=2738)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmadd_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fnmadd: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fnmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmadd);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmadd_round_Sd&expand=2737)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmadd_round_sd(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
    rounding: i32,
) -> __m128d {
    let mut fnmadd: f64 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, fnmadd, $imm4)
            };
        }
        fnmadd = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fnmadd);
    transmute(r)
}
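
// Editor's sketch (hypothetical helper): zero-masked f64 fnmadd. With mask
// bit 0 set the low lane is -(a[0] * b[0]) + c[0]; clear it and the lane
// would be zeroed instead.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_maskz_fnmadd_round_sd() -> f64 {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    let r = _mm_maskz_fnmadd_round_sd(0b1, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtsd_f64(r) // -5.0
}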
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_ss&expand=2787)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
    let extracta: f32 = simd_extract(a, 0);
    let extracta = -extracta;
    let extractb: f32 = simd_extract(b, 0);
    let extractc: f32 = simd_extract(c, 0);
    let extractc = -extractc;
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132ss(extracta, extractb, extractc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_ss&expand=2788)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmsub_round_ss(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fnmsub: f32 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmsub;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fnmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_ss&expand=2790)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmsub_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
    rounding: i32,
) -> __m128 {
    let mut fnmsub: f32 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        let extractc: f32 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fnmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_ss&expand=2789)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmsub_round_ss(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
    rounding: i32,
) -> __m128 {
    let mut fnmsub: f32 = simd_extract(c, 0);
    if (k & 0b00000001) != 0 {
        let extracta: f32 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract(b, 0);
        let extractc = -fnmsub;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132ss(extracta, extractb, extractc, $imm4)
            };
        }
        fnmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(c, 0, fnmsub);
    transmute(r)
}
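
// Editor's sketch (hypothetical helper): fnmsub negates both the product and
// c, giving -(a[0] * b[0]) - c[0] in the low lane.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_fnmsub_round_ss() -> f32 {
    let a = _mm_set_ss(2.0);
    let b = _mm_set_ss(3.0);
    let c = _mm_set_ss(1.0);
    let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    _mm_cvtss_f32(r) // -7.0 = -(2.0 * 3.0) - 1.0
}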
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fnmsub_round_sd&expand=2783)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
    let extracta: f64 = simd_extract(a, 0);
    let extracta = -extracta;
    let extractb: f64 = simd_extract(b, 0);
    let extractc: f64 = simd_extract(c, 0);
    let extractc = -extractc;
    macro_rules! call {
        ($imm4:expr) => {
            vfmadd132sd(extracta, extractb, extractc, $imm4)
        };
    }
    let fnmsub = constify_imm4_round!(rounding, call);
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fnmsub_round_sd&expand=2784)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmsub_round_sd(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fnmsub: f64 = simd_extract(a, 0);
    if (k & 0b00000001) != 0 {
        let extracta = -fnmsub;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fnmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fnmsub_round_sd&expand=2786)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmsub_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
    rounding: i32,
) -> __m128d {
    let mut fnmsub: f64 = 0.;
    if (k & 0b00000001) != 0 {
        let extracta: f64 = simd_extract(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract(b, 0);
        let extractc: f64 = simd_extract(c, 0);
        let extractc = -extractc;
        macro_rules! call {
            ($imm4:expr) => {
                vfmadd132sd(extracta, extractb, extractc, $imm4)
            };
        }
        fnmsub = constify_imm4_round!(rounding, call);
    }
    let r = simd_insert(a, 0, fnmsub);
    transmute(r)
}
23236 /// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
23238 /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
23239 /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
23240 /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
23241 /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
23242 /// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
23243 /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
23245 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask3_fnmsub_round_sd&expand=2785)
23247 #[target_feature(enable = "avx512f")]
23248 #[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
23249 #[rustc_args_required_const(4)]
23250 pub unsafe fn _mm_mask3_fnmsub_round_sd(
23257 let mut fnmsub
: f64 = simd_extract(c
, 0);
23258 if (k
& 0b00000001) != 0 {
23259 let extracta
: f64 = simd_extract(a
, 0);
23260 let extracta
= -extracta
;
23261 let extractb
: f64 = simd_extract(b
, 0);
23262 let extractc
= -fnmsub
;
23263 macro_rules
! call
{
23265 vfmadd132sd(extracta
, extractb
, extractc
, $imm4
)
23268 fnmsub
= constify_imm4_round
!(rounding
, call
);
23270 let r
= simd_insert(c
, 0, fnmsub
);
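
// Editorial example (added sketch; not part of the original stdarch source):
// illustrates the writemask behavior documented above. The helper name
// `example_fnmsub_round_sd_masking` is hypothetical; it assumes it only runs
// on a CPU with AVX-512F support.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fnmsub_round_sd_masking() {
    let a = _mm_set_sd(2.0);
    let b = _mm_set_sd(3.0);
    let c = _mm_set_sd(1.0);
    // With mask bit 0 set, the lower lane is -(2.0 * 3.0) - 1.0 = -7.0.
    let r = _mm_mask_fnmsub_round_sd(
        a,
        0b00000001,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    assert_eq!(_mm_cvtsd_f64(r), -7.0);
    // With mask bit 0 clear, the lower lane is copied through from `a`.
    let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtsd_f64(r), 2.0);
}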

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_ss&expand=2517)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let c = c.as_i32x4();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmss(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let fixupimm = constify_imm8_sae!(imm8, call);
    let fixupimm: f32 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_ss&expand=2518)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_ss(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
    imm8: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let c = c.as_i32x4();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmss(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let fixupimm = constify_imm8_sae!(imm8, call);
    let fixupimm: f32 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_ss&expand=2519)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
    imm8: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let c = c.as_i32x4();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmssz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let fixupimm = constify_imm8_sae!(imm8, call);
    let fixupimm: f32 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_sd&expand=2514)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let c = c.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmsd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let fixupimm = constify_imm8_sae!(imm8, call);
    let fixupimm: f64 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_sd&expand=2515)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_sd(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128i,
    imm8: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let c = c.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmsd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let fixupimm = constify_imm8_sae!(imm8, call);
    let fixupimm: f64 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_sd&expand=2516)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
    imm8: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let c = c.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            vfixupimmsdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
        };
    }
    let fixupimm = constify_imm8_sae!(imm8, call);
    let fixupimm: f64 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}
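
// Editorial example (added sketch; not in the original source): the zeromask
// fixupimm variants zero the lower lane when mask bit 0 is clear, while the
// upper lanes are still taken from `a`. The helper name is hypothetical and
// assumes AVX-512F support at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_fixupimm_lower_lane() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lane 0 holds 1.0
    let b = _mm_set_ss(1.0);
    let c = _mm_set1_epi32(0);
    // Mask bit 0 is clear, so the fixed-up lower element is zeroed out.
    let r = _mm_maskz_fixupimm_ss(0, a, b, c, 0);
    assert_eq!(_mm_cvtss_f32(r), 0.0);
}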

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_ss&expand=2511)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_fixupimm_round_ss(
    a: __m128,
    b: __m128,
    c: __m128i,
    imm8: i32,
    sae: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let c = c.as_i32x4();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmss(a, b, c, $imm8, 0b11111111, $imm4)
        };
    }
    let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
    let fixupimm: f32 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_ss&expand=2512)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_ss(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
    imm8: i32,
    sae: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let c = c.as_i32x4();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmss(a, b, c, $imm8, k, $imm4)
        };
    }
    let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
    let fixupimm: f32 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_ss&expand=2513)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_ss(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
    imm8: i32,
    sae: i32,
) -> __m128 {
    let a = a.as_f32x4();
    let b = b.as_f32x4();
    let c = c.as_i32x4();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmssz(a, b, c, $imm8, k, $imm4)
        };
    }
    let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
    let fixupimm: f32 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_sd&expand=2508)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_fixupimm_round_sd(
    a: __m128d,
    b: __m128d,
    c: __m128i,
    imm8: i32,
    sae: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let c = c.as_i64x2();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmsd(a, b, c, $imm8, 0b11111111, $imm4)
        };
    }
    let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
    let fixupimm: f64 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_sd&expand=2509)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_sd(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128i,
    imm8: i32,
    sae: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let c = c.as_i64x2();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmsd(a, b, c, $imm8, k, $imm4)
        };
    }
    let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
    let fixupimm: f64 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}

/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_sd&expand=2510)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_sd(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128i,
    imm8: i32,
    sae: i32,
) -> __m128d {
    let a = a.as_f64x2();
    let b = b.as_f64x2();
    let c = c.as_i64x2();
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vfixupimmsdz(a, b, c, $imm8, k, $imm4)
        };
    }
    let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
    let fixupimm: f64 = simd_extract(fixupimm, 0);
    let r = simd_insert(a, 0, fixupimm);
    transmute(r)
}
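
// Editorial note (added; not in the original source): in the *_round_*
// fixupimm intrinsics the `sae` argument only controls exception reporting,
// not rounding. A minimal hypothetical sketch, assuming AVX-512F support:
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_fixupimm_round_sae() {
    let a = _mm_set_sd(1.0);
    let b = _mm_set_sd(1.0);
    let c = _mm_set1_epi64x(0);
    // Same fixup as _mm_fixupimm_sd, but with exceptions suppressed.
    let _r = _mm_fixupimm_round_sd(a, b, c, 0, _MM_FROUND_NO_EXC);
}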

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvtss_sd&expand=1896)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    transmute(vcvtss2sd(
        a.as_f64x2(),
        b.as_f32x4(),
        src.as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvtss_sd&expand=1897)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
    transmute(vcvtss2sd(
        a.as_f64x2(),
        b.as_f32x4(),
        _mm_setzero_pd().as_f64x2(),
        k,
        _MM_FROUND_CUR_DIRECTION,
    ))
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvtsd_ss&expand=1797)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    transmute(vcvtsd2ss(
        a.as_f32x4(),
        b.as_f64x2(),
        src.as_f32x4(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvtsd_ss&expand=1798)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
    transmute(vcvtsd2ss(
        a.as_f32x4(),
        b.as_f64x2(),
        _mm_setzero_ps().as_f32x4(),
        k,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    ))
}
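
// Editorial example (added sketch; not in the original source): lane selection
// for the masked scalar conversions above. Hypothetical helper; assumes
// AVX-512F support at runtime.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_cvtsd_ss() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
    let b = _mm_set_sd(2.5); // 2.5 is exactly representable as an f32
    // Mask bit 0 set: the lower lane becomes 2.5f32.
    let r = _mm_maskz_cvtsd_ss(0b00000001, a, b);
    assert_eq!(_mm_cvtss_f32(r), 2.5);
    // Mask bit 0 clear: the lower lane is zeroed instead.
    let r = _mm_maskz_cvtsd_ss(0, a, b);
    assert_eq!(_mm_cvtss_f32(r), 0.0);
}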

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_sd&expand=1371)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2sd(
                a.as_f64x2(),
                b.as_f32x4(),
                _mm_setzero_pd().as_f64x2(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundss_sd&expand=1372)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_cvt_roundss_sd(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128,
    sae: i32,
) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2sd(a.as_f64x2(), b.as_f32x4(), src.as_f64x2(), k, $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundss_sd&expand=1373)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: i32) -> __m128d {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2sd(
                a.as_f64x2(),
                b.as_f32x4(),
                _mm_setzero_pd().as_f64x2(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_ss&expand=1361)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2ss(
                a.as_f32x4(),
                b.as_f64x2(),
                _mm_setzero_ps().as_f32x4(),
                0b11111111,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundsd_ss&expand=1362)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_cvt_roundsd_ss(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128d,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2ss(a.as_f32x4(), b.as_f64x2(), src.as_f32x4(), k, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_cvt_roundsd_ss&expand=1363)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_cvt_roundsd_ss(
    k: __mmask8,
    a: __m128,
    b: __m128d,
    rounding: i32,
) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2ss(
                a.as_f32x4(),
                b.as_f64x2(),
                _mm_setzero_ps().as_f32x4(),
                k,
                $imm4,
            )
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
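
// Editorial example (added sketch; not in the original source): an explicit
// rounding argument steers the f64 -> f32 narrowing per call instead of via
// MXCSR. 1.1 has no exact f32 representation, so rounding up and down yield
// adjacent floats. Hypothetical helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundsd_ss_rounding() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_sd(1.1);
    let up = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    let down = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    // The two results bracket the exact value.
    assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
}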

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_si32&expand=1374)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2si(a.as_f32x4(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_i32&expand=1369)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2si(a.as_f32x4(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundss_u32&expand=1376)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_u32(a: __m128, rounding: i32) -> u32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2usi(a.as_f32x4(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
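
// Editorial example (added sketch; not in the original source): the rounding
// argument decides which integer a fractional value maps to. Hypothetical
// helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundss_si32_direction() {
    let a = _mm_set_ss(1.5);
    let down = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
    let up = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    assert_eq!(down, 1);
    assert_eq!(up, 2);
}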

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtss_i32&expand=1893)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 {
    transmute(vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtss_u32&expand=1901)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
    transmute(vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_si32&expand=1359)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2si(a.as_f64x2(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_i32&expand=1357)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2si(a.as_f64x2(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=cvt_roundsd_u32&expand=1364)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d, rounding: i32) -> u32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2usi(a.as_f64x2(), $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtsd_i32&expand=1791)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 {
    transmute(vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtsd_u32&expand=1799)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 {
    transmute(vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
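
// Editorial example (added sketch; not in the original source): the
// non-`round` conversions use MXCSR.RC, which defaults to
// round-to-nearest-even, so the halfway case 2.5 converts to 2. Hypothetical
// helper; assumes AVX-512F and the default MXCSR rounding mode.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtsd_i32_nearest_even() {
    let a = _mm_set_sd(2.5);
    assert_eq!(_mm_cvtsd_i32(a), 2);
}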

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundi32_ss&expand=1312)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsi2ss(a.as_f32x4(), b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsi32_ss&expand=1366)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsi2ss(a.as_f32x4(), b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}

/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundu32_ss&expand=1378)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32, rounding: i32) -> __m128 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtusi2ss(a.as_f32x4(), b, $imm4)
        };
    }
    let r = constify_imm4_round!(rounding, call);
    transmute(r)
}
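
// Editorial example (added sketch; not in the original source): rounding also
// matters for integer -> float conversions once the value exceeds the 24-bit
// f32 mantissa. Hypothetical helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvt_roundu32_ss_precision() {
    let a = _mm_set_ss(0.0);
    // u32::MAX is not representable as an f32: truncation picks the float
    // below it, rounding up picks the float above it.
    let trunc = _mm_cvt_roundu32_ss(a, u32::MAX, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let up = _mm_cvt_roundu32_ss(a, u32::MAX, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
    assert_eq!(_mm_cvtss_f32(trunc), 4_294_967_040.0);
    assert_eq!(_mm_cvtss_f32(up), 4_294_967_296.0);
}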

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_ss&expand=1643)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
    let b = b as f32;
    let r = simd_insert(a, 0, b);
    transmute(r)
}

/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_sd&expand=1642)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
    let b = b as f64;
    let r = simd_insert(a, 0, b);
    transmute(r)
}
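
// Editorial example (added sketch; not in the original source): only lane 0
// is replaced by the converted integer; the other lanes of `a` pass through.
// Hypothetical helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvti32_ss_lanes() {
    let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lane 0 holds 1.0
    let r = _mm_cvti32_ss(a, -7);
    assert_eq!(_mm_cvtss_f32(r), -7.0); // lane 0 is now (f32)(-7)
}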

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_Si32&expand=1936)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2si(a.as_f32x4(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_i32&expand=1934)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2si(a.as_f32x4(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundss_u32&expand=1938)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtss2usi(a.as_f32x4(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_i32&expand=2022)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
    transmute(vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_u32&expand=2026)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
    transmute(vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_si32&expand=1930)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2si(a.as_f64x2(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_i32&expand=1928)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2si(a.as_f64x2(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_u32&expand=1932)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 {
    macro_rules! call {
        ($imm4:expr) => {
            vcvtsd2usi(a.as_f64x2(), $imm4)
        };
    }
    let r = constify_imm4_sae!(sae, call);
    transmute(r)
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_i32&expand=2015)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
    transmute(vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_u32&expand=2020)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
    transmute(vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
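
// Editorial example (added sketch; not in the original source): the cvtt*
// conversions are documented to truncate toward zero, so -1.25 maps to -1
// (a value chosen so truncation and round-to-nearest agree). Hypothetical
// helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvttsd_i32() {
    let a = _mm_set_sd(-1.25);
    assert_eq!(_mm_cvttsd_i32(a), -1);
}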

/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_ss&expand=2032)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
    let b = b as f32;
    let r = simd_insert(a, 0, b);
    transmute(r)
}

/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_sd&expand=2031)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
    let b = b as f64;
    let r = simd_insert(a, 0, b);
    transmute(r)
}

/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_ss&expand=2035)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss
pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
    let b = b as f32;
    let r = simd_insert(a, 0, b);
    transmute(r)
}

/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_sd&expand=2034)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd
pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
    let b = b as f64;
    let r = simd_insert(a, 0, b);
    transmute(r)
}
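
// Editorial example (added sketch; not in the original source): above 2^53
// not every u64 survives the trip to f64; 2^53 + 1 rounds back down to 2^53
// under round-to-nearest-even. Hypothetical helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cvtu64_sd_precision() {
    let a = _mm_set_sd(0.0);
    let r = _mm_cvtu64_sd(a, (1u64 << 53) + 1);
    assert_eq!(_mm_cvtsd_f64(r), 9_007_199_254_740_992.0); // exactly 2^53
}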

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_ss&expand=1175)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] // should be vcomiss
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> i32 {
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vcomiss(a.as_f32x4(), b.as_f32x4(), $imm8, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_sd&expand=1174)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] // should be vcomisd
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_comi_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> i32 {
    macro_rules! call {
        ($imm8:expr, $imm4:expr) => {
            vcomisd(a.as_f64x2(), b.as_f64x2(), $imm8, $imm4)
        };
    }
    let r = constify_imm5_sae!(imm8, sae, call);
    transmute(r)
}
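
// Editorial example (added sketch; not in the original source): `imm8`
// selects the comparison predicate and the return value is 1 when the
// predicate holds. The literal 0 below is the "equal, ordered, non-signaling"
// predicate. Hypothetical helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_comi_round_ss() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(1.0);
    let eq = _mm_comi_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(eq, 1);
}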

/// Equal
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
/// Less-than-or-equal
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
/// False
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
/// Not-equal
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
/// Not less-than
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
/// Not less-than-or-equal
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
/// True
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
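
// Editorial example (added sketch; not in the original source): these
// constants feed the predicate operand of the AVX-512 integer compares, e.g.
// _mm512_cmp_epi32_mask. Hypothetical helper; assumes AVX-512F.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_cmpint_le() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // 1 <= 2 holds in every lane, so all 16 mask bits are set.
    let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LE);
    assert_eq!(m, 0b11111111_11111111);
}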

/// interval [1, 2)
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
/// interval [0.5, 2)
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
/// interval [0.5, 1)
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
/// interval [0.75, 1.5)
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;

/// sign = sign(SRC)
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
/// sign = 0
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
/// DEST = NaN if sign(SRC) = 1
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;

pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
= 0xD5;
24598 pub const _MM_PERM_DBBC
: _MM_PERM_ENUM
= 0xD6;
24599 pub const _MM_PERM_DBBD
: _MM_PERM_ENUM
= 0xD7;
24600 pub const _MM_PERM_DBCA
: _MM_PERM_ENUM
= 0xD8;
24601 pub const _MM_PERM_DBCB
: _MM_PERM_ENUM
= 0xD9;
24602 pub const _MM_PERM_DBCC
: _MM_PERM_ENUM
= 0xDA;
24603 pub const _MM_PERM_DBCD
: _MM_PERM_ENUM
= 0xDB;
24604 pub const _MM_PERM_DBDA
: _MM_PERM_ENUM
= 0xDC;
24605 pub const _MM_PERM_DBDB
: _MM_PERM_ENUM
= 0xDD;
24606 pub const _MM_PERM_DBDC
: _MM_PERM_ENUM
= 0xDE;
24607 pub const _MM_PERM_DBDD
: _MM_PERM_ENUM
= 0xDF;
24608 pub const _MM_PERM_DCAA
: _MM_PERM_ENUM
= 0xE0;
24609 pub const _MM_PERM_DCAB
: _MM_PERM_ENUM
= 0xE1;
24610 pub const _MM_PERM_DCAC
: _MM_PERM_ENUM
= 0xE2;
24611 pub const _MM_PERM_DCAD
: _MM_PERM_ENUM
= 0xE3;
24612 pub const _MM_PERM_DCBA
: _MM_PERM_ENUM
= 0xE4;
24613 pub const _MM_PERM_DCBB
: _MM_PERM_ENUM
= 0xE5;
24614 pub const _MM_PERM_DCBC
: _MM_PERM_ENUM
= 0xE6;
24615 pub const _MM_PERM_DCBD
: _MM_PERM_ENUM
= 0xE7;
24616 pub const _MM_PERM_DCCA
: _MM_PERM_ENUM
= 0xE8;
24617 pub const _MM_PERM_DCCB
: _MM_PERM_ENUM
= 0xE9;
24618 pub const _MM_PERM_DCCC
: _MM_PERM_ENUM
= 0xEA;
24619 pub const _MM_PERM_DCCD
: _MM_PERM_ENUM
= 0xEB;
24620 pub const _MM_PERM_DCDA
: _MM_PERM_ENUM
= 0xEC;
24621 pub const _MM_PERM_DCDB
: _MM_PERM_ENUM
= 0xED;
24622 pub const _MM_PERM_DCDC
: _MM_PERM_ENUM
= 0xEE;
24623 pub const _MM_PERM_DCDD
: _MM_PERM_ENUM
= 0xEF;
24624 pub const _MM_PERM_DDAA
: _MM_PERM_ENUM
= 0xF0;
24625 pub const _MM_PERM_DDAB
: _MM_PERM_ENUM
= 0xF1;
24626 pub const _MM_PERM_DDAC
: _MM_PERM_ENUM
= 0xF2;
24627 pub const _MM_PERM_DDAD
: _MM_PERM_ENUM
= 0xF3;
24628 pub const _MM_PERM_DDBA
: _MM_PERM_ENUM
= 0xF4;
24629 pub const _MM_PERM_DDBB
: _MM_PERM_ENUM
= 0xF5;
24630 pub const _MM_PERM_DDBC
: _MM_PERM_ENUM
= 0xF6;
24631 pub const _MM_PERM_DDBD
: _MM_PERM_ENUM
= 0xF7;
24632 pub const _MM_PERM_DDCA
: _MM_PERM_ENUM
= 0xF8;
24633 pub const _MM_PERM_DDCB
: _MM_PERM_ENUM
= 0xF9;
24634 pub const _MM_PERM_DDCC
: _MM_PERM_ENUM
= 0xFA;
24635 pub const _MM_PERM_DDCD
: _MM_PERM_ENUM
= 0xFB;
24636 pub const _MM_PERM_DDDA
: _MM_PERM_ENUM
= 0xFC;
24637 pub const _MM_PERM_DDDB
: _MM_PERM_ENUM
= 0xFD;
24638 pub const _MM_PERM_DDDC
: _MM_PERM_ENUM
= 0xFE;
24639 pub const _MM_PERM_DDDD
: _MM_PERM_ENUM
= 0xFF;
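// Each `_MM_PERM_*` name above encodes four 2-bit element selectors (A = 0,
// B = 1, C = 2, D = 3), first letter in the highest bits: `_MM_PERM_BBAC` is
// 0b01_01_00_10 = 0x52, and `_MM_PERM_DCBA` (0xE4) selects elements 0, 1, 2, 3
// in order. A minimal sketch of how such a constant is used, assuming
// `_mm512_shuffle_epi32` from this module (which takes an `_MM_PERM_ENUM`
// control):
//
//     let r = _mm512_shuffle_epi32(a, _MM_PERM_DCBA); // identity: r == a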
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.pmul.dq.512"]
    fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
    #[link_name = "llvm.x86.avx512.pmulu.dq.512"]
    fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
    #[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
    fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
    fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
    fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
    fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
    fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
    fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8;
    #[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
    fn vpminud(a: u32x16, b: u32x16) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
    fn vpminuq(a: u64x8, b: u64x8) -> u64x8;
    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
    fn vfmadd132ps(a: f32x16, b: f32x16, c: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
    fn vfmadd132pd(a: f64x8, b: f64x8, c: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
    fn vfmaddsub213ps(a: f32x16, b: f32x16, c: f32x16, d: i32) -> f32x16; // signature taken from Clang
    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
    fn vfmaddsub213pd(a: f64x8, b: f64x8, c: f64x8, d: i32) -> f64x8; // signature taken from Clang
    #[link_name = "llvm.x86.avx512.add.ps.512"]
    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.add.pd.512"]
    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.sub.ps.512"]
    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sub.pd.512"]
    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mul.ps.512"]
    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mul.pd.512"]
    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.div.ps.512"]
    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.div.pd.512"]
    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.max.ps.512"]
    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.max.pd.512"]
    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.min.ps.512"]
    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.min.pd.512"]
    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, sae: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, sae: i32) -> i64x8;
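    // `vpternlogd`/`vpternlogq` above take an 8-bit truth table in their last
    // operand: bit ((a_bit << 2) | (b_bit << 1) | c_bit) of the immediate is
    // the result bit for that combination of input bits, so 0x96 computes
    // a ^ b ^ c and 0xE8 the bitwise majority of a, b, and c.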
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
    fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
    fn vcvttps2udq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.dps.512"]
    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.qps.512"]
    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
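    // For the gather and scatter declarations above, `slice` is an untyped
    // base pointer: each lane accesses `slice + offsets[lane] * scale`, where
    // `scale` must be 1, 2, 4, or 8, and only the lanes whose mask bit is set
    // touch memory.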
    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
    fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
    fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
    fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
    fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
    fn vprold(a: i32x16, imm8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
    fn vprord(a: i32x16, imm8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
    fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
    fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
    fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
    fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psllv.d.512"]
    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psllv.q.512"]
    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.pslli.d.512"]
    fn vpsllid(a: i32x16, imm8: u32) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrli.d.512"]
    fn vpsrlid(a: i32x16, imm8: u32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pslli.q.512"]
    fn vpslliq(a: i64x8, imm8: u32) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrli.q.512"]
    fn vpsrliq(a: i64x8, imm8: u32) -> i64x8;
    #[link_name = "llvm.x86.avx512.psll.d.512"]
    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrl.d.512"]
    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psll.q.512"]
    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrl.q.512"]
    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psra.d.512"]
    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psra.q.512"]
    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrai.d.512"]
    fn vpsraid(a: i32x16, imm8: u32) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrai.q.512"]
    fn vpsraiq(a: i64x8, imm8: u32) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrav.d.512"]
    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrav.q.512"]
    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.permvar.si.512"]
    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.permvar.di.512"]
    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.permvar.df.512"]
    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
    fn vsqrtss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
    fn vsqrtsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.rcp14.ss"]
    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rcp14.sd"]
    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
    fn vfmadd132ss(a: f32, b: f32, c: f32, rounding: i32) -> f32;
    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
    fn vfmadd132sd(a: f64, b: f64, c: f64, rounding: i32) -> f64;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtss2si64"]
    fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
    #[link_name = "llvm.x86.avx512.vcvtss2usi64"]
    fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtsd2si64"]
    fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
    fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.cvtsi2ss64"]
    fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
    fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.cvtusi642ss"]
    fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.cvtusi642sd"]
    fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.vcomi.ss"]
    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcomi.sd"]
    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
}
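// The `rounding` and `sae` parameters of the declarations above take the
// `_MM_FROUND_*` constants; a minimal sketch of a direct call, assuming
// `_MM_FROUND_TO_NEAREST_INT` and `_MM_FROUND_NO_EXC` are in scope from this
// crate's x86 module:
//
//     let sum = vaddps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);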
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_abs_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_abs_epi32(a);
        // abs(i32::MIN) wraps back to i32::MIN, i.e. i32::MAX.wrapping_add(1).
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_abs_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_mask_abs_epi32(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
        // The low eight lanes take the absolute value; the rest copy src (= a).
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_abs_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_maskz_abs_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_abs_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let r = _mm512_abs_ps(a);
        // |f32::MIN| == f32::MAX.
        let e = _mm512_setr_ps(
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_abs_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let r = _mm512_mask_abs_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 1., f32::MAX,
            f32::MAX, 100., 100., 32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_mov_epi32() {
        let src = _mm512_set1_epi32(1);
        let a = _mm512_set1_epi32(2);
        let r = _mm512_mask_mov_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_mov_epi32() {
        let a = _mm512_set1_epi32(2);
        let r = _mm512_maskz_mov_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_mov_ps() {
        let src = _mm512_set1_ps(1.);
        let a = _mm512_set1_ps(2.);
        let r = _mm512_mask_mov_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
        assert_eq_m512(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_mov_ps() {
        let a = _mm512_set1_ps(2.);
        let r = _mm512_maskz_mov_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
        assert_eq_m512(r, a);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_add_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_add_epi32(a, b);
        // i32::MAX + 1 wraps around to i32::MIN.
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_add_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_add_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_add_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(
            1, 2, 0, i32::MIN,
            i32::MIN + 1, 101, -99, -31,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_add_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_add_ps(a, b);
        // f32::MAX + 1. rounds back to f32::MAX (and f32::MIN + 1. to f32::MIN).
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN, 101., -99., -31.,
            1., 2., 0., f32::MAX,
            f32::MIN, 101., -99., -31.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_add_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN, 101., -99., -31.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_add_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_add_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN, 101., -99., -31.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sub_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_sub_epi32(a, b);
        // i32::MIN - 1 wraps around to i32::MAX.
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sub_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_sub_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_sub_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_sub_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sub_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_ps(a, b);
        // f32::MAX - 1. rounds back to f32::MAX (and f32::MIN - 1. to f32::MIN).
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX,
            f32::MIN, 99., -101., -33.,
            -1., 0., -2., f32::MAX,
            f32::MIN, 99., -101., -33.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sub_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX,
            f32::MIN, 99., -101., -33.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_sub_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_sub_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX,
            f32::MIN, 99., -101., -33.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mullo_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mullo_epi32(a, b);
        // i32::MAX * 2 wraps to -2 and i32::MIN * 2 wraps to 0.
        let e = _mm512_setr_epi32(
            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_mullo_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(
            0, 2, -2, -2,
            0, 200, -200, -64,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_mullo_epi32() {
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_mullo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mul_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mul_ps(a, b);
        // f32::MAX * 2. overflows to INFINITY and f32::MIN * 2. to NEG_INFINITY.
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_mul_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_mul_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_mul_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_maskz_mul_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_div_ps(a, b);
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0.5, -0.5, 500.,
            f32::NEG_INFINITY, 50., -50., -16.,
        );
        assert_eq_m512(r, e); // 100. / 0. = INFINITY and -131. / 0. = NEG_INFINITY
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_mask_div_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 1., -1., 1000.,
            -131., 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_maskz_div_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_ps(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_ps(a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_sqrt_ps(a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_mask_sqrt_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_maskz_sqrt_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
26461 #[simd_test(enable = "avx512f")]
26462 unsafe fn test_mm512_fmsub_ps() {
26463 let a
= _mm512_setr_ps(
26464 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26466 let b
= _mm512_setr_ps(
26467 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26469 let c
= _mm512_setr_ps(
26470 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26472 let r
= _mm512_fmsub_ps(a
, b
, c
);
26473 let e
= _mm512_setr_ps(
26474 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
26476 assert_eq_m512(r
, e
);
26479 #[simd_test(enable = "avx512f")]
26480 unsafe fn test_mm512_mask_fmsub_ps() {
26481 let a
= _mm512_setr_ps(
26482 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26484 let b
= _mm512_setr_ps(
26485 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26487 let c
= _mm512_setr_ps(
26488 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26490 let r
= _mm512_mask_fmsub_ps(a
, 0, b
, c
);
26491 assert_eq_m512(r
, a
);
26492 let r
= _mm512_mask_fmsub_ps(a
, 0b00000000_11111111, b
, c
);
26493 let e
= _mm512_setr_ps(
26494 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
26496 assert_eq_m512(r
, e
);
26499 #[simd_test(enable = "avx512f")]
26500 unsafe fn test_mm512_maskz_fmsub_ps() {
26501 let a
= _mm512_setr_ps(
26502 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26504 let b
= _mm512_setr_ps(
26505 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26507 let c
= _mm512_setr_ps(
26508 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26510 let r
= _mm512_maskz_fmsub_ps(0, a
, b
, c
);
26511 assert_eq_m512(r
, _mm512_setzero_ps());
26512 let r
= _mm512_maskz_fmsub_ps(0b00000000_11111111, a
, b
, c
);
26513 let e
= _mm512_setr_ps(
26514 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
26516 assert_eq_m512(r
, e
);
26519 #[simd_test(enable = "avx512f")]
26520 unsafe fn test_mm512_mask3_fmsub_ps() {
26521 let a
= _mm512_setr_ps(
26522 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
26524 let b
= _mm512_setr_ps(
26525 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
26527 let c
= _mm512_setr_ps(
26528 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
26530 let r
= _mm512_mask3_fmsub_ps(a
, b
, c
, 0);
26531 assert_eq_m512(r
, c
);
26532 let r
= _mm512_mask3_fmsub_ps(a
, b
, c
, 0b00000000_11111111);
26533 let e
= _mm512_setr_ps(
26534 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
26536 assert_eq_m512(r
, e
);
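
    // fmaddsub alternates the final operation per lane: even-indexed lanes
    // compute a*b - c and odd-indexed lanes compute a*b + c, which is why the
    // expected vectors below interleave two sequences (lane 0 = 1*0 - 1 = -1,
    // lane 1 = 1*1 + 1 = 2).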

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmaddsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmaddsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmaddsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmaddsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmaddsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
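
    // fmsubadd mirrors fmaddsub: even-indexed lanes compute a*b + c and
    // odd-indexed lanes compute a*b - c (lane 0 = 1*0 + 1 = 1, lane 1 =
    // 1*1 - 1 = 0).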

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsubadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
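
    // fnmadd and fnmsub negate the product before the final step: fnmadd
    // computes -(a*b) + c and fnmsub computes -(a*b) - c, hence the sign
    // flips in the expected vectors below.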

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fnmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fnmadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fnmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fnmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fnmadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fnmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fnmsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fnmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fnmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fnmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }
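
    // rcp14 and rsqrt14 are approximation instructions with a maximum
    // relative error of 2^-14, so the expected values below (0.33333206 for
    // 1/3 and 0.5773392 for 1/sqrt(3)) deliberately differ from the correctly
    // rounded results (0.33333334 and 0.57735026) in the low bits.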

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_rcp14_ps(a);
        let e = _mm512_set1_ps(0.33333206);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rcp14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rcp14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_rsqrt14_ps(a);
        let e = _mm512_set1_ps(0.5773392);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rsqrt14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_getexp_ps(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }
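
    // roundscale rounds to the precision selected by the upper nibble of the
    // immediate (2^-M steps); imm8 = 0 therefore rounds to whole numbers with
    // the default nearest-even mode, turning 1.1 into 1.0.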

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_roundscale_ps(a, 0);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_mask_roundscale_ps(a, 0, a, 0);
        let e = _mm512_set1_ps(1.1);
        assert_eq_m512(r, e);
        let r = _mm512_mask_roundscale_ps(a, 0b11111111_11111111, a, 0);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_maskz_roundscale_ps(0, a, 0);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_roundscale_ps(0b11111111_11111111, a, 0);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }
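
    // scalef computes a * 2^floor(b) per lane, so scaling 1.0 by 3.0 yields
    // 1.0 * 2^3 = 8.0.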

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_ps(a, b);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
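
    // fixupimm classifies each lane of the first source and patches special
    // values using the per-class fixup table supplied in `c`; with imm8 = 5
    // and this table, NaN lanes are replaced by +0.0.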

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fixupimm_ps() {
        let a = _mm512_set1_ps(f32::NAN);
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_fixupimm_ps(a, b, c, 5);
        let e = _mm512_set1_ps(0.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fixupimm_ps() {
        // NaN in the upper eight lanes, 1.0 in the lower eight (reconstructed
        // from the mask and the expected result below).
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_ps(a, 0b11111111_00000000, b, c, 5);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fixupimm_ps() {
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_ps(0b11111111_00000000, a, b, c, 5);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
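
    // ternarylogic evaluates an arbitrary three-input boolean function: for
    // every bit position the three source bits form an index into the 8-bit
    // immediate, which acts as a truth table. A one-bit sketch (illustrative
    // only):
    //
    //     fn ternlog_bit(a: u32, b: u32, c: u32, imm8: u8) -> u32 {
    //         (imm8 as u32 >> ((a << 2) | (b << 1) | c)) & 1
    //     }
    //
    // imm8 = 8 has only bit 3 set, i.e. it is 1 only for (a, b, c) =
    // (0, 1, 1); that combination never occurs for the inputs below, so every
    // lane collapses to 0.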

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        let r = _mm512_ternarylogic_epi32(a, b, c, 8);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_ternarylogic_epi32() {
        let src = _mm512_set1_epi32(1 << 2);
        let a = _mm512_set1_epi32(1 << 1);
        let b = _mm512_set1_epi32(1 << 0);
        let r = _mm512_mask_ternarylogic_epi32(src, 0, a, b, 8);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_ternarylogic_epi32(src, 0b11111111_11111111, a, b, 8);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        let r = _mm512_maskz_ternarylogic_epi32(0, a, b, c, 9);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_ternarylogic_epi32(0b11111111_11111111, a, b, c, 8);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }
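
    // getmant extracts the mantissa normalized into the interval selected by
    // the first constant: 10.0 = 1.25 * 2^3, so both the [0.75, 1.5) and the
    // [1, 2) normalization intervals yield 1.25.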

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_ps(
            a,
            0b11111111_00000000,
            a,
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
        );
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_getmant_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_getmant_ps(0b11111111_00000000, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }
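
    // The *_round_ps tests below exercise the EVEX embedded-rounding
    // encodings; a static rounding mode is combined with _MM_FROUND_NO_EXC
    // (suppress-all-exceptions), and the chosen mode is only observable in
    // lanes whose exact result is not representable in f32.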

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }
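
    // Only the last lane distinguishes the two modes above: the exact sum
    // 0.00000007 - 1.0 = -0.99999993 falls between the f32 neighbours
    // -0.99999994 and -0.9999999, so round-to-nearest picks the former and
    // round-toward-zero the latter.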

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_mask_add_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_add_round_ps(
            a,
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_add_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_add_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_round_ps(
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_round_ps(
            a,
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_sub_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_sub_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_round_ps(
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.99999994,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mul_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14.,
            0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            0., 0.15, 0.2, 0.35, 0.4, 0.55, 0.6, 0.75, 0.8, 0.95, 1.0, 1.15, 1.2, 1.35, 1.4,
            0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        // Lanes where a * 0.1 is inexact drop to the next float toward zero.
        let e = _mm512_setr_ps(
            0., 0.14999999, 0.2, 0.35, 0.4, 0.54999995, 0.59999996, 0.75, 0.8, 0.95, 1.0, 1.15,
            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_mul_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14.,
            0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_mask_mul_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_round_ps(
            a,
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 0.8, 0.95, 1.0, 1.15, 1.2, 1.35, 1.4,
            0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_mul_round_ps() {
        let a = _mm512_setr_ps(
            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14.,
            0.00000000000000000000007,
        );
        let b = _mm512_set1_ps(0.1);
        let r = _mm512_maskz_mul_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_round_ps(
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.8, 0.95, 1.0, 1.15, 1.2, 1.35, 1.4,
            0.000000000000000000000007000001,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(0.33333334);
        assert_eq_m512(r, e);
        let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(0.3333333);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_div_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_round_ps(
            a,
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_div_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_div_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_round_ps(
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
            0.33333334, 0.33333334, 0.33333334, 0.33333334,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(1.7320508);
        assert_eq_m512(r, e);
        let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(1.7320509);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_sqrt_round_ps(a, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_round_ps(
            a,
            0b11111111_00000000,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_sqrt_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_sqrt_round_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_round_ps(
            0b11111111_00000000,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
            1.7320508, 1.7320508, 1.7320508,
        );
        assert_eq_m512(r, e);
    }
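
    // sqrt(3) is irrational, so the two modes above land on adjacent f32
    // values (1.7320508 vs 1.7320509). The FMA round tests that follow use
    // 0.00000007 * 1.0 +/- 1.0 for the same purpose: the exact result sits
    // between two representable values, making the rounding mode visible.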

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_mask_fmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_round_ps(
            a,
            0b00000000_11111111,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmadd_round_ps(
            0b00000000_11111111,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_mask3_fmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_round_ps(
            a,
            b,
            c,
            0b00000000_11111111,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(-0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(-0.9999999);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_mask_fmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsub_round_ps(
            a,
            0b00000000_11111111,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_maskz_fmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsub_round_ps(
            0b00000000_11111111,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_mask3_fmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsub_round_ps(
            a,
            b,
            c,
            0b00000000_11111111,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994, -0.99999994,
            -0.99999994, -0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994,
        );
        assert_eq_m512(r, e);
        let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
            -0.9999999, 1., -0.9999999, 1., -0.9999999,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmaddsub_round_ps(
            a,
            0,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_round_ps(
            a,
            0b00000000_11111111,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmaddsub_round_ps(
            0,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_round_ps(
            0b00000000_11111111,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmaddsub_round_ps(
            a,
            b,
            c,
            0,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_round_ps(
            a,
            b,
            c,
            0b00000000_11111111,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001,
            -0.99999994, 1.0000001,
        );
        assert_eq_m512(r, e);
        let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask_fmsubadd_round_ps(
            a,
            0,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_round_ps(
            a,
            0b00000000_11111111,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_maskz_fmsubadd_round_ps(
            0,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_round_ps(
            0b00000000_11111111,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_mask3_fmsubadd_round_ps(
            a,
            b,
            c,
            0,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_round_ps(
            a,
            b,
            c,
            0b00000000_11111111,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994, 1.0000001, -0.99999994,
            1.0000001, -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(0.9999999);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_mask_fnmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_round_ps(
            a,
            0b00000000_11111111,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_maskz_fnmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_round_ps(
            0b00000000_11111111,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(1.);
        let r =
            _mm512_mask3_fnmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_round_ps(
            a,
            b,
            c,
            0b00000000_11111111,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(0.99999994);
        assert_eq_m512(r, e);
        let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(0.9999999);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_mask_fnmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmsub_round_ps(
            a,
            0b00000000_11111111,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
            0.00000007, 0.00000007,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_maskz_fnmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmsub_round_ps(
            0b00000000_11111111,
            a,
            b,
            c,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
        let a = _mm512_set1_ps(0.00000007);
        let b = _mm512_set1_ps(1.);
        let c = _mm512_set1_ps(-1.);
        let r =
            _mm512_mask3_fnmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_round_ps(
            a,
            b,
            c,
            0b00000000_11111111,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_max_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_min_round_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }
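
    // For non-arithmetic operations such as getexp, roundscale, and getmant,
    // the rounding argument only selects _MM_FROUND_CUR_DIRECTION or
    // _MM_FROUND_NO_EXC (SAE); getexp itself returns floor(log2(|a|)) as a
    // float, hence getexp(3.0) == 1.0.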

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
        let r = _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_round_ps(a, 0, a, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_round_ps(a, 0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_getexp_round_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_round_ps(0, a, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_round_ps(0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_roundscale_round_ps(a, 0, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_mask_roundscale_round_ps(a, 0, a, 0, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_set1_ps(1.1);
        assert_eq_m512(r, e);
        let r =
            _mm512_mask_roundscale_round_ps(a, 0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_roundscale_round_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_maskz_roundscale_round_ps(0, a, 0, _MM_FROUND_CUR_DIRECTION);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r =
            _mm512_maskz_roundscale_round_ps(0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r =
            _mm512_mask_scalef_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_round_ps(
            a,
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_scalef_round_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r =
            _mm512_maskz_scalef_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_round_ps(
            0b11111111_00000000,
            a,
            b,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_fixupimm_round_ps() {
        let a = _mm512_set1_ps(f32::NAN);
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_fixupimm_round_ps(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
        let e = _mm512_set1_ps(0.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_fixupimm_round_ps() {
        // NaN in the upper eight lanes, 1.0 in the lower eight (reconstructed
        // from the mask and the expected result below).
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_round_ps(
            a,
            0b11111111_00000000,
            b,
            c,
            5,
            _MM_FROUND_CUR_DIRECTION,
        );
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_round_ps(
            0b11111111_00000000,
            a,
            b,
            c,
            5,
            _MM_FROUND_CUR_DIRECTION,
        );
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        // Interval/sign constants reconstructed; any normalization interval
        // containing 1.25 gives the expected mantissa (10.0 = 1.25 * 2^3).
        let r = _mm512_getmant_round_ps(
            a,
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        );
        let e = _mm512_set1_ps(1.25);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_mask_getmant_round_ps(
            a,
            0,
            a,
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        );
        assert_eq_m512(r, a);
        let r = _mm512_mask_getmant_round_ps(
            a,
            0b11111111_00000000,
            a,
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        );
        let e = _mm512_setr_ps(
            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_getmant_round_ps() {
        let a = _mm512_set1_ps(10.);
        let r = _mm512_maskz_getmant_round_ps(
            0,
            a,
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getmant_round_ps(
            0b11111111_00000000,
            a,
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        );
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epi32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }
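
    // cvtps_epi32 honours the current rounding mode (nearest-even by
    // default): -1.4 rounds to -1, while the halfway cases -3.5 and 9.5 go
    // to the even neighbours -4 and 10.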

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epu32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }
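
    // cvtps_epu32 converts negative inputs to the unsigned integer
    // indefinite value 0xFFFF_FFFF, which prints as -1 when the expected
    // lanes are written as signed i32 above.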

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
28766 #[simd_test(enable = "avx512f")]
28767 unsafe fn test_mm512_cvtepi8_epi32() {
28768 let a
= _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28769 let r
= _mm512_cvtepi8_epi32(a
);
28770 let e
= _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28771 assert_eq_m512i(r
, e
);
28774 #[simd_test(enable = "avx512f")]
28775 unsafe fn test_mm512_mask_cvtepi8_epi32() {
28776 let a
= _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28777 let src
= _mm512_set1_epi32(-1);
28778 let r
= _mm512_mask_cvtepi8_epi32(src
, 0, a
);
28779 assert_eq_m512i(r
, src
);
28780 let r
= _mm512_mask_cvtepi8_epi32(src
, 0b00000000_11111111, a
);
28781 let e
= _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
28782 assert_eq_m512i(r
, e
);
28785 #[simd_test(enable = "avx512f")]
28786 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
28787 let a
= _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
28788 let r
= _mm512_maskz_cvtepi8_epi32(0, a
);
28789 assert_eq_m512i(r
, _mm512_setzero_si512());
28790 let r
= _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a
);
28791 let e
= _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
28792 assert_eq_m512i(r
, e
);

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsepi32_epi16() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi16(a);
        let e = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(
            -1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(
            0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtsepi32_epi8() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi8(a);
        let e = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(
            -1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(
            0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }
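
    // Note: the signed saturating down-conversions (vpmovsdw/vpmovsdb) clamp out-of-range lanes
    // to the signed range of the narrower type, so i32::MIN and i32::MAX become
    // i16::MIN/i16::MAX (or i8::MIN/i8::MAX) rather than being truncated bit-wise.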

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtusepi32_epi16() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtusepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtusepi32_epi8() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
        let a = _mm512_set_epi32(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }
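
    // Note: the unsigned saturating forms (vpmovusdw/vpmovusdb) treat the source lanes as u32,
    // so both i32::MIN (0x8000_0000 unsigned) and i32::MAX exceed the destination range and
    // clamp to all-ones, which prints as -1 in the signed expected vectors.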

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }
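
    // Note: _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (-1.5 -> -2, 9.5 -> 10),
    // while _MM_FROUND_TO_NEG_INF rounds toward negative infinity (9.5 -> 9);
    // _MM_FROUND_NO_EXC additionally suppresses floating-point exceptions for the operation.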

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r =
            _mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epi32(
            src,
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epi32(
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r =
            _mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epu32(
            src,
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epu32(
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r =
            _mm512_mask_cvt_roundepi32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepi32_ps(
            src,
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepi32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepi32_ps(
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            0., 4294967294., 2., 4294967292., 4., 4294967290., 6., 4294967288., 8., 10., 10.,
            12., 12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }
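
    // Note: the negative lanes above are reinterpreted as large u32 values (-2 is 4294967294).
    // An f32 carries only 24 significand bits, so both the literal and the converted value
    // round to the same nearest representable float (4294967296.0) and the comparison holds.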

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r =
            _mm512_mask_cvt_roundepu32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepu32_ps(
            src,
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 4294967294., 2., 4294967292., 4., 4294967290., 6., 4294967288., 0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepu32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepu32_ps(
            0b00000000_11111111,
            a,
            _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
        );
        let e = _mm512_setr_ps(
            0., 4294967294., 2., 4294967292., 4., 4294967290., 6., 4294967288., 0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }
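
    // Note: 4323521613979991040 is 0x3C00_3C00_3C00_3C00, i.e. four IEEE binary16 1.0 values
    // (0x3C00) packed into one i64 lane, so each 256-bit expected vector holds sixteen f16 ones.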

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvt_roundps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvt_roundps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvtps_ph(a, _MM_FROUND_NO_EXC);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtps_ph() {
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvt_roundph_ps() {
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let r = _mm512_cvt_roundph_ps(a, _MM_FROUND_NO_EXC);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvt_roundph_ps() {
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundph_ps(src, 0, a, _MM_FROUND_NO_EXC);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundph_ps(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let r = _mm512_maskz_cvt_roundph_ps(0, a, _MM_FROUND_NO_EXC);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundph_ps(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtph_ps() {
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let r = _mm512_cvtph_ps(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtph_ps() {
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvtph_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtph_ps() {
        let a = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        let r = _mm512_maskz_cvtph_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvttps_epi32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvttps_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvttps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvttps_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvttps_epu32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvttps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_cvttps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvttps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }
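
    // Note: the cvtt (truncating) forms always round toward zero regardless of the rounding
    // control, so -1.5 becomes -1 rather than -2; negative inputs to the unsigned variants are
    // still invalid and yield all-ones lanes.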

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_i32gather_ps() {
        let mut arr = [0f32; 256];
        for i in 0..256 {
            arr[i] = i as f32;
        }
        // A multiplier of 4 is word-addressing
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 120, 128, 136, 144, 152, 160, 168, 176,
        );
        let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
        assert_eq_m512(
            r,
            _mm512_setr_ps(
                0., 16., 32., 48., 64., 80., 96., 112., 120., 128., 136., 144., 152., 160.,
                168., 176.,
            ),
        );
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_i32gather_ps() {
        let mut arr = [0f32; 256];
        for i in 0..256 {
            arr[i] = i as f32;
        }
        let src = _mm512_set1_ps(2.);
        let mask = 0b10101010_10101010;
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 120, 128, 136, 144, 152, 160, 168, 176,
        );
        // A multiplier of 4 is word-addressing
        let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
        assert_eq_m512(
            r,
            _mm512_setr_ps(
                2., 16., 2., 48., 2., 80., 2., 112., 2., 128., 2., 144., 2., 160., 2., 176.,
            ),
        );
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_i32gather_epi32() {
        let mut arr = [0i32; 256];
        for i in 0..256 {
            arr[i] = i as i32;
        }
        // A multiplier of 4 is word-addressing
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 120, 128, 136, 144, 152, 160, 168, 176,
        );
        let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(
                0, 16, 32, 48, 64, 80, 96, 112, 120, 128, 136, 144, 152, 160, 168, 176,
            ),
        );
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_i32gather_epi32() {
        let mut arr = [0i32; 256];
        for i in 0..256 {
            arr[i] = i as i32;
        }
        let src = _mm512_set1_epi32(2);
        let mask = 0b10101010_10101010;
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        // A multiplier of 4 is word-addressing
        let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
        assert_eq_m512i(
            r,
            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
        );
    }
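
    // Note: the trailing argument is a byte scale applied to each 32-bit index; with 4-byte
    // elements a scale of 4 turns the indices into element offsets, which is what the
    // "word-addressing" comments in these tests refer to.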

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_i32scatter_ps() {
        let mut arr = [0f32; 256];
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        let src = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // A multiplier of 4 is word-addressing
        _mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
        let mut expected = [0f32; 256];
        for i in 0..16 {
            expected[i * 16] = (i + 1) as f32;
        }
        assert_eq!(&arr[..], &expected[..]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_i32scatter_ps() {
        let mut arr = [0f32; 256];
        let mask = 0b10101010_10101010;
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        let src = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        // A multiplier of 4 is word-addressing
        _mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
        let mut expected = [0f32; 256];
        for i in 0..8 {
            expected[i * 32 + 16] = 2. * (i + 1) as f32;
        }
        assert_eq!(&arr[..], &expected[..]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_i32scatter_epi32() {
        let mut arr = [0i32; 256];
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A multiplier of 4 is word-addressing
        _mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
        let mut expected = [0i32; 256];
        for i in 0..16 {
            expected[i * 16] = (i + 1) as i32;
        }
        assert_eq!(&arr[..], &expected[..]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_i32scatter_epi32() {
        let mut arr = [0i32; 256];
        let mask = 0b10101010_10101010;
        let index = _mm512_setr_epi32(
            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
        );
        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A multiplier of 4 is word-addressing
        _mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
        let mut expected = [0i32; 256];
        for i in 0..8 {
            expected[i * 32 + 16] = 2 * (i + 1) as i32;
        }
        assert_eq!(&arr[..], &expected[..]);
    }
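
    // Note: the masked scatters only store lanes whose mask bit is set, so the expected arrays
    // are rebuilt with just the odd lanes (mask 0b10101010_10101010) written at their scattered
    // offsets; everything else stays zero.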

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmplt_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmplt_ps_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }
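
    // Note: cmplt/cmple use ordered predicates, so comparisons involving NaN are false and the
    // NaN lanes only appear in the complements (cmpnlt, cmpnle), which are unordered.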

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmplt_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpnlt_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpnle_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmpnle_ps_mask(b, a);
        assert_eq!(m, 0b00001101_00001101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmple_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmple_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
            0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpeq_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let b = _mm512_set_ps(
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let m = _mm512_cmpeq_ps_mask(b, a);
        assert_eq!(m, 0b11001101_11001101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let b = _mm512_set_ps(
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
        assert_eq!(r, 0b01001000_01001000);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpneq_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let b = _mm512_set_ps(
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let m = _mm512_cmpneq_ps_mask(b, a);
        assert_eq!(m, 0b00110010_00110010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let b = _mm512_set_ps(
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
            0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
        );
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmp_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmp_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmp_round_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
        let a = _mm512_set_ps(
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
            0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
        );
        let b = _mm512_set1_ps(-1.);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpord_ps_mask() {
        let a = _mm512_set_ps(
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.,
        );
        let b = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.,
        );
        let m = _mm512_cmpord_ps_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpord_ps_mask() {
        let a = _mm512_set_ps(
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.,
        );
        let b = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.,
        );
        let mask = 0b11000011_11000011;
        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00000001_00000001);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpunord_ps_mask() {
        let a = _mm512_set_ps(
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.,
        );
        let b = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.,
        );
        let m = _mm512_cmpunord_ps_mask(a, b);
        assert_eq!(m, 0b11111010_11111010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
        let a = _mm512_set_ps(
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
            f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.,
        );
        let b = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.,
        );
        let mask = 0b00001111_00001111;
        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00001010_00001010);
    }
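
    // Note: cmpord reports lanes where neither operand is NaN and cmpunord lanes where at least
    // one is, so for the same inputs the two masks are exact complements.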

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cmp_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cmp_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cmp_round_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cmp_round_ss_mask() {
        let a = _mm_setr_ps(2., 1., 1., 1.);
        let b = _mm_setr_ps(1., 2., 2., 2.);
        let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cmp_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cmp_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_cmp_round_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_cmp_round_sd_mask() {
        let a = _mm_setr_pd(2., 1.);
        let b = _mm_setr_pd(1., 2.);
        let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 0);
        let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
        assert_eq!(m, 1);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmplt_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epu32_mask(a, b);
        assert_eq!(m, 0b11001111_11001111);
    }
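
    // Note: the epu32 comparisons reinterpret lanes as unsigned: -1 and u32::MAX as i32 are
    // both 0xFFFF_FFFF (the largest value) and i32::MIN becomes 0x8000_0000 (> i32::MAX), which
    // is exactly where the expected masks diverge from the epi32 variants.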

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpgt_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmple_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        assert_eq!(_mm512_cmple_epu32_mask(a, b), !_mm512_cmpgt_epu32_mask(a, b));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmple_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epu32_mask(mask, a, b), 0b01111010_01111010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpge_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        assert_eq!(_mm512_cmpge_epu32_mask(a, b), !_mm512_cmplt_epu32_mask(a, b));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpeq_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set_epi32(
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
        );
        let m = _mm512_cmpeq_epu32_mask(b, a);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set_epi32(
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
        );
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpneq_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set_epi32(
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
        );
        let m = _mm512_cmpneq_epu32_mask(b, a);
        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
        );
        let b = _mm512_set_epi32(
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
            0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
        );
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
        assert_eq!(r, 0b00110010_00110010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmp_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
        assert_eq!(m, 0b11001111_11001111);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmp_epu32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
        assert_eq!(r, 0b01001010_01001010);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmplt_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmplt_epi32_mask(a, b);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpgt_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let m = _mm512_cmpgt_epi32_mask(b, a);
        assert_eq!(m, 0b00000101_00000101);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01100110_01100110;
        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
        assert_eq!(r, 0b00000100_00000100);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmple_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        assert_eq!(_mm512_cmple_epi32_mask(a, b), !_mm512_cmpgt_epi32_mask(a, b));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmple_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_cmpge_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        assert_eq!(_mm512_cmpge_epi32_mask(a, b), !_mm512_cmplt_epi32_mask(a, b));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
        let a = _mm512_set_epi32(
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
            0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
        );
        let b = _mm512_set1_epi32(-1);
        let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpge_epi32_mask(mask, a, b), 0b01111010_01111010);
    }
30650 #[simd_test(enable = "avx512f")]
30651 unsafe fn test_mm512_cmpeq_epi32_mask() {
30653 let a
= _mm512_set_epi32(0, 1, -1, 13, i32::MAX
, i32::MIN
, 100, -100,
30654 0, 1, -1, 13, i32::MAX
, i32::MIN
, 100, -100);
30656 let b
= _mm512_set_epi32(0, 1, 13, 42, i32::MAX
, i32::MIN
, 100, -100,
30657 0, 1, 13, 42, i32::MAX
, i32::MIN
, 100, -100);
30658 let m
= _mm512_cmpeq_epi32_mask(b
, a
);
30659 assert_eq
!(m
, 0b11001111_11001111);
30662 #[simd_test(enable = "avx512f")]
30663 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
30665 let a
= _mm512_set_epi32(0, 1, -1, 13, i32::MAX
, i32::MIN
, 100, -100,
30666 0, 1, -1, 13, i32::MAX
, i32::MIN
, 100, -100);
30668 let b
= _mm512_set_epi32(0, 1, 13, 42, i32::MAX
, i32::MIN
, 100, -100,
30669 0, 1, 13, 42, i32::MAX
, i32::MIN
, 100, -100);
30670 let mask
= 0b01111010_01111010;
30671 let r
= _mm512_mask_cmpeq_epi32_mask(mask
, b
, a
);
30672 assert_eq
!(r
, 0b01001010_01001010);
30675 #[simd_test(enable = "avx512f")]
30676 unsafe fn test_mm512_cmpneq_epi32_mask() {
30678 let a
= _mm512_set_epi32(0, 1, -1, 13, i32::MAX
, i32::MIN
, 100, -100,
30679 0, 1, -1, 13, i32::MAX
, i32::MIN
, 100, -100);
30681 let b
= _mm512_set_epi32(0, 1, 13, 42, i32::MAX
, i32::MIN
, 100, -100,
30682 0, 1, 13, 42, i32::MAX
, i32::MIN
, 100, -100);
30683 let m
= _mm512_cmpneq_epi32_mask(b
, a
);
30684 assert_eq
!(m
, !_mm512_cmpeq_epi32_mask(b
, a
));
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
    let a = _mm512_set_epi32(
        0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
        0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
    );
    let b = _mm512_set_epi32(
        0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
        0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
    );
    let mask = 0b01111010_01111010;
    let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
    assert_eq!(r, 0b00110010_00110010);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_epi32_mask() {
    let a = _mm512_set_epi32(
        0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
        0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
    );
    let b = _mm512_set1_epi32(-1);
    let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
    assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_epi32_mask() {
    let a = _mm512_set_epi32(
        0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
        0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
    );
    let b = _mm512_set1_epi32(-1);
    let mask = 0b01100110_01100110;
    let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
    assert_eq!(r, 0b00000100_00000100);
}
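// Note: `_mm512_cmp_epi32_mask` with an `_MM_CMPINT_*` immediate generalizes
// the fixed-predicate comparisons above; e.g. `_MM_CMPINT_LT` computes the
// same mask as `_mm512_cmplt_epi32_mask` would for the same operands.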
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi8() {
    let r = _mm512_set1_epi8(2);
    assert_eq_m512i(
        r,
        _mm512_set_epi8(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2, 2, 2, 2, 2,
        ),
    );
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi16() {
    let r = _mm512_set1_epi16(2);
    assert_eq_m512i(
        r,
        _mm512_set_epi16(
            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
            2, 2, 2, 2,
        ),
    );
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi32() {
    let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    assert_eq_m512i(
        r,
        _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    );
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_epi32() {
    let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    assert_eq_m512i(
        r,
        _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
    );
}
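// `_mm512_set_*` takes its arguments from the highest element down to element
// 0, while `_mm512_setr_*` takes them in memory order; the two tests above
// rely on this by asserting that each call equals the other with reversed
// arguments.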
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi8() {
    let r = _mm512_set_epi8(
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2,
    );
    assert_eq_m512i(r, _mm512_set1_epi8(2));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi16() {
    let r = _mm512_set_epi16(
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2,
    );
    assert_eq_m512i(r, _mm512_set1_epi16(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi32() {
    let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, _mm512_set1_epi32(2));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_si512() {
    assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_epi32() {
    assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_ps() {
    let r = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    assert_eq_m512(
        r,
        _mm512_set_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        ),
    );
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_ps() {
    let r = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    assert_eq_m512(
        r,
        _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        ),
    );
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_ps() {
    let expected = _mm512_set_ps(
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
    );
    assert_eq_m512(expected, _mm512_set1_ps(2.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_epi32() {
    let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
    assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_ps() {
    let r = _mm512_set_ps(
        4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
    );
    assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_epi32() {
    let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
    assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_ps() {
    let r = _mm512_set_ps(
        4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
    );
    assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_ps() {
    assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero() {
    assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_pd() {
    let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
    let p = a.as_ptr();
    let r = _mm512_loadu_pd(black_box(p));
    let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
    assert_eq_m512d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_pd() {
    let a = _mm512_set1_pd(9.);
    let mut r = _mm512_undefined_pd();
    _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
    assert_eq_m512d(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_ps() {
    let a = &[
        4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
    ];
    let p = a.as_ptr();
    let r = _mm512_loadu_ps(black_box(p));
    let e = _mm512_setr_ps(
        4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_ps() {
    let a = _mm512_set1_ps(9.);
    let mut r = _mm512_undefined_ps();
    _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
    assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_pd() {
    let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_pd() {
    let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
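// The rotate tests below all use the same pattern: rotating `1 << 31` left by
// one wraps the top bit around to bit 0 (`1 << 0`), and rotating `1 << 0`
// right by one wraps it back up to bit 31.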
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rol_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_rol_epi32(a, 1);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rol_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_rol_epi32(a, 0, a, 1);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rol_epi32() {
    let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let r = _mm512_maskz_rol_epi32(0, a, 1);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_ror_epi32() {
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_ror_epi32(a, 1);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_ror_epi32() {
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_mask_ror_epi32(a, 0, a, 1);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_ror_epi32() {
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    let r = _mm512_maskz_ror_epi32(0, a, 1);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    assert_eq_m512i(r, e);
}
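// In contrast to the rotates above, the immediate shifts below discard bits
// that move past either end: `1 << 31` shifted left by one becomes 0 instead
// of wrapping around.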
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_slli_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_slli_epi32(a, 1);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_slli_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_slli_epi32(a, 0, a, 1);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_slli_epi32() {
    let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let r = _mm512_maskz_slli_epi32(0, a, 1);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srli_epi32() {
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_srli_epi32(a, 1);
    let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srli_epi32() {
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let r = _mm512_mask_srli_epi32(a, 0, a, 1);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1);
    let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srli_epi32() {
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
    let r = _mm512_maskz_srli_epi32(0, a, 1);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
    assert_eq_m512i(r, e);
}
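// The `*v` rotate and shift variants below take a vector of per-element
// counts instead of a single immediate, so every lane can be rotated or
// shifted by a different amount.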
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rolv_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_rolv_epi32(a, b);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rolv_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_rolv_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rolv_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_maskz_rolv_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rorv_epi32() {
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_rorv_epi32(a, b);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rorv_epi32() {
    let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_rorv_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rorv_epi32() {
    let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
    let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_maskz_rorv_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sllv_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_sllv_epi32(a, count);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sllv_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_sllv_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sllv_epi32() {
    let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
    let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_maskz_sllv_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srlv_epi32() {
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_srlv_epi32(a, count);
    let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srlv_epi32() {
    let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
    let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_mask_srlv_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srlv_epi32() {
    let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
    let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    let r = _mm512_maskz_srlv_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sll_epi32() {
    // The vector literals for `a` and `e` were lost in this copy of the test;
    // the values below are an assumed reconstruction consistent with the
    // shift count of 2: the top bit is shifted out and the low single bits
    // move up by two.
    let a = _mm512_set_epi32(
        1 << 31, 1 << 0, 1 << 1, 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    let count = _mm_set_epi32(0, 0, 0, 2);
    let r = _mm512_sll_epi32(a, count);
    let e = _mm512_set_epi32(
        0, 1 << 2, 1 << 3, 1 << 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sll_epi32() {
    // `a` and `e` were lost here as well; this reconstruction reuses the
    // vectors assumed in test_mm512_sll_epi32 above.
    let a = _mm512_set_epi32(
        1 << 31, 1 << 0, 1 << 1, 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    let count = _mm_set_epi32(0, 0, 0, 2);
    let r = _mm512_mask_sll_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(
        0, 1 << 2, 1 << 3, 1 << 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sll_epi32() {
    // `a` was lost; any vector whose low eight elements shift out to zero
    // satisfies the surviving all-zero expected value, e.g. one with only
    // bit 31 set in element 0:
    let a = _mm512_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 31);
    let count = _mm_set_epi32(2, 0, 0, 2);
    let r = _mm512_maskz_sll_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srl_epi32() {
    // `a` was lost in this copy; it is reconstructed from the surviving
    // expected value: shifting the elements below right by two yields `e`.
    let a = _mm512_set_epi32(
        1 << 31, 1 << 0, 1 << 1, 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    let count = _mm_set_epi32(0, 0, 0, 2);
    let r = _mm512_srl_epi32(a, count);
    let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srl_epi32() {
    // `a` reconstructed as in test_mm512_srl_epi32 above.
    let a = _mm512_set_epi32(
        1 << 31, 1 << 0, 1 << 1, 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    );
    let count = _mm_set_epi32(0, 0, 0, 2);
    let r = _mm512_mask_srl_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srl_epi32() {
    // `a` was lost; only element 0 is pinned down by `e` (`1 << 31 >> 2`
    // must give `1 << 29`), the remaining elements are assumed.
    let a = _mm512_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 31);
    let count = _mm_set_epi32(2, 0, 0, 2);
    let r = _mm512_maskz_srl_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
    assert_eq_m512i(r, e);
}
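// `sra*` below is the arithmetic right shift: the sign bit is replicated, so
// -15 >> 2 yields -4 (rounding toward negative infinity), while the logical
// `srl*` above shifts in zeros.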
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sra_epi32() {
    let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
    let count = _mm_set_epi32(1, 0, 0, 2);
    let r = _mm512_sra_epi32(a, count);
    let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sra_epi32() {
    let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
    let count = _mm_set_epi32(0, 0, 0, 2);
    let r = _mm512_mask_sra_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sra_epi32() {
    let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
    let count = _mm_set_epi32(2, 0, 0, 2);
    let r = _mm512_maskz_sra_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srav_epi32() {
    let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
    let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm512_srav_epi32(a, count);
    let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srav_epi32() {
    let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
    let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
    let r = _mm512_mask_srav_epi32(a, 0, a, count);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
    let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srav_epi32() {
    let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
    let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
    let r = _mm512_maskz_srav_epi32(0, a, count);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srai_epi32() {
    let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
    let r = _mm512_srai_epi32(a, 2);
    let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srai_epi32() {
    let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
    let r = _mm512_mask_srai_epi32(a, 0, a, 2);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2);
    let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srai_epi32() {
    let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
    let r = _mm512_maskz_srai_epi32(0, a, 2);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permute_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_permute_ps(a, 1);
    let e = _mm512_set_ps(
        2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permute_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_mask_permute_ps(a, 0b00000000_00000000, a, 1);
    assert_eq_m512(r, a);
    let r = _mm512_mask_permute_ps(a, 0b11111111_11111111, a, 1);
    let e = _mm512_set_ps(
        2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permute_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_maskz_permute_ps(0, a, 1);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_permute_ps(0b00000000_11111111, a, 1);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}
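// `_mm512_permutevar_epi32` selects elements across the full 512-bit vector
// (the same operation `_mm512_permutexvar_epi32` performs further below),
// whereas `_mm512_permutevar_ps` permutes only within each 128-bit lane; both
// facts are visible in the expected values of the next tests.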
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutevar_epi32() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_permutevar_epi32(idx, a);
    let e = _mm512_set1_epi32(14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutevar_epi32() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
    let e = _mm512_set1_epi32(14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutevar_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_set1_epi32(1);
    let r = _mm512_permutevar_ps(a, b);
    let e = _mm512_set_ps(
        2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutevar_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_set1_epi32(1);
    let r = _mm512_mask_permutevar_ps(a, 0, a, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_ps(
        2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutevar_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_set1_epi32(1);
    let r = _mm512_maskz_permutevar_ps(0, a, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutexvar_epi32() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_permutexvar_epi32(idx, a);
    let e = _mm512_set1_epi32(14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutexvar_epi32() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
    let e = _mm512_set1_epi32(14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutexvar_epi32() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutexvar_ps() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_permutexvar_ps(idx, a);
    let e = _mm512_set1_ps(14.);
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutexvar_ps() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
    assert_eq_m512(r, a);
    let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
    let e = _mm512_set1_ps(14.);
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutexvar_ps() {
    let idx = _mm512_set1_epi32(1);
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let r = _mm512_maskz_permutexvar_ps(0, idx, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
    );
    assert_eq_m512(r, e);
}
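// For the `permutex2var` tests below, bits 3:0 of each `idx` element index
// into the 32-element concatenation of the two inputs and bit 4 selects the
// source: `a` when clear, `b` when set.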
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // The `idx` literal did not survive in this copy; the pattern below is
    // reconstructed from the expected result: alternating elements come from
    // `a` (indices 1..=8) and from `b` (bit 4 set).
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_permutex2var_epi32(a, idx, b);
    let e = _mm512_set_epi32(
        14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // `idx` reconstructed as in test_mm512_permutex2var_epi32 above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
    let e = _mm512_set_epi32(
        14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // `idx` reconstructed as in test_mm512_permutex2var_epi32 above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2_permutex2var_epi32() {
    let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // `idx` reconstructed as in test_mm512_permutex2var_epi32 above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_epi32(100);
    let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
    assert_eq_m512i(r, idx);
    let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
    // The expected-value literal was lost; under the semantics asserted just
    // above (unselected elements are copied from `idx`), the upper half is
    // `idx` and the lower half is the permutation result.
    let e = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        10, 100, 9, 100, 8, 100, 7, 100,
    );
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // `idx` reconstructed from the expected result, as in the integer
    // `permutex2var` tests above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_permutex2var_ps(a, idx, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // `idx` reconstructed as in test_mm512_permutex2var_ps above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
    let e = _mm512_set_ps(
        14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // `idx` reconstructed as in test_mm512_permutex2var_ps above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2_permutex2var_ps() {
    let a = _mm512_set_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // `idx` reconstructed as in test_mm512_permutex2var_ps above.
    let idx = _mm512_set_epi32(
        1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4,
        5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4,
    );
    let b = _mm512_set1_ps(100.);
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0b00000000_11111111, b);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
    );
    assert_eq_m512(r, e);
}
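// `_MM_PERM_AADD` below encodes the per-128-bit-lane selection for
// `shuffle_epi32`: letters name elements A=0 through D=3, listed from the
// highest result position down, so AADD places element 3 in the two low
// positions and element 0 in the two high positions of every lane.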
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_epi32() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD);
    let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_shuffle_ps(a, b, 0x0F);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_mask_shuffle_ps(a, 0, a, b, 0x0F);
    assert_eq_m512(r, a);
    let r = _mm512_mask_shuffle_ps(a, 0b11111111_11111111, a, b, 0x0F);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_ps() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_maskz_shuffle_ps(0, a, b, 0x0F);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_shuffle_ps(0b00000000_11111111, a, b, 0x0F);
    let e = _mm512_setr_ps(
        8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}
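// `shuffle_i32x4`/`shuffle_f32x4` move whole 128-bit lanes rather than single
// elements: with control 0b00000000 the two low result lanes repeat lane 0 of
// `a` and the two high ones repeat lane 0 of `b`.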
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_shuffle_i32x4(a, b, 0b00000000);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b00000000);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_shuffle_i32x4(a, 0b11111111_11111111, a, b, 0b00000000);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_i32x4() {
    let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
    let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
    let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b00000000);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b00000000);
    let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_shuffle_f32x4(a, b, 0b00000000);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000);
    assert_eq_m512(r, a);
    let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_f32x4() {
    let a = _mm512_setr_ps(
        1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
    );
    let b = _mm512_setr_ps(
        2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
    );
    let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000);
    let e = _mm512_setr_ps(
        1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_extractf32x4_ps(a, 0x1);
    let e = _mm_setr_ps(5., 6., 7., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let src = _mm_set1_ps(100.);
    let r = _mm512_mask_extractf32x4_ps(src, 0, a, 0x1);
    assert_eq_m128(r, src);
    let r = _mm512_mask_extractf32x4_ps(src, 0b11111111, a, 0x1);
    let e = _mm_setr_ps(5., 6., 7., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_extractf32x4_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_extractf32x4_ps(0, a, 0x1);
    assert_eq_m128(r, _mm_setzero_ps());
    let r = _mm512_maskz_extractf32x4_ps(0b00000001, a, 0x1);
    let e = _mm_setr_ps(5., 0., 0., 0.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let r = _mm512_extracti32x4_epi32(a, 0x1);
    let e = _mm_setr_epi32(5, 6, 7, 8);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let src = _mm_set1_epi32(100);
    let r = _mm512_mask_extracti32x4_epi32(src, 0, a, 0x1);
    assert_eq_m128i(r, src);
    let r = _mm512_mask_extracti32x4_epi32(src, 0b11111111, a, 0x1);
    let e = _mm_setr_epi32(5, 6, 7, 8);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_extracti32x4_epi32() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let r = _mm512_maskz_extracti32x4_epi32(0, a, 0x1);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm512_maskz_extracti32x4_epi32(0b00000001, a, 0x1);
    let e = _mm_setr_epi32(5, 0, 0, 0);
    assert_eq_m128i(r, e);
}
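// `moveldup` duplicates the even-indexed element of each pair into the odd
// position (1., 1., 3., 3., ...), while `movehdup` duplicates the odd-indexed
// element downward (2., 2., 4., 4., ...).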
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_moveldup_ps(a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_mask_moveldup_ps(a, 0, a);
    assert_eq_m512(r, a);
    let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_moveldup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_moveldup_ps(0, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
    let e = _mm512_setr_ps(
        1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_movehdup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_movehdup_ps(a);
    let e = _mm512_setr_ps(
        2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_movehdup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_mask_movehdup_ps(a, 0, a);
    assert_eq_m512(r, a);
    let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
    let e = _mm512_setr_ps(
        2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_movehdup_ps() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let r = _mm512_maskz_movehdup_ps(0, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
    let e = _mm512_setr_ps(
        2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_inserti32x4() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm_setr_epi32(17, 18, 19, 20);
    let r = _mm512_inserti32x4(a, b, 0);
    let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_inserti32x4() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm_setr_epi32(17, 18, 19, 20);
    let r = _mm512_mask_inserti32x4(a, 0, a, b, 0);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_inserti32x4(a, 0b11111111_11111111, a, b, 0);
    let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_inserti32x4() {
    let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm_setr_epi32(17, 18, 19, 20);
    let r = _mm512_maskz_inserti32x4(0, a, b, 0);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_inserti32x4(0b00000000_11111111, a, b, 0);
    let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_insertf32x4() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let b = _mm_setr_ps(17., 18., 19., 20.);
    let r = _mm512_insertf32x4(a, b, 0);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_insertf32x4() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let b = _mm_setr_ps(17., 18., 19., 20.);
    let r = _mm512_mask_insertf32x4(a, 0, a, b, 0);
    assert_eq_m512(r, a);
    let r = _mm512_mask_insertf32x4(a, 0b11111111_11111111, a, b, 0);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_insertf32x4() {
    let a = _mm512_setr_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let b = _mm_setr_ps(17., 18., 19., 20.);
    let r = _mm512_maskz_insertf32x4(0, a, b, 0);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_insertf32x4(0b00000000_11111111, a, b, 0);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps128_ps512() {
    let a = _mm_setr_ps(17., 18., 19., 20.);
    let r = _mm512_castps128_ps512(a);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps256_ps512() {
    let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
    let r = _mm512_castps256_ps512(a);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_zextps128_ps512() {
    let a = _mm_setr_ps(17., 18., 19., 20.);
    let r = _mm512_zextps128_ps512(a);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_zextps256_ps512() {
    let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
    let r = _mm512_zextps256_ps512(a);
    let e = _mm512_setr_ps(
        17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps512_ps128() {
    let a = _mm512_setr_ps(
        17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
    );
    let r = _mm512_castps512_ps128(a);
    let e = _mm_setr_ps(17., 18., 19., 20.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps512_ps256() {
    let a = _mm512_setr_ps(
        17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
    );
    let r = _mm512_castps512_ps256(a);
    let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
    assert_eq_m256(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps_pd() {
    let a = _mm512_set1_ps(1.);
    let r = _mm512_castps_pd(a);
    let e = _mm512_set1_pd(0.007812501848093234);
    assert_eq_m512d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps_si512() {
    let a = _mm512_set1_ps(1.);
    let r = _mm512_castps_si512(a);
    let e = _mm512_set1_epi32(1065353216);
    assert_eq_m512i(r, e);
}
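// `_mm_set_epi32(17, 18, 19, 20)` places 20 in element 0, and that lowest
// element is what `broadcastd`/`broadcastss` replicate into all sixteen
// lanes below.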
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcastd_epi32() {
    let a = _mm_set_epi32(17, 18, 19, 20);
    let r = _mm512_broadcastd_epi32(a);
    let e = _mm512_set1_epi32(20);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcastd_epi32() {
    let src = _mm512_set1_epi32(20);
    let a = _mm_set_epi32(17, 18, 19, 20);
    let r = _mm512_mask_broadcastd_epi32(src, 0, a);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
    let e = _mm512_set1_epi32(20);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcastd_epi32() {
    let a = _mm_set_epi32(17, 18, 19, 20);
    let r = _mm512_maskz_broadcastd_epi32(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
    let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcastss_ps() {
    let a = _mm_set_ps(17., 18., 19., 20.);
    let r = _mm512_broadcastss_ps(a);
    let e = _mm512_set1_ps(20.);
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcastss_ps() {
    let src = _mm512_set1_ps(20.);
    let a = _mm_set_ps(17., 18., 19., 20.);
    let r = _mm512_mask_broadcastss_ps(src, 0, a);
    assert_eq_m512(r, src);
    let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
    let e = _mm512_set1_ps(20.);
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcastss_ps() {
    let a = _mm_set_ps(17., 18., 19., 20.);
    let r = _mm512_maskz_broadcastss_ps(0, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
    let e = _mm512_setr_ps(
        20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
    );
    assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcast_i32x4() {
    let a = _mm_set_epi32(17, 18, 19, 20);
    let r = _mm512_broadcast_i32x4(a);
    let e = _mm512_set_epi32(
        17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
    );
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcast_i32x4() {
    let src = _mm512_set1_epi32(20);
    let a = _mm_set_epi32(17, 18, 19, 20);
    let r = _mm512_mask_broadcast_i32x4(src, 0, a);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
    let e = _mm512_set_epi32(
        17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
    );
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcast_i32x4() {
    let a = _mm_set_epi32(17, 18, 19, 20);
    let r = _mm512_maskz_broadcast_i32x4(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcast_f32x4() {
    let a = _mm_set_ps(17., 18., 19., 20.);
    let r = _mm512_broadcast_f32x4(a);
    let e = _mm512_set_ps(
        17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcast_f32x4() {
    let src = _mm512_set1_ps(20.);
    let a = _mm_set_ps(17., 18., 19., 20.);
    let r = _mm512_mask_broadcast_f32x4(src, 0, a);
    assert_eq_m512(r, src);
    let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
    let e = _mm512_set_ps(
        17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcast_f32x4() {
    let a = _mm_set_ps(17., 18., 19., 20.);
    let r = _mm512_maskz_broadcast_f32x4(0, a);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
    );
    assert_eq_m512(r, e);
}
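// For `mask_blend`, mask bit i picks element i from the second operand when
// set and from the first when clear, so 0b11111111_00000000 takes the upper
// eight elements from `b` and the lower eight from `a`.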
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_blend_epi32() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
    let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_blend_ps() {
    let a = _mm512_set1_ps(1.);
    let b = _mm512_set1_ps(2.);
    let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
    let e = _mm512_set_ps(
        2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
    );
    assert_eq_m512(r, e);
}
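// The unpack tests below interleave elements from the high (`unpackhi`) or
// low (`unpacklo`) half of each 128-bit lane of the two inputs, which is why
// the expected vectors alternate between elements of `b` and `a`.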
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpackhi_epi32() {
    let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm512_set_epi32(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm512_unpackhi_epi32(a, b);
    let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpackhi_epi32() {
    let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm512_set_epi32(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpackhi_epi32() {
    let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm512_set_epi32(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm512_maskz_unpackhi_epi32(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
    let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpackhi_ps() {
    let a = _mm512_set_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let b = _mm512_set_ps(
        17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
    );
    let r = _mm512_unpackhi_ps(a, b);
    let e = _mm512_set_ps(
        17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpackhi_ps() {
    let a = _mm512_set_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let b = _mm512_set_ps(
        17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
    );
    let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
    assert_eq_m512(r, a);
    let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
    let e = _mm512_set_ps(
        17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
    );
    assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpackhi_ps() {
    let a = _mm512_set_ps(
        1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
    );
    let b = _mm512_set_ps(
        17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
    );
    let r = _mm512_maskz_unpackhi_ps(0, a, b);
    assert_eq_m512(r, _mm512_setzero_ps());
    let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
    let e = _mm512_set_ps(
        0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
    );
    assert_eq_m512(r, e);
}
32618 #[simd_test(enable = "avx512f")]
32619 unsafe fn test_mm512_unpacklo_epi32() {
32620 let a
= _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32621 let b
= _mm512_set_epi32(
32622 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32624 let r
= _mm512_unpacklo_epi32(a
, b
);
32625 let e
= _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
32626 assert_eq_m512i(r
, e
);
32629 #[simd_test(enable = "avx512f")]
32630 unsafe fn test_mm512_mask_unpacklo_epi32() {
32631 let a
= _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32632 let b
= _mm512_set_epi32(
32633 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32635 let r
= _mm512_mask_unpacklo_epi32(a
, 0, a
, b
);
32636 assert_eq_m512i(r
, a
);
32637 let r
= _mm512_mask_unpacklo_epi32(a
, 0b11111111_11111111, a
, b
);
32638 let e
= _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
32639 assert_eq_m512i(r
, e
);
32642 #[simd_test(enable = "avx512f")]
32643 unsafe fn test_mm512_maskz_unpacklo_epi32() {
32644 let a
= _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
32645 let b
= _mm512_set_epi32(
32646 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
32648 let r
= _mm512_maskz_unpacklo_epi32(0, a
, b
);
32649 assert_eq_m512i(r
, _mm512_setzero_si512());
32650 let r
= _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a
, b
);
32651 let e
= _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
32652 assert_eq_m512i(r
, e
);
32655 #[simd_test(enable = "avx512f")]
32656 unsafe fn test_mm512_unpacklo_ps() {
32657 let a
= _mm512_set_ps(
32658 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32660 let b
= _mm512_set_ps(
32661 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32663 let r
= _mm512_unpacklo_ps(a
, b
);
32664 let e
= _mm512_set_ps(
32665 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
32667 assert_eq_m512(r
, e
);
32670 #[simd_test(enable = "avx512f")]
32671 unsafe fn test_mm512_mask_unpacklo_ps() {
32672 let a
= _mm512_set_ps(
32673 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32675 let b
= _mm512_set_ps(
32676 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32678 let r
= _mm512_mask_unpacklo_ps(a
, 0, a
, b
);
32679 assert_eq_m512(r
, a
);
32680 let r
= _mm512_mask_unpacklo_ps(a
, 0b11111111_11111111, a
, b
);
32681 let e
= _mm512_set_ps(
32682 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
32684 assert_eq_m512(r
, e
);
32687 #[simd_test(enable = "avx512f")]
32688 unsafe fn test_mm512_maskz_unpacklo_ps() {
32689 let a
= _mm512_set_ps(
32690 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
32692 let b
= _mm512_set_ps(
32693 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
32695 let r
= _mm512_maskz_unpacklo_ps(0, a
, b
);
32696 assert_eq_m512(r
, _mm512_setzero_ps());
32697 let r
= _mm512_maskz_unpacklo_ps(0b00000000_11111111, a
, b
);
32698 let e
= _mm512_set_ps(
32699 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
32701 assert_eq_m512(r
, e
);
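
    // `_mm512_alignr_epi32` (valignd) concatenates `a` and `b` into a 32-element
    // intermediate with `b` in the low half, shifts it right by `imm8` 32-bit
    // elements, and returns the low 16 elements; a shift count of 0 yields `b`.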
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_alignr_epi32(a, b, 0);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32(a, b, 16);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32(a, b, 1);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_mask_alignr_epi32(a, 0, a, b, 1);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi32(a, 0b11111111_11111111, a, b, 1);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_maskz_alignr_epi32(0, a, b, 1);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi32(0b00000000_11111111, a, b, 1);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_and_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_and_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_and_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_and_si512() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_or_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_or_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_or_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_or_si512() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_si512(a, b);
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_xor_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_epi32(a, b);
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_xor_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_xor_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_xor_epi32() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_xor_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_xor_si512() {
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
        );
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_xor_si512(a, b);
        let e = _mm512_set_epi32(
            1 << 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_andnot_epi32() {
        let a = _mm512_set1_epi32(0);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_andnot_epi32(a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_andnot_epi32() {
        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
        let r = _mm512_maskz_andnot_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(
            0, 0, 0, 0, 0, 0, 0, 0,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }
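
    // The k* intrinsics operate on opmask values directly: a `__mmask16` is a
    // plain 16-bit integer with one bit per 32-bit lane. `_mm512_kandn` computes
    // `!a & b` and `_mm512_kxnor` computes `!(a ^ b)`; the `_k*_mask16` forms are
    // the newer names for the same operations.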
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kand() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b11001100_00110011;
        let r = _mm512_kand(a, b);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kand_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b11001100_00110011;
        let r = _kand_mask16(a, b);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kor(a, b);
        let e: u16 = 0b11101110_00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kor_mask16(a, b);
        let e: u16 = 0b11101110_00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kxor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kxor(a, b);
        let e: u16 = 0b11100010_00111000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kxor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kxor_mask16(a, b);
        let e: u16 = 0b11100010_00111000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_knot() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_knot(a);
        let e: u16 = 0b00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_knot_mask16() {
        let a: u16 = 0b11001100_00110011;
        let r = _knot_mask16(a);
        let e: u16 = 0b00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kandn() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kandn(a, b);
        let e: u16 = 0b00100010_00001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kandn_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kandn_mask16(a, b);
        let e: u16 = 0b00100010_00001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kxnor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kxnor(a, b);
        let e: u16 = 0b00011101_11000111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_kxnor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kxnor_mask16(a, b);
        let e: u16 = 0b00011101_11000111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kmov() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_kmov(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_int2mask() {
        let a: i32 = 0b11001100_00110011;
        let r = _mm512_int2mask(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask2int() {
        let k1: __mmask16 = 0b11001100_00110011;
        let r = _mm512_mask2int(k1);
        let e: i32 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kunpackb() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kunpackb(a, b);
        let e: u16 = 0b00101110_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_kortestc() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestc(a, b);
        assert_eq!(0, r);
        let b: u16 = 0b11111111_11111111;
        let r = _mm512_kortestc(a, b);
        assert_eq!(1, r);
    }
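
    // `_mm512_test_epi32_mask` (vptestmd) sets mask bit `i` when element `i` of
    // `a & b` is non-zero; `_mm512_testn_epi32_mask` (vptestnmd) sets it when that
    // AND is zero. The `mask_` variants additionally AND the result with `k`.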
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_test_epi32_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi32_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 1);
        let r = _mm512_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }
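
    // `_mm512_stream_ps` is a non-temporal (streaming) store that bypasses the
    // cache hierarchy; the destination must be 64-byte aligned, which is what the
    // `#[repr(align(64))]` wrapper below guarantees.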
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_stream_ps() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f32; 16],
        }
        let a = _mm512_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 16] };

        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
        for i in 0..16 {
            assert_eq!(mem.data[i], get_m512(a, i));
        }
    }
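
    // The reduce_* helpers are composite sequences (not single instructions) that
    // fold all 16 elements into one scalar; in the `mask_` variants, elements whose
    // mask bit is clear simply do not participate in the reduction.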
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_reduce_add_epi32(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_reduce_add_ps(a);
        assert_eq!(16., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
        assert_eq!(8., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_reduce_mul_epi32(a);
        assert_eq!(65536, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_reduce_mul_ps(a);
        assert_eq!(65536., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
        assert_eq!(256., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_max_epi32(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_max_epu32(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_max_ps(a);
        assert_eq!(15., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
        assert_eq!(7., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_min_epi32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_min_epu32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_min_ps(a);
        assert_eq!(0., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
        assert_eq!(0., e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_and_epi32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_or_epi32(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }
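
    // Compress packs the elements selected by the mask contiguously into the low
    // lanes of the result (remaining lanes come from `src`, or are zeroed in the
    // `maskz_` form); expand is the inverse, scattering consecutive low elements
    // of `a` into the lanes selected by the mask.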
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compress_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_compress_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compress_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_compress_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expand_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expand_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expand_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expand_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
        );
        assert_eq_m512(r, e);
    }
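
    // `loadu`/`storeu` accept arbitrarily aligned pointers, while the `load`/
    // `store` forms further below require 64-byte alignment (enforced there with an
    // `#[repr(align(64))]` wrapper); `black_box` keeps the pointer opaque so the
    // load cannot be constant-folded away.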
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = _mm512_loadu_epi32(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_storeu_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_loadu_si512() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = _mm512_loadu_si512(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_storeu_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_si512(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_load_si512() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_si512(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_store_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_store_si512(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_epi32(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_store_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
            ],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_ps(black_box(p));
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_store_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
        assert_eq_m512(r, a);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_set1_epi32() {
        let src = _mm512_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm512_mask_set1_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm512_maskz_set1_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }
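
    // The masked scalar (_ss/_sd) intrinsics operate on the low element only: the
    // upper elements are always copied from the first vector operand, and mask bit
    // 0 chooses between the computed low element and the fallback (`src` for
    // `mask_`, zero for `maskz_`). Only bit 0 of masks like 0b11111111 matters.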
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_move_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_move_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_move_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_move_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_move_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_move_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_move_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_move_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_move_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_move_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_add_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_add_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_add_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_add_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_add_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_add_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_add_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_add_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sub_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sub_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sub_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sub_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sub_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sub_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_mul_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_mul_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_mul_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_mul_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_mul_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_mul_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_mul_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_div_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_div_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_div_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_div_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_sqrt_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_sqrt_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }
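
    // rsqrt14/rcp14 compute approximations with a maximum relative error of 2^-14.
    // The inputs below are powers of two whose true reciprocals (0.5, 0.25) are
    // exactly representable, which is presumably why these tests can assert exact
    // equality rather than comparing within a tolerance.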
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rsqrt14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rsqrt14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rsqrt14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rsqrt14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_rsqrt14_sd(a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rsqrt14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rsqrt14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_rsqrt14_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rcp14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rcp14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rcp14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rcp14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rcp14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rcp14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_rcp14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_rcp14_sd(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_rcp14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rcp14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_rcp14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_rcp14_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }
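
    // `getexp` returns floor(log2(|x|)) of the low element as a float, so 3.0
    // yields 1.0; `getmant` returns the mantissa normalized into the interval
    // selected by the norm constant: with `_MM_MANT_NORM_1_2`, 10.0 = 1.25 * 2^3
    // yields 1.25.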
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_ss(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_ss(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_ss(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_sd(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_sd(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_sd(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_getmant_ss(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_ss(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_ss(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_ss(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_ss(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_sd(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_sd(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_sd(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_sd(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }
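
    // `roundscale` rounds the low element to `imm8 >> 4` fraction bits using the
    // rounding mode in the low bits of `imm8`; an immediate of 0 therefore rounds
    // to the nearest integer, taking 1.1 to 1.0.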
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_ss(a, b, 0);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_ss(a, 0, a, b, 0);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_ss(a, 0b11111111, a, b, 0);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_ss(0, a, b, 0);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_ss(0b11111111, a, b, 0);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_sd(a, b, 0);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_sd(a, 0, a, b, 0);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_sd(a, 0b11111111, a, b, 0);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_sd(0, a, b, 0);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_sd(0b11111111, a, b, 0);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }
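
    // `scalef` computes `a * 2^floor(b)` on the low element: 1.0 * 2^3 = 8.0.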
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(3.);
    let r = _mm_scalef_ss(a, b);
    let e = _mm_set_ps(1., 1., 1., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(3.);
    let r = _mm_mask_scalef_ss(a, 0, a, b);
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
    let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
    let e = _mm_set_ps(1., 1., 1., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(3.);
    let r = _mm_maskz_scalef_ss(0, a, b);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_scalef_ss(0b11111111, a, b);
    let e = _mm_set_ps(1., 1., 1., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(3.);
    let r = _mm_scalef_sd(a, b);
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(3.);
    let r = _mm_mask_scalef_sd(a, 0, a, b);
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(3.);
    let r = _mm_maskz_scalef_sd(0, a, b);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_scalef_sd(0b11111111, a, b);
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}
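
// Masked scalar FMA semantics: mask bit 0 selects between the fused result
// a*b+c and a fallback for the low lane. The `mask_` form falls back to the
// first operand, `maskz_` to zero, and `mask3_` to c; the upper lanes are
// always copied from a (or from c for the `mask3_` form).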
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fmadd_ss(a, 0, b, c);
    assert_eq_m128(r, a);
    let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
    let e = _mm_set_ps(1., 1., 1., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_maskz_fmadd_ss(0, a, b, c);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
    let e = _mm_set_ps(1., 1., 1., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask3_fmadd_ss(a, b, c, 0);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
    let e = _mm_set_ps(3., 3., 3., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fmadd_sd(a, 0, b, c);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
    let e = _mm_set_pd(1., 5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_maskz_fmadd_sd(0, a, b, c);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
    let e = _mm_set_pd(1., 5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask3_fmadd_sd(a, b, c, 0);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
    let e = _mm_set_pd(3., 5.);
    assert_eq_m128d(r, e);
}
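
// fmsub computes a*b - c, so the fused low lane here is 1.*2. - 3. = -1.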
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fmsub_ss(a, 0, b, c);
    assert_eq_m128(r, a);
    let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
    let e = _mm_set_ps(1., 1., 1., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_maskz_fmsub_ss(0, a, b, c);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
    let e = _mm_set_ps(1., 1., 1., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask3_fmsub_ss(a, b, c, 0);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
    let e = _mm_set_ps(3., 3., 3., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fmsub_sd(a, 0, b, c);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
    let e = _mm_set_pd(1., -1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_maskz_fmsub_sd(0, a, b, c);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
    let e = _mm_set_pd(1., -1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask3_fmsub_sd(a, b, c, 0);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
    let e = _mm_set_pd(3., -1.);
    assert_eq_m128d(r, e);
}
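
// fnmadd negates the product: -(a*b) + c = -2. + 3. = 1. in the low lane.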
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fnmadd_ss(a, 0, b, c);
    assert_eq_m128(r, a);
    let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_maskz_fnmadd_ss(0, a, b, c);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
    let e = _mm_set_ps(3., 3., 3., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fnmadd_sd(a, 0, b, c);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_maskz_fnmadd_sd(0, a, b, c);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
    let e = _mm_set_pd(3., 1.);
    assert_eq_m128d(r, e);
}
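
// fnmsub negates both terms: -(a*b) - c = -2. - 3. = -5. in the low lane.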
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fnmsub_ss(a, 0, b, c);
    assert_eq_m128(r, a);
    let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
    let e = _mm_set_ps(1., 1., 1., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_maskz_fnmsub_ss(0, a, b, c);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
    let e = _mm_set_ps(1., 1., 1., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
    let e = _mm_set_ps(3., 3., 3., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fnmsub_sd(a, 0, b, c);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
    let e = _mm_set_pd(1., -5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_maskz_fnmsub_sd(0, a, b, c);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
    let e = _mm_set_pd(1., -5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
    let e = _mm_set_pd(3., -5.);
    assert_eq_m128d(r, e);
}
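
// The `_round` arithmetic intrinsics below take an explicit rounding
// operand: either _MM_FROUND_CUR_DIRECTION (use the current MXCSR rounding
// mode) or a fixed mode such as _MM_FROUND_TO_ZERO combined with
// _MM_FROUND_NO_EXC to suppress floating-point exceptions.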
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_add_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 60.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_round_ss() {
    let src = _mm_set_ps(10., 11., 100., 110.);
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_mask_add_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 110.);
    assert_eq_m128(r, e);
    let r = _mm_mask_add_round_ss(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 2., 10., 60.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_maskz_add_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_add_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 60.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_add_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_add_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 6.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_round_sd() {
    let src = _mm_set_pd(10., 11.);
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 11.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_add_round_sd(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 6.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_add_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 6.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sub_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., -20.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_round_ss() {
    let src = _mm_set_ps(10., 11., 100., 110.);
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_mask_sub_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 110.);
    assert_eq_m128(r, e);
    let r = _mm_mask_sub_round_ss(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 2., 10., -20.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_maskz_sub_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_sub_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., -20.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sub_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_sub_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., -2.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_round_sd() {
    let src = _mm_set_pd(10., 11.);
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_mask_sub_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 11.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_sub_round_sd(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., -2.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_maskz_sub_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_sub_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., -2.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mul_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_mul_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 800.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_round_ss() {
    let src = _mm_set_ps(10., 11., 100., 110.);
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_mask_mul_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 110.);
    assert_eq_m128(r, e);
    let r = _mm_mask_mul_round_ss(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 2., 10., 800.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_maskz_mul_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_mul_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 800.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mul_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_mul_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_round_sd() {
    let src = _mm_set_pd(10., 11.);
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_mask_mul_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 11.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_mul_round_sd(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_maskz_mul_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_mul_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_div_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_div_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_round_ss() {
    let src = _mm_set_ps(10., 11., 100., 110.);
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_mask_div_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 110.);
    assert_eq_m128(r, e);
    let r = _mm_mask_div_round_ss(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 2., 10., 0.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 40.);
    let r = _mm_maskz_div_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_div_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_div_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.5);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_round_sd() {
    let src = _mm_set_pd(10., 11.);
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_mask_div_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 11.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_div_round_sd(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 0.5);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_maskz_div_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_div_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.5);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_max_round_ss() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let b = _mm_set_ps(4., 5., 6., 7.);
    let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 7.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_round_ss() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let b = _mm_set_ps(4., 5., 6., 7.);
    let r = _mm_mask_max_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 3.);
    assert_eq_m128(r, e);
    let r = _mm_mask_max_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 7.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_round_ss() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let b = _mm_set_ps(4., 5., 6., 7.);
    let r = _mm_maskz_max_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_max_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 7.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_max_round_sd() {
    let a = _mm_set_pd(0., 1.);
    let b = _mm_set_pd(2., 3.);
    let r = _mm_max_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 3.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_round_sd() {
    let a = _mm_set_pd(0., 1.);
    let b = _mm_set_pd(2., 3.);
    let r = _mm_mask_max_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 1.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_max_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 3.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_round_sd() {
    let a = _mm_set_pd(0., 1.);
    let b = _mm_set_pd(2., 3.);
    let r = _mm_maskz_max_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_max_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 3.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_min_round_ss() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let b = _mm_set_ps(4., 5., 6., 7.);
    let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 3.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_round_ss() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let b = _mm_set_ps(4., 5., 6., 7.);
    let r = _mm_mask_min_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 3.);
    assert_eq_m128(r, e);
    let r = _mm_mask_min_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 3.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_round_ss() {
    let a = _mm_set_ps(0., 1., 2., 3.);
    let b = _mm_set_ps(4., 5., 6., 7.);
    let r = _mm_maskz_min_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_min_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(0., 1., 2., 3.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_min_round_sd() {
    let a = _mm_set_pd(0., 1.);
    let b = _mm_set_pd(2., 3.);
    let r = _mm_min_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_round_sd() {
    let a = _mm_set_pd(0., 1.);
    let b = _mm_set_pd(2., 3.);
    let r = _mm_mask_min_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 1.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_min_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_round_sd() {
    let a = _mm_set_pd(0., 1.);
    let b = _mm_set_pd(2., 3.);
    let r = _mm_maskz_min_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_min_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 1.);
    assert_eq_m128d(r, e);
}
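
// sqrt_round takes the square root of the low element of b (here
// sqrt(4.) = 2.) and copies the upper elements from a.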
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sqrt_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 4.);
    let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 2.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_round_ss() {
    let src = _mm_set_ps(10., 11., 100., 110.);
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 4.);
    let r = _mm_mask_sqrt_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 110.);
    assert_eq_m128(r, e);
    let r = _mm_mask_sqrt_round_ss(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 2., 10., 2.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_round_ss() {
    let a = _mm_set_ps(1., 2., 10., 20.);
    let b = _mm_set_ps(3., 4., 30., 4.);
    let r = _mm_maskz_sqrt_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_sqrt_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 2., 10., 2.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sqrt_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 2.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_round_sd() {
    let src = _mm_set_pd(10., 11.);
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_mask_sqrt_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 11.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_sqrt_round_sd(
        src,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 2.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_round_sd() {
    let a = _mm_set_pd(1., 2.);
    let b = _mm_set_pd(3., 4.);
    let r = _mm_maskz_sqrt_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_sqrt_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 2.);
    assert_eq_m128d(r, e);
}
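
// getexp returns floor(log2(|x|)) of the low element of b as a float, so
// getexp(3.0) = 1.0; the upper elements come from a.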
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_round_ss() {
    let a = _mm_set1_ps(2.);
    let b = _mm_set1_ps(3.);
    let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2., 2., 2., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_round_ss() {
    let a = _mm_set1_ps(2.);
    let b = _mm_set1_ps(3.);
    let r = _mm_mask_getexp_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2., 2., 2., 2.);
    assert_eq_m128(r, e);
    let r = _mm_mask_getexp_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2., 2., 2., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_round_ss() {
    let a = _mm_set1_ps(2.);
    let b = _mm_set1_ps(3.);
    let r = _mm_maskz_getexp_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2., 2., 2., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_getexp_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2., 2., 2., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_round_sd() {
    let a = _mm_set1_pd(2.);
    let b = _mm_set1_pd(3.);
    let r = _mm_getexp_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_round_sd() {
    let a = _mm_set1_pd(2.);
    let b = _mm_set1_pd(3.);
    let r = _mm_mask_getexp_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2., 2.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_getexp_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_round_sd() {
    let a = _mm_set1_pd(2.);
    let b = _mm_set1_pd(3.);
    let r = _mm_maskz_getexp_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_getexp_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2., 1.);
    assert_eq_m128d(r, e);
}
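
// getmant extracts the normalized mantissa of the low element of b: with the
// [1, 2) normalization interval and the source sign, 10.0 = 1.25 * 2^3
// yields 1.25.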
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_round_ss() {
    let a = _mm_set1_ps(20.);
    let b = _mm_set1_ps(10.);
    let r = _mm_getmant_round_ss(
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_ps(20., 20., 20., 1.25);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_round_ss() {
    let a = _mm_set1_ps(20.);
    let b = _mm_set1_ps(10.);
    let r = _mm_mask_getmant_round_ss(
        a,
        0,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_ps(20., 20., 20., 20.);
    assert_eq_m128(r, e);
    let r = _mm_mask_getmant_round_ss(
        a,
        0b11111111,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_ps(20., 20., 20., 1.25);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_round_ss() {
    let a = _mm_set1_ps(20.);
    let b = _mm_set1_ps(10.);
    let r = _mm_maskz_getmant_round_ss(
        0,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_ps(20., 20., 20., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_getmant_round_ss(
        0b11111111,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_ps(20., 20., 20., 1.25);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_round_sd() {
    let a = _mm_set1_pd(20.);
    let b = _mm_set1_pd(10.);
    let r = _mm_getmant_round_sd(
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_pd(20., 1.25);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_round_sd() {
    let a = _mm_set1_pd(20.);
    let b = _mm_set1_pd(10.);
    let r = _mm_mask_getmant_round_sd(
        a,
        0,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_pd(20., 20.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_getmant_round_sd(
        a,
        0b11111111,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_pd(20., 1.25);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_round_sd() {
    let a = _mm_set1_pd(20.);
    let b = _mm_set1_pd(10.);
    let r = _mm_maskz_getmant_round_sd(
        0,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_pd(20., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_getmant_round_sd(
        0b11111111,
        a,
        b,
        _MM_MANT_NORM_1_2,
        _MM_MANT_SIGN_SRC,
        _MM_FROUND_CUR_DIRECTION,
    );
    let e = _mm_set_pd(20., 1.25);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_round_ss() {
    let a = _mm_set1_ps(2.2);
    let b = _mm_set1_ps(1.1);
    let r = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_round_ss() {
    let a = _mm_set1_ps(2.2);
    let b = _mm_set1_ps(1.1);
    let r = _mm_mask_roundscale_round_ss(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
    assert_eq_m128(r, e);
    let r = _mm_mask_roundscale_round_ss(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_round_ss() {
    let a = _mm_set1_ps(2.2);
    let b = _mm_set1_ps(1.1);
    let r = _mm_maskz_roundscale_round_ss(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
    assert_eq_m128(r, e);
    let r = _mm_maskz_roundscale_round_ss(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_round_sd() {
    let a = _mm_set1_pd(2.2);
    let b = _mm_set1_pd(1.1);
    let r = _mm_roundscale_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2.2, 1.0);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_round_sd() {
    let a = _mm_set1_pd(2.2);
    let b = _mm_set1_pd(1.1);
    let r = _mm_mask_roundscale_round_sd(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2.2, 2.2);
    assert_eq_m128d(r, e);
    let r = _mm_mask_roundscale_round_sd(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2.2, 1.0);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_round_sd() {
    let a = _mm_set1_pd(2.2);
    let b = _mm_set1_pd(1.1);
    let r = _mm_maskz_roundscale_round_sd(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2.2, 0.0);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_roundscale_round_sd(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(2.2, 1.0);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(3.);
    let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(3.);
    let r = _mm_mask_scalef_round_ss(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
    let r = _mm_mask_scalef_round_ss(
        a,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(3.);
    let r = _mm_maskz_scalef_round_ss(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_scalef_round_ss(
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., 8.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(3.);
    let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(3.);
    let r = _mm_mask_scalef_round_sd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
    let r = _mm_mask_scalef_round_sd(
        a,
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(3.);
    let r = _mm_maskz_scalef_round_sd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_scalef_round_sd(
        0b11111111,
        a,
        b,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 8.);
    assert_eq_m128d(r, e);
}
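
// The FMA `_round` variants repeat the masked fused-multiply-add tests
// above with an explicit rounding operand; the expected values are the
// same as for the non-rounding forms.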
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, a);
    let r = _mm_mask_fmadd_round_ss(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fmadd_round_ss(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fmadd_round_ss(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(3., 3., 3., 5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fmadd_round_sd(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_maskz_fmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fmadd_round_sd(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask3_fmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fmadd_round_sd(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(3., 5.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, a);
    let r = _mm_mask_fmsub_round_ss(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_maskz_fmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fmsub_round_ss(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask3_fmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fmsub_round_ss(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(3., 3., 3., -1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_fmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., -1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fmsub_round_sd(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., -1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_maskz_fmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fmsub_round_sd(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., -1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask3_fmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fmsub_round_sd(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(3., -1.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fnmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, a);
    let r = _mm_mask_fnmadd_round_ss(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r =
        _mm_maskz_fnmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fnmadd_round_ss(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r =
        _mm_mask3_fnmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fnmadd_round_ss(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(3., 3., 3., 1.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_fnmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fnmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fnmadd_round_sd(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r =
        _mm_maskz_fnmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fnmadd_round_sd(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., 1.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r =
        _mm_mask3_fnmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fnmadd_round_sd(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(3., 1.);
    assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r = _mm_mask_fnmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, a);
    let r = _mm_mask_fnmsub_round_ss(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r =
        _mm_maskz_fnmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(1., 1., 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fnmsub_round_ss(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(1., 1., 1., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_round_ss() {
    let a = _mm_set1_ps(1.);
    let b = _mm_set1_ps(2.);
    let c = _mm_set1_ps(3.);
    let r =
        _mm_mask3_fnmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, c);
    let r = _mm_mask3_fnmsub_round_ss(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_ps(3., 3., 3., -5.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., -5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, a);
    let r = _mm_mask_fnmsub_round_sd(
        a,
        0b11111111,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., -5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r =
        _mm_maskz_fnmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    let e = _mm_set_pd(1., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fnmsub_round_sd(
        0b11111111,
        a,
        b,
        c,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(1., -5.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_round_sd() {
    let a = _mm_set1_pd(1.);
    let b = _mm_set1_pd(2.);
    let c = _mm_set1_pd(3.);
    let r =
        _mm_mask3_fnmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    assert_eq_m128d(r, c);
    let r = _mm_mask3_fnmsub_round_sd(
        a,
        b,
        c,
        0b11111111,
        _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
    );
    let e = _mm_set_pd(3., -5.);
    assert_eq_m128d(r, e);
}
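
// fixupimm repairs special floating-point values in the low element: the
// input's class (NaN, zero, infinity, ...) selects a 4-bit response code
// from the lookup table passed in the integer operand c. With the table
// used here the fixed-up lane becomes -0.0.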
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_ss() {
    let a = _mm_set_ps(0., 0., 0., f32::NAN);
    let b = _mm_set1_ps(f32::MAX);
    let c = _mm_set1_epi32(i32::MAX);
    let r = _mm_fixupimm_ss(a, b, c, 5);
    let e = _mm_set_ps(0., 0., 0., -0.0);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_ss() {
    let a = _mm_set_ps(0., 0., 0., f32::NAN);
    let b = _mm_set1_ps(f32::MAX);
    let c = _mm_set1_epi32(i32::MAX);
    let r = _mm_mask_fixupimm_ss(a, 0b11111111, b, c, 5);
    let e = _mm_set_ps(0., 0., 0., -0.0);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_ss() {
    let a = _mm_set_ps(0., 0., 0., f32::NAN);
    let b = _mm_set1_ps(f32::MAX);
    let c = _mm_set1_epi32(i32::MAX);
    let r = _mm_maskz_fixupimm_ss(0b00000000, a, b, c, 5);
    let e = _mm_set_ps(0., 0., 0., 0.0);
    assert_eq_m128(r, e);
    let r = _mm_maskz_fixupimm_ss(0b11111111, a, b, c, 5);
    let e = _mm_set_ps(0., 0., 0., -0.0);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_sd() {
    let a = _mm_set_pd(0., f64::NAN);
    let b = _mm_set1_pd(f64::MAX);
    let c = _mm_set1_epi64x(i32::MAX as i64);
    let r = _mm_fixupimm_sd(a, b, c, 5);
    let e = _mm_set_pd(0., -0.0);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_sd() {
    let a = _mm_set_pd(0., f64::NAN);
    let b = _mm_set1_pd(f64::MAX);
    let c = _mm_set1_epi64x(i32::MAX as i64);
    let r = _mm_mask_fixupimm_sd(a, 0b11111111, b, c, 5);
    let e = _mm_set_pd(0., -0.0);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_sd() {
    let a = _mm_set_pd(0., f64::NAN);
    let b = _mm_set1_pd(f64::MAX);
    let c = _mm_set1_epi64x(i32::MAX as i64);
    let r = _mm_maskz_fixupimm_sd(0b00000000, a, b, c, 5);
    let e = _mm_set_pd(0., 0.0);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fixupimm_sd(0b11111111, a, b, c, 5);
    let e = _mm_set_pd(0., -0.0);
    assert_eq_m128d(r, e);
}
36642 #[simd_test(enable = "avx512f")]
36643 unsafe fn test_mm_fixupimm_round_ss() {
36644 let a
= _mm_set_ps(0., 0., 0., f32::NAN
);
36645 let b
= _mm_set1_ps(f32::MAX
);
36646 let c
= _mm_set1_epi32(i32::MAX
);
36647 let r
= _mm_fixupimm_round_ss(a
, b
, c
, 5, _MM_FROUND_CUR_DIRECTION
);
36648 let e
= _mm_set_ps(0., 0., 0., -0.0);
36649 assert_eq_m128(r
, e
);
36652 #[simd_test(enable = "avx512f")]
36653 unsafe fn test_mm_mask_fixupimm_round_ss() {
36654 let a
= _mm_set_ps(0., 0., 0., f32::NAN
);
36655 let b
= _mm_set1_ps(f32::MAX
);
36656 let c
= _mm_set1_epi32(i32::MAX
);
36657 let r
= _mm_mask_fixupimm_round_ss(a
, 0b11111111, b
, c
, 5, _MM_FROUND_CUR_DIRECTION
);
36658 let e
= _mm_set_ps(0., 0., 0., -0.0);
36659 assert_eq_m128(r
, e
);
36662 #[simd_test(enable = "avx512f")]
36663 unsafe fn test_mm_maskz_fixupimm_round_ss() {
36664 let a
= _mm_set_ps(0., 0., 0., f32::NAN
);
36665 let b
= _mm_set1_ps(f32::MAX
);
36666 let c
= _mm_set1_epi32(i32::MAX
);
36667 let r
= _mm_maskz_fixupimm_round_ss(0b00000000, a
, b
, c
, 5, _MM_FROUND_CUR_DIRECTION
);
36668 let e
= _mm_set_ps(0., 0., 0., 0.0);
36669 assert_eq_m128(r
, e
);
36670 let r
= _mm_maskz_fixupimm_round_ss(0b11111111, a
, b
, c
, 5, _MM_FROUND_CUR_DIRECTION
);
36671 let e
= _mm_set_ps(0., 0., 0., -0.0);
36672 assert_eq_m128(r
, e
);
36675 #[simd_test(enable = "avx512f")]
36676 unsafe fn test_mm_fixupimm_round_sd() {
36677 let a
= _mm_set_pd(0., f64::NAN
);
36678 let b
= _mm_set1_pd(f64::MAX
);
36679 let c
= _mm_set1_epi64x(i32::MAX
as i64);
36680 let r
= _mm_fixupimm_round_sd(a
, b
, c
, 5, _MM_FROUND_CUR_DIRECTION
);
36681 let e
= _mm_set_pd(0., -0.0);
36682 assert_eq_m128d(r
, e
);
36685 #[simd_test(enable = "avx512f")]
36686 unsafe fn test_mm_mask_fixupimm_round_sd() {
36687 let a
= _mm_set_pd(0., f64::NAN
);
36688 let b
= _mm_set1_pd(f64::MAX
);
36689 let c
= _mm_set1_epi64x(i32::MAX
as i64);
36690 let r
= _mm_mask_fixupimm_round_sd(a
, 0b11111111, b
, c
, 5, _MM_FROUND_CUR_DIRECTION
);
36691 let e
= _mm_set_pd(0., -0.0);
36692 assert_eq_m128d(r
, e
);
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_round_sd() {
    let a = _mm_set_pd(0., f64::NAN);
    let b = _mm_set1_pd(f64::MAX);
    let c = _mm_set1_epi64x(i32::MAX as i64);
    let r = _mm_maskz_fixupimm_round_sd(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., 0.0);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_fixupimm_round_sd(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(0., -0.0);
    assert_eq_m128d(r, e);
}

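// The `_round` variants of `fixupimm` take one extra argument beyond the
// non-`_round` forms; _MM_FROUND_CUR_DIRECTION keeps the current MXCSR
// behavior, so the expected values match the non-`_round` tests above.
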
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvtss_sd() {
    let a = _mm_set_pd(6., -7.5);
    let b = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_mask_cvtss_sd(a, 0, a, b);
    assert_eq_m128d(r, a);
    let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
    let e = _mm_set_pd(6., -1.5);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvtss_sd() {
    let a = _mm_set_pd(6., -7.5);
    let b = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_maskz_cvtss_sd(0, a, b);
    let e = _mm_set_pd(6., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
    let e = _mm_set_pd(6., -1.5);
    assert_eq_m128d(r, e);
}

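// For the masked scalar converts, the low element of `b` (-1.5) is converted
// into the low lane when the mask bit is set; a clear bit selects `src` in
// the `mask` variant or +0.0 in the `maskz` variant, and the upper lane
// always comes from `a`.
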
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvtsd_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let b = _mm_set_pd(6., -7.5);
    let r = _mm_mask_cvtsd_ss(a, 0, a, b);
    assert_eq_m128(r, a);
    let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
    let e = _mm_set_ps(0., -0.5, 1., -7.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvtsd_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let b = _mm_set_pd(6., -7.5);
    let r = _mm_maskz_cvtsd_ss(0, a, b);
    let e = _mm_set_ps(0., -0.5, 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
    let e = _mm_set_ps(0., -0.5, 1., -7.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_sd() {
    let a = _mm_set_pd(6., -7.5);
    let b = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(6., -1.5);
    assert_eq_m128d(r, e);
}

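// Widening f32 -> f64 is exact, so no rounding can occur here; the rounding
// argument of `_mm_cvt_roundss_sd` only selects the exception behavior.
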
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvt_roundss_sd() {
    let a = _mm_set_pd(6., -7.5);
    let b = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_mask_cvt_roundss_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
    assert_eq_m128d(r, a);
    let r = _mm_mask_cvt_roundss_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(6., -1.5);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvt_roundss_sd() {
    let a = _mm_set_pd(6., -7.5);
    let b = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_maskz_cvt_roundss_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(6., 0.);
    assert_eq_m128d(r, e);
    let r = _mm_maskz_cvt_roundss_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
    let e = _mm_set_pd(6., -1.5);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let b = _mm_set_pd(6., -7.5);
    let r = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., -7.5);
    assert_eq_m128(r, e);
}

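// Narrowing f64 -> f32 can round, so these tests pass an explicit rounding
// override; note that a mode such as _MM_FROUND_TO_ZERO must be OR'ed with
// _MM_FROUND_NO_EXC. -7.5 is exactly representable as f32, so every rounding
// mode produces the same result.
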
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvt_roundsd_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let b = _mm_set_pd(6., -7.5);
    let r = _mm_mask_cvt_roundsd_ss(a, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    assert_eq_m128(r, a);
    let r = _mm_mask_cvt_roundsd_ss(a, 0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., -7.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvt_roundsd_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let b = _mm_set_pd(6., -7.5);
    let r = _mm_maskz_cvt_roundsd_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., 0.);
    assert_eq_m128(r, e);
    let r = _mm_maskz_cvt_roundsd_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., -7.5);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_si32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    // _MM_FROUND_TO_ZERO truncates -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_i32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvt_roundss_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    // _MM_FROUND_TO_ZERO truncates -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_u32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

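// A negative source is out of range for an unsigned conversion, so the
// hardware returns the unsigned integer indefinite value, i.e. all bits set
// (u32::MAX); the other u32 tests below rely on the same behavior.
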
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtss_i32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvtss_i32(a);
    // The default MXCSR rounding mode (round-to-nearest-even) converts -1.5 to -2.
    let e: i32 = -2;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtss_u32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvtss_u32(a);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_si32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvt_roundsd_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    // _MM_FROUND_TO_ZERO truncates -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_i32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    // _MM_FROUND_TO_ZERO truncates -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_u32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvt_roundsd_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtsd_i32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvtsd_i32(a);
    // The default MXCSR rounding mode (round-to-nearest-even) converts -1.5 to -2.
    let e: i32 = -2;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtsd_u32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvtsd_u32(a);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundi32_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    // 9 matches the expected low lane below.
    let b: i32 = 9;
    let r = _mm_cvt_roundi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., 9.);
    assert_eq_m128(r, e);
}

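// Converting the integer 9 to f32 (or f64) is exact, so the rounding override
// in these int-to-float tests cannot change the result.
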
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsi32_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    // 9 matches the expected low lane below.
    let b: i32 = 9;
    let r = _mm_cvt_roundsi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., 9.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundu32_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    // 9 matches the expected low lane below.
    let b: u32 = 9;
    let r = _mm_cvt_roundu32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
    let e = _mm_set_ps(0., -0.5, 1., 9.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvti32_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    // 9 matches the expected low lane below.
    let b: i32 = 9;
    let r = _mm_cvti32_ss(a, b);
    let e = _mm_set_ps(0., -0.5, 1., 9.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvti32_sd() {
    let a = _mm_set_pd(1., -1.5);
    // 9 matches the expected low lane below.
    let b: i32 = 9;
    let r = _mm_cvti32_sd(a, b);
    let e = _mm_set_pd(1., 9.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_si32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvtt_roundss_si32(a, _MM_FROUND_CUR_DIRECTION);
    // The truncating conversion takes -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_i32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvtt_roundss_i32(a, _MM_FROUND_CUR_DIRECTION);
    // The truncating conversion takes -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_u32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvtt_roundss_u32(a, _MM_FROUND_CUR_DIRECTION);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttss_i32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvttss_i32(a);
    // The truncating conversion takes -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttss_u32() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    let r = _mm_cvttss_u32(a);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_si32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvtt_roundsd_si32(a, _MM_FROUND_CUR_DIRECTION);
    // The truncating conversion takes -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_i32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvtt_roundsd_i32(a, _MM_FROUND_CUR_DIRECTION);
    // The truncating conversion takes -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_u32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvtt_roundsd_u32(a, _MM_FROUND_CUR_DIRECTION);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttsd_i32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvttsd_i32(a);
    // The truncating conversion takes -1.5 to -1.
    let e: i32 = -1;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttsd_u32() {
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvttsd_u32(a);
    let e: u32 = u32::MAX;
    assert_eq!(r, e);
}

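// For the truncating (`cvtt`) conversions the extra argument only controls
// exception suppression (SAE); the rounding behavior is always truncation
// toward zero, regardless of MXCSR.
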
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu32_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    // 9 matches the expected low lane below.
    let b: u32 = 9;
    let r = _mm_cvtu32_ss(a, b);
    let e = _mm_set_ps(0., -0.5, 1., 9.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu32_sd() {
    let a = _mm_set_pd(1., -1.5);
    // 9 matches the expected low lane below.
    let b: u32 = 9;
    let r = _mm_cvtu32_sd(a, b);
    let e = _mm_set_pd(1., 9.);
    assert_eq_m128d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu64_ss() {
    let a = _mm_set_ps(0., -0.5, 1., -1.5);
    // 9 matches the expected low lane below.
    let b: u64 = 9;
    let r = _mm_cvtu64_ss(a, b);
    let e = _mm_set_ps(0., -0.5, 1., 9.);
    assert_eq_m128(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu64_sd() {
    let a = _mm_set_pd(1., -1.5);
    // 9 matches the expected low lane below.
    let b: u64 = 9;
    let r = _mm_cvtu64_sd(a, b);
    let e = _mm_set_pd(1., 9.);
    assert_eq_m128d(r, e);
}

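// The `u64` variants take a 64-bit unsigned source; 9 is exactly
// representable in both f32 and f64, so the expected lanes match the 32-bit
// tests above.
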
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_ss() {
    let a = _mm_set1_ps(2.2);
    let b = _mm_set1_ps(1.1);
    let r = _mm_comi_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
    // Predicate 0 is _CMP_EQ_OQ; 2.2 == 1.1 is false, so the comparison returns 0.
    let e: i32 = 0;
    assert_eq!(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_sd() {
    let a = _mm_set1_pd(2.2);
    let b = _mm_set1_pd(1.1);
    let r = _mm_comi_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);