]>
Commit | Line | Data |
---|---|---|
fc512014 | 1 | use crate::{ |
a2a8927a | 2 | arch::asm, |
fc512014 XL |
3 | core_arch::{simd::*, simd_llvm::*, x86::*}, |
4 | mem::{self, transmute}, | |
5 | ptr, | |
6 | }; | |
7 | ||
8 | #[cfg(test)] | |
9 | use stdarch_test::assert_instr; | |
10 | ||
a2a8927a XL |
11 | use super::avx512f::{vpl, vps}; |
12 | ||
fc512014 XL |
13 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst. |
14 | /// | |
15 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi16&expand=30) | |
16 | #[inline] | |
17 | #[target_feature(enable = "avx512bw")] | |
18 | #[cfg_attr(test, assert_instr(vpabsw))] | |
19 | pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i { | |
20 | let a = a.as_i16x32(); | |
21 | // all-0 is a properly initialized i16x32 | |
22 | let zero: i16x32 = mem::zeroed(); | |
23 | let sub = simd_sub(zero, a); | |
24 | let cmp: i16x32 = simd_gt(a, zero); | |
25 | transmute(simd_select(cmp, a, sub)) | |
26 | } | |
27 | ||
28 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
29 | /// | |
30 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi16&expand=31) | |
31 | #[inline] | |
32 | #[target_feature(enable = "avx512bw")] | |
33 | #[cfg_attr(test, assert_instr(vpabsw))] | |
34 | pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { | |
35 | let abs = _mm512_abs_epi16(a).as_i16x32(); | |
36 | transmute(simd_select_bitmask(k, abs, src.as_i16x32())) | |
37 | } | |
38 | ||
39 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
40 | /// | |
41 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi16&expand=32) | |
42 | #[inline] | |
43 | #[target_feature(enable = "avx512bw")] | |
44 | #[cfg_attr(test, assert_instr(vpabsw))] | |
45 | pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i { | |
46 | let abs = _mm512_abs_epi16(a).as_i16x32(); | |
47 | let zero = _mm512_setzero_si512().as_i16x32(); | |
48 | transmute(simd_select_bitmask(k, abs, zero)) | |
49 | } | |
50 | ||
51 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
52 | /// | |
53 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi16&expand=28) | |
54 | #[inline] | |
55 | #[target_feature(enable = "avx512bw,avx512vl")] | |
56 | #[cfg_attr(test, assert_instr(vpabsw))] | |
57 | pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { | |
58 | let abs = _mm256_abs_epi16(a).as_i16x16(); | |
59 | transmute(simd_select_bitmask(k, abs, src.as_i16x16())) | |
60 | } | |
61 | ||
62 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
63 | /// | |
64 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi16&expand=29) | |
65 | #[inline] | |
66 | #[target_feature(enable = "avx512bw,avx512vl")] | |
67 | #[cfg_attr(test, assert_instr(vpabsw))] | |
68 | pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i { | |
69 | let abs = _mm256_abs_epi16(a).as_i16x16(); | |
70 | let zero = _mm256_setzero_si256().as_i16x16(); | |
71 | transmute(simd_select_bitmask(k, abs, zero)) | |
72 | } | |
73 | ||
74 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
75 | /// | |
76 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi16&expand=25) | |
77 | #[inline] | |
78 | #[target_feature(enable = "avx512bw,avx512vl")] | |
79 | #[cfg_attr(test, assert_instr(vpabsw))] | |
80 | pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
81 | let abs = _mm_abs_epi16(a).as_i16x8(); | |
82 | transmute(simd_select_bitmask(k, abs, src.as_i16x8())) | |
83 | } | |
84 | ||
85 | /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
86 | /// | |
87 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi16&expand=26) | |
88 | #[inline] | |
89 | #[target_feature(enable = "avx512bw,avx512vl")] | |
90 | #[cfg_attr(test, assert_instr(vpabsw))] | |
91 | pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { | |
92 | let abs = _mm_abs_epi16(a).as_i16x8(); | |
93 | let zero = _mm_setzero_si128().as_i16x8(); | |
94 | transmute(simd_select_bitmask(k, abs, zero)) | |
95 | } | |
96 | ||
97 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst. | |
98 | /// | |
99 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi8&expand=57) | |
100 | #[inline] | |
101 | #[target_feature(enable = "avx512bw")] | |
102 | #[cfg_attr(test, assert_instr(vpabsb))] | |
103 | pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i { | |
104 | let a = a.as_i8x64(); | |
105 | // all-0 is a properly initialized i8x64 | |
106 | let zero: i8x64 = mem::zeroed(); | |
107 | let sub = simd_sub(zero, a); | |
108 | let cmp: i8x64 = simd_gt(a, zero); | |
109 | transmute(simd_select(cmp, a, sub)) | |
110 | } | |
111 | ||
112 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
113 | /// | |
114 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi8&expand=58) | |
115 | #[inline] | |
116 | #[target_feature(enable = "avx512bw")] | |
117 | #[cfg_attr(test, assert_instr(vpabsb))] | |
118 | pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { | |
119 | let abs = _mm512_abs_epi8(a).as_i8x64(); | |
120 | transmute(simd_select_bitmask(k, abs, src.as_i8x64())) | |
121 | } | |
122 | ||
123 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
124 | /// | |
125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi8&expand=59) | |
126 | #[inline] | |
127 | #[target_feature(enable = "avx512bw")] | |
128 | #[cfg_attr(test, assert_instr(vpabsb))] | |
129 | pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i { | |
130 | let abs = _mm512_abs_epi8(a).as_i8x64(); | |
131 | let zero = _mm512_setzero_si512().as_i8x64(); | |
132 | transmute(simd_select_bitmask(k, abs, zero)) | |
133 | } | |
134 | ||
135 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
136 | /// | |
137 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_abs_epi8&expand=55) | |
138 | #[inline] | |
139 | #[target_feature(enable = "avx512bw,avx512vl")] | |
140 | #[cfg_attr(test, assert_instr(vpabsb))] | |
141 | pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { | |
142 | let abs = _mm256_abs_epi8(a).as_i8x32(); | |
143 | transmute(simd_select_bitmask(k, abs, src.as_i8x32())) | |
144 | } | |
145 | ||
146 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
147 | /// | |
148 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_abs_epi8&expand=56) | |
149 | #[inline] | |
150 | #[target_feature(enable = "avx512bw,avx512vl")] | |
151 | #[cfg_attr(test, assert_instr(vpabsb))] | |
152 | pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i { | |
153 | let abs = _mm256_abs_epi8(a).as_i8x32(); | |
154 | let zero = _mm256_setzero_si256().as_i8x32(); | |
155 | transmute(simd_select_bitmask(k, abs, zero)) | |
156 | } | |
157 | ||
158 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set) | |
159 | /// | |
160 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_abs_epi8&expand=52) | |
161 | #[inline] | |
162 | #[target_feature(enable = "avx512bw,avx512vl")] | |
163 | #[cfg_attr(test, assert_instr(vpabsb))] | |
164 | pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { | |
165 | let abs = _mm_abs_epi8(a).as_i8x16(); | |
166 | transmute(simd_select_bitmask(k, abs, src.as_i8x16())) | |
167 | } | |
168 | ||
169 | /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
170 | /// | |
171 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_abs_epi8&expand=53) | |
172 | #[inline] | |
173 | #[target_feature(enable = "avx512bw,avx512vl")] | |
174 | #[cfg_attr(test, assert_instr(vpabsb))] | |
175 | pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i { | |
176 | let abs = _mm_abs_epi8(a).as_i8x16(); | |
177 | let zero = _mm_setzero_si128().as_i8x16(); | |
178 | transmute(simd_select_bitmask(k, abs, zero)) | |
179 | } | |
180 | ||
181 | /// Add packed 16-bit integers in a and b, and store the results in dst. | |
182 | /// | |
183 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi16&expand=91) | |
184 | #[inline] | |
185 | #[target_feature(enable = "avx512bw")] | |
186 | #[cfg_attr(test, assert_instr(vpaddw))] | |
187 | pub unsafe fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i { | |
188 | transmute(simd_add(a.as_i16x32(), b.as_i16x32())) | |
189 | } | |
190 | ||
191 | /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
192 | /// | |
193 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi16&expand=92) | |
194 | #[inline] | |
195 | #[target_feature(enable = "avx512bw")] | |
196 | #[cfg_attr(test, assert_instr(vpaddw))] | |
197 | pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
198 | let add = _mm512_add_epi16(a, b).as_i16x32(); | |
199 | transmute(simd_select_bitmask(k, add, src.as_i16x32())) | |
200 | } | |
201 | ||
202 | /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
203 | /// | |
204 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi16&expand=93) | |
205 | #[inline] | |
206 | #[target_feature(enable = "avx512bw")] | |
207 | #[cfg_attr(test, assert_instr(vpaddw))] | |
208 | pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
209 | let add = _mm512_add_epi16(a, b).as_i16x32(); | |
210 | let zero = _mm512_setzero_si512().as_i16x32(); | |
211 | transmute(simd_select_bitmask(k, add, zero)) | |
212 | } | |
213 | ||
214 | /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
215 | /// | |
216 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi&expand=89) | |
217 | #[inline] | |
218 | #[target_feature(enable = "avx512bw,avx512vl")] | |
219 | #[cfg_attr(test, assert_instr(vpaddw))] | |
220 | pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
221 | let add = _mm256_add_epi16(a, b).as_i16x16(); | |
222 | transmute(simd_select_bitmask(k, add, src.as_i16x16())) | |
223 | } | |
224 | ||
225 | /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
226 | /// | |
227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi16&expand=90) | |
228 | #[inline] | |
229 | #[target_feature(enable = "avx512bw,avx512vl")] | |
230 | #[cfg_attr(test, assert_instr(vpaddw))] | |
231 | pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
232 | let add = _mm256_add_epi16(a, b).as_i16x16(); | |
233 | let zero = _mm256_setzero_si256().as_i16x16(); | |
234 | transmute(simd_select_bitmask(k, add, zero)) | |
235 | } | |
236 | ||
237 | /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
238 | /// | |
239 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi16&expand=86) | |
240 | #[inline] | |
241 | #[target_feature(enable = "avx512bw,avx512vl")] | |
242 | #[cfg_attr(test, assert_instr(vpaddw))] | |
243 | pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
244 | let add = _mm_add_epi16(a, b).as_i16x8(); | |
245 | transmute(simd_select_bitmask(k, add, src.as_i16x8())) | |
246 | } | |
247 | ||
248 | /// Add packed 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
249 | /// | |
250 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi16&expand=87) | |
251 | #[inline] | |
252 | #[target_feature(enable = "avx512bw,avx512vl")] | |
253 | #[cfg_attr(test, assert_instr(vpaddw))] | |
254 | pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
255 | let add = _mm_add_epi16(a, b).as_i16x8(); | |
256 | let zero = _mm_setzero_si128().as_i16x8(); | |
257 | transmute(simd_select_bitmask(k, add, zero)) | |
258 | } | |
259 | ||
260 | /// Add packed 8-bit integers in a and b, and store the results in dst. | |
261 | /// | |
262 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi8&expand=118) | |
263 | #[inline] | |
264 | #[target_feature(enable = "avx512bw")] | |
265 | #[cfg_attr(test, assert_instr(vpaddb))] | |
266 | pub unsafe fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i { | |
267 | transmute(simd_add(a.as_i8x64(), b.as_i8x64())) | |
268 | } | |
269 | ||
270 | /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
271 | /// | |
272 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi8&expand=119) | |
273 | #[inline] | |
274 | #[target_feature(enable = "avx512bw")] | |
275 | #[cfg_attr(test, assert_instr(vpaddb))] | |
276 | pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
277 | let add = _mm512_add_epi8(a, b).as_i8x64(); | |
278 | transmute(simd_select_bitmask(k, add, src.as_i8x64())) | |
279 | } | |
280 | ||
281 | /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
282 | /// | |
283 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi8&expand=120) | |
284 | #[inline] | |
285 | #[target_feature(enable = "avx512bw")] | |
286 | #[cfg_attr(test, assert_instr(vpaddb))] | |
287 | pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
288 | let add = _mm512_add_epi8(a, b).as_i8x64(); | |
289 | let zero = _mm512_setzero_si512().as_i8x64(); | |
290 | transmute(simd_select_bitmask(k, add, zero)) | |
291 | } | |
292 | ||
293 | /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
294 | /// | |
295 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_add_epi8&expand=116) | |
296 | #[inline] | |
297 | #[target_feature(enable = "avx512bw,avx512vl")] | |
298 | #[cfg_attr(test, assert_instr(vpaddb))] | |
299 | pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
300 | let add = _mm256_add_epi8(a, b).as_i8x32(); | |
301 | transmute(simd_select_bitmask(k, add, src.as_i8x32())) | |
302 | } | |
303 | ||
304 | /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
305 | /// | |
306 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_add_epi8&expand=117) | |
307 | #[inline] | |
308 | #[target_feature(enable = "avx512bw,avx512vl")] | |
309 | #[cfg_attr(test, assert_instr(vpaddb))] | |
310 | pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
311 | let add = _mm256_add_epi8(a, b).as_i8x32(); | |
312 | let zero = _mm256_setzero_si256().as_i8x32(); | |
313 | transmute(simd_select_bitmask(k, add, zero)) | |
314 | } | |
315 | ||
316 | /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
317 | /// | |
318 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_epi8&expand=113) | |
319 | #[inline] | |
320 | #[target_feature(enable = "avx512bw,avx512vl")] | |
321 | #[cfg_attr(test, assert_instr(vpaddb))] | |
322 | pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
323 | let add = _mm_add_epi8(a, b).as_i8x16(); | |
324 | transmute(simd_select_bitmask(k, add, src.as_i8x16())) | |
325 | } | |
326 | ||
327 | /// Add packed 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
328 | /// | |
329 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_epi8&expand=114) | |
330 | #[inline] | |
331 | #[target_feature(enable = "avx512bw,avx512vl")] | |
332 | #[cfg_attr(test, assert_instr(vpaddb))] | |
333 | pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
334 | let add = _mm_add_epi8(a, b).as_i8x16(); | |
335 | let zero = _mm_setzero_si128().as_i8x16(); | |
336 | transmute(simd_select_bitmask(k, add, zero)) | |
337 | } | |
338 | ||
339 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst. | |
340 | /// | |
341 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu16&expand=197) | |
342 | #[inline] | |
343 | #[target_feature(enable = "avx512bw")] | |
344 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
345 | pub unsafe fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i { | |
346 | transmute(vpaddusw( | |
347 | a.as_u16x32(), | |
348 | b.as_u16x32(), | |
349 | _mm512_setzero_si512().as_u16x32(), | |
350 | 0b11111111_11111111_11111111_11111111, | |
351 | )) | |
352 | } | |
353 | ||
354 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
355 | /// | |
356 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu16&expand=198) | |
357 | #[inline] | |
358 | #[target_feature(enable = "avx512bw")] | |
359 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
360 | pub unsafe fn _mm512_mask_adds_epu16( | |
361 | src: __m512i, | |
362 | k: __mmask32, | |
363 | a: __m512i, | |
364 | b: __m512i, | |
365 | ) -> __m512i { | |
366 | transmute(vpaddusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k)) | |
367 | } | |
368 | ||
369 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
370 | /// | |
371 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu16&expand=199) | |
372 | #[inline] | |
373 | #[target_feature(enable = "avx512bw")] | |
374 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
375 | pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
376 | transmute(vpaddusw( | |
377 | a.as_u16x32(), | |
378 | b.as_u16x32(), | |
379 | _mm512_setzero_si512().as_u16x32(), | |
380 | k, | |
381 | )) | |
382 | } | |
383 | ||
384 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
385 | /// | |
386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epu16&expand=195) | |
387 | #[inline] | |
388 | #[target_feature(enable = "avx512bw,avx512vl")] | |
389 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
390 | pub unsafe fn _mm256_mask_adds_epu16( | |
391 | src: __m256i, | |
392 | k: __mmask16, | |
393 | a: __m256i, | |
394 | b: __m256i, | |
395 | ) -> __m256i { | |
396 | transmute(vpaddusw256( | |
397 | a.as_u16x16(), | |
398 | b.as_u16x16(), | |
399 | src.as_u16x16(), | |
400 | k, | |
401 | )) | |
402 | } | |
403 | ||
404 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
405 | /// | |
406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epu16&expand=196) | |
407 | #[inline] | |
408 | #[target_feature(enable = "avx512bw,avx512vl")] | |
409 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
410 | pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
411 | transmute(vpaddusw256( | |
412 | a.as_u16x16(), | |
413 | b.as_u16x16(), | |
414 | _mm256_setzero_si256().as_u16x16(), | |
415 | k, | |
416 | )) | |
417 | } | |
418 | ||
419 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
420 | /// | |
421 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epu16&expand=192) | |
422 | #[inline] | |
423 | #[target_feature(enable = "avx512bw,avx512vl")] | |
424 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
425 | pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
426 | transmute(vpaddusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k)) | |
427 | } | |
428 | ||
429 | /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
430 | /// | |
431 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epu16&expand=193) | |
432 | #[inline] | |
433 | #[target_feature(enable = "avx512bw,avx512vl")] | |
434 | #[cfg_attr(test, assert_instr(vpaddusw))] | |
435 | pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
436 | transmute(vpaddusw128( | |
437 | a.as_u16x8(), | |
438 | b.as_u16x8(), | |
439 | _mm_setzero_si128().as_u16x8(), | |
440 | k, | |
441 | )) | |
442 | } | |
443 | ||
444 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst. | |
445 | /// | |
446 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epu8&expand=206) | |
447 | #[inline] | |
448 | #[target_feature(enable = "avx512bw")] | |
449 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
450 | pub unsafe fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i { | |
451 | transmute(vpaddusb( | |
452 | a.as_u8x64(), | |
453 | b.as_u8x64(), | |
454 | _mm512_setzero_si512().as_u8x64(), | |
455 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
456 | )) | |
457 | } | |
458 | ||
459 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
460 | /// | |
461 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epu8&expand=207) | |
462 | #[inline] | |
463 | #[target_feature(enable = "avx512bw")] | |
464 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
465 | pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
466 | transmute(vpaddusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k)) | |
467 | } | |
468 | ||
469 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
470 | /// | |
471 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epu8&expand=208) | |
472 | #[inline] | |
473 | #[target_feature(enable = "avx512bw")] | |
474 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
475 | pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
476 | transmute(vpaddusb( | |
477 | a.as_u8x64(), | |
478 | b.as_u8x64(), | |
479 | _mm512_setzero_si512().as_u8x64(), | |
480 | k, | |
481 | )) | |
482 | } | |
483 | ||
484 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
485 | /// | |
486 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epu8&expand=204) | |
487 | #[inline] | |
488 | #[target_feature(enable = "avx512bw,avx512vl")] | |
489 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
490 | pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
491 | transmute(vpaddusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k)) | |
492 | } | |
493 | ||
494 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
495 | /// | |
496 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epu8&expand=205) | |
497 | #[inline] | |
498 | #[target_feature(enable = "avx512bw,avx512vl")] | |
499 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
500 | pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
501 | transmute(vpaddusb256( | |
502 | a.as_u8x32(), | |
503 | b.as_u8x32(), | |
504 | _mm256_setzero_si256().as_u8x32(), | |
505 | k, | |
506 | )) | |
507 | } | |
508 | ||
509 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
510 | /// | |
511 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epu8&expand=201) | |
512 | #[inline] | |
513 | #[target_feature(enable = "avx512bw,avx512vl")] | |
514 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
515 | pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
516 | transmute(vpaddusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k)) | |
517 | } | |
518 | ||
519 | /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
520 | /// | |
521 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epu8&expand=202) | |
522 | #[inline] | |
523 | #[target_feature(enable = "avx512bw,avx512vl")] | |
524 | #[cfg_attr(test, assert_instr(vpaddusb))] | |
525 | pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
526 | transmute(vpaddusb128( | |
527 | a.as_u8x16(), | |
528 | b.as_u8x16(), | |
529 | _mm_setzero_si128().as_u8x16(), | |
530 | k, | |
531 | )) | |
532 | } | |
533 | ||
534 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst. | |
535 | /// | |
536 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi16&expand=179) | |
537 | #[inline] | |
538 | #[target_feature(enable = "avx512bw")] | |
539 | #[cfg_attr(test, assert_instr(vpaddsw))] | |
540 | pub unsafe fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i { | |
541 | transmute(vpaddsw( | |
542 | a.as_i16x32(), | |
543 | b.as_i16x32(), | |
544 | _mm512_setzero_si512().as_i16x32(), | |
545 | 0b11111111_11111111_11111111_11111111, | |
546 | )) | |
547 | } | |
548 | ||
549 | /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
550 | /// | |
551 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi16&expand=180) | |
552 | #[inline] | |
553 | #[target_feature(enable = "avx512bw")] | |
554 | #[cfg_attr(test, assert_instr(vpaddsw))] | |
555 | pub unsafe fn _mm512_mask_adds_epi16( | |
556 | src: __m512i, | |
557 | k: __mmask32, | |
558 | a: __m512i, | |
559 | b: __m512i, | |
560 | ) -> __m512i { | |
561 | transmute(vpaddsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k)) | |
562 | } | |
563 | ||
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi16&expand=181)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    // Zeromask variant: all-zero "src" makes masked-out lanes come back as 0.
    transmute(vpaddsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}
578 | ||
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epi16&expand=177)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_mask_adds_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // 256-bit (AVX512VL) variant; the intrinsic merges unselected lanes from `src`.
    transmute(vpaddsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
}
593 | ||
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epi16&expand=178)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpaddsw256(
        a.as_i16x16(),
        b.as_i16x16(),
        _mm256_setzero_si256().as_i16x16(),
        k,
    ))
}
608 | ||
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epi16&expand=174)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // 128-bit (AVX512VL) variant; the intrinsic merges unselected lanes from `src`.
    transmute(vpaddsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k))
}
618 | ||
/// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epi16&expand=175)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsw))]
pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpaddsw128(
        a.as_i16x8(),
        b.as_i16x8(),
        _mm_setzero_si128().as_i16x8(),
        k,
    ))
}
633 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_adds_epi8&expand=188)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Unmasked form: all-ones 64-lane mask writes every byte lane; the zero
    // "src" vector is never selected.
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
648 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_adds_epi8&expand=189)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // The LLVM intrinsic merges lanes with a clear mask bit from `src`.
    transmute(vpaddsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k))
}
658 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_adds_epi8&expand=190)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpaddsb(
        a.as_i8x64(),
        b.as_i8x64(),
        _mm512_setzero_si512().as_i8x64(),
        k,
    ))
}
673 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_adds_epi8&expand=186)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // 256-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpaddsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k))
}
683 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_adds_epi8&expand=187)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpaddsb256(
        a.as_i8x32(),
        b.as_i8x32(),
        _mm256_setzero_si256().as_i8x32(),
        k,
    ))
}
698 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_adds_epi8&expand=183)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // 128-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpaddsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k))
}
708 | ||
/// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_adds_epi8&expand=184)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddsb))]
pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpaddsb128(
        a.as_i8x16(),
        b.as_i8x16(),
        _mm_setzero_si128().as_i8x16(),
        k,
    ))
}
723 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi16&expand=5685)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Plain (non-saturating) lane-wise wrapping subtraction via the generic
    // SIMD intrinsic; no LLVM platform intrinsic needed.
    transmute(simd_sub(a.as_i16x32(), b.as_i16x32()))
}
733 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi16&expand=5683)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    // Compute the full subtraction, then blend per mask bit: set -> difference,
    // clear -> the corresponding lane of `src`.
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
}
744 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi16&expand=5684)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    // Compute the full subtraction, then zero the lanes whose mask bit is clear.
    let sub = _mm512_sub_epi16(a, b).as_i16x32();
    let zero = _mm512_setzero_si512().as_i16x32();
    transmute(simd_select_bitmask(k, sub, zero))
}
756 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi16&expand=5680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    // Reuse the AVX2 subtraction, then merge unselected lanes from `src`.
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
}
767 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi16&expand=5681)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    // Reuse the AVX2 subtraction, then zero the lanes whose mask bit is clear.
    let sub = _mm256_sub_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    transmute(simd_select_bitmask(k, sub, zero))
}
779 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi16&expand=5677)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // Reuse the SSE2 subtraction, then merge unselected lanes from `src`.
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
}
790 | ||
/// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi16&expand=5678)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubw))]
pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // Reuse the SSE2 subtraction, then zero the lanes whose mask bit is clear.
    let sub = _mm_sub_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    transmute(simd_select_bitmask(k, sub, zero))
}
802 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi8&expand=5712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
    // Plain (non-saturating) lane-wise wrapping subtraction via the generic
    // SIMD intrinsic.
    transmute(simd_sub(a.as_i8x64(), b.as_i8x64()))
}
812 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi8&expand=5710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // Full subtraction first, then per-bit blend: clear mask bit -> `src` lane.
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
}
823 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi8&expand=5711)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // Full subtraction first, then zero the lanes whose mask bit is clear.
    let sub = _mm512_sub_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    transmute(simd_select_bitmask(k, sub, zero))
}
835 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sub_epi8&expand=5707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // Reuse the AVX2 subtraction, then merge unselected lanes from `src`.
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
}
846 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sub_epi8&expand=5708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // Reuse the AVX2 subtraction, then zero the lanes whose mask bit is clear.
    let sub = _mm256_sub_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    transmute(simd_select_bitmask(k, sub, zero))
}
858 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_epi8&expand=5704)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // Reuse the SSE2 subtraction, then merge unselected lanes from `src`.
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
}
869 | ||
/// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_epi8&expand=5705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubb))]
pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // Reuse the SSE2 subtraction, then zero the lanes whose mask bit is clear.
    let sub = _mm_sub_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    transmute(simd_select_bitmask(k, sub, zero))
}
881 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu16&expand=5793)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Unmasked form: all-ones 32-lane mask writes every lane; the zero "src"
    // vector is never selected.
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
896 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu16&expand=5791)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_mask_subs_epu16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // The LLVM intrinsic merges lanes with a clear mask bit from `src`.
    transmute(vpsubusw(a.as_u16x32(), b.as_u16x32(), src.as_u16x32(), k))
}
911 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu16&expand=5792)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubusw(
        a.as_u16x32(),
        b.as_u16x32(),
        _mm512_setzero_si512().as_u16x32(),
        k,
    ))
}
926 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epu16&expand=5788)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_mask_subs_epu16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // 256-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        src.as_u16x16(),
        k,
    ))
}
946 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epu16&expand=5789)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubusw256(
        a.as_u16x16(),
        b.as_u16x16(),
        _mm256_setzero_si256().as_u16x16(),
        k,
    ))
}
961 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epu16&expand=5785)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // 128-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpsubusw128(a.as_u16x8(), b.as_u16x8(), src.as_u16x8(), k))
}
971 | ||
/// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epu16&expand=5786)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusw))]
pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubusw128(
        a.as_u16x8(),
        b.as_u16x8(),
        _mm_setzero_si128().as_u16x8(),
        k,
    ))
}
986 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epu8&expand=5802)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Unmasked form: all-ones 64-lane mask writes every byte lane; the zero
    // "src" vector is never selected.
    transmute(vpsubusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    ))
}
1001 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epu8&expand=5800)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // The LLVM intrinsic merges lanes with a clear mask bit from `src`.
    transmute(vpsubusb(a.as_u8x64(), b.as_u8x64(), src.as_u8x64(), k))
}
1011 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epu8&expand=5801)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubusb(
        a.as_u8x64(),
        b.as_u8x64(),
        _mm512_setzero_si512().as_u8x64(),
        k,
    ))
}
1026 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epu8&expand=5797)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // 256-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpsubusb256(a.as_u8x32(), b.as_u8x32(), src.as_u8x32(), k))
}
1036 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epu8&expand=5798)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubusb256(
        a.as_u8x32(),
        b.as_u8x32(),
        _mm256_setzero_si256().as_u8x32(),
        k,
    ))
}
1051 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epu8&expand=5794)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // 128-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpsubusb128(a.as_u8x16(), b.as_u8x16(), src.as_u8x16(), k))
}
1061 | ||
/// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epu8&expand=5795)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubusb))]
pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubusb128(
        a.as_u8x16(),
        b.as_u8x16(),
        _mm_setzero_si128().as_u8x16(),
        k,
    ))
}
1076 | ||
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi16&expand=5775)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Unmasked form: all-ones 32-lane mask writes every lane; the zero "src"
    // vector is never selected.
    transmute(vpsubsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        0b11111111_11111111_11111111_11111111,
    ))
}
1091 | ||
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi16&expand=5773)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_mask_subs_epi16(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // The LLVM intrinsic merges lanes with a clear mask bit from `src`.
    transmute(vpsubsw(a.as_i16x32(), b.as_i16x32(), src.as_i16x32(), k))
}
1106 | ||
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi16&expand=5774)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
    // Zeromask variant: all-zero "src" zeroes the masked-out lanes.
    transmute(vpsubsw(
        a.as_i16x32(),
        b.as_i16x32(),
        _mm512_setzero_si512().as_i16x32(),
        k,
    ))
}
1121 | ||
/// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epi16&expand=5770)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubsw))]
pub unsafe fn _mm256_mask_subs_epi16(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // 256-bit (AVX512VL) variant; unselected lanes are merged from `src`.
    transmute(vpsubsw256(a.as_i16x16(), b.as_i16x16(), src.as_i16x16(), k))
}
1136 | ||
1137 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1138 | /// | |
1139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epi16&expand=5771) | |
1140 | #[inline] | |
1141 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1142 | #[cfg_attr(test, assert_instr(vpsubsw))] | |
1143 | pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1144 | transmute(vpsubsw256( | |
1145 | a.as_i16x16(), | |
1146 | b.as_i16x16(), | |
1147 | _mm256_setzero_si256().as_i16x16(), | |
1148 | k, | |
1149 | )) | |
1150 | } | |
1151 | ||
1152 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1153 | /// | |
1154 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epi16&expand=5767) | |
1155 | #[inline] | |
1156 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1157 | #[cfg_attr(test, assert_instr(vpsubsw))] | |
1158 | pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1159 | transmute(vpsubsw128(a.as_i16x8(), b.as_i16x8(), src.as_i16x8(), k)) | |
1160 | } | |
1161 | ||
1162 | /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1163 | /// | |
1164 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epi16&expand=5768) | |
1165 | #[inline] | |
1166 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1167 | #[cfg_attr(test, assert_instr(vpsubsw))] | |
1168 | pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1169 | transmute(vpsubsw128( | |
1170 | a.as_i16x8(), | |
1171 | b.as_i16x8(), | |
1172 | _mm_setzero_si128().as_i16x8(), | |
1173 | k, | |
1174 | )) | |
1175 | } | |
1176 | ||
1177 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst. | |
1178 | /// | |
1179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_subs_epi8&expand=5784) | |
1180 | #[inline] | |
1181 | #[target_feature(enable = "avx512bw")] | |
1182 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1183 | pub unsafe fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i { | |
1184 | transmute(vpsubsb( | |
1185 | a.as_i8x64(), | |
1186 | b.as_i8x64(), | |
1187 | _mm512_setzero_si512().as_i8x64(), | |
1188 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
1189 | )) | |
1190 | } | |
1191 | ||
1192 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1193 | /// | |
1194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_subs_epi8&expand=5782) | |
1195 | #[inline] | |
1196 | #[target_feature(enable = "avx512bw")] | |
1197 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1198 | pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
1199 | transmute(vpsubsb(a.as_i8x64(), b.as_i8x64(), src.as_i8x64(), k)) | |
1200 | } | |
1201 | ||
1202 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1203 | /// | |
1204 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_subs_epi8&expand=5783) | |
1205 | #[inline] | |
1206 | #[target_feature(enable = "avx512bw")] | |
1207 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1208 | pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
1209 | transmute(vpsubsb( | |
1210 | a.as_i8x64(), | |
1211 | b.as_i8x64(), | |
1212 | _mm512_setzero_si512().as_i8x64(), | |
1213 | k, | |
1214 | )) | |
1215 | } | |
1216 | ||
1217 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1218 | /// | |
1219 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_subs_epi8&expand=5779) | |
1220 | #[inline] | |
1221 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1222 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1223 | pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
1224 | transmute(vpsubsb256(a.as_i8x32(), b.as_i8x32(), src.as_i8x32(), k)) | |
1225 | } | |
1226 | ||
1227 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1228 | /// | |
1229 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_subs_epi8&expand=5780) | |
1230 | #[inline] | |
1231 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1232 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1233 | pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
1234 | transmute(vpsubsb256( | |
1235 | a.as_i8x32(), | |
1236 | b.as_i8x32(), | |
1237 | _mm256_setzero_si256().as_i8x32(), | |
1238 | k, | |
1239 | )) | |
1240 | } | |
1241 | ||
1242 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1243 | /// | |
1244 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_subs_epi8&expand=5776) | |
1245 | #[inline] | |
1246 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1247 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1248 | pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
1249 | transmute(vpsubsb128(a.as_i8x16(), b.as_i8x16(), src.as_i8x16(), k)) | |
1250 | } | |
1251 | ||
1252 | /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1253 | /// | |
1254 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_subs_epi8&expand=5777) | |
1255 | #[inline] | |
1256 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1257 | #[cfg_attr(test, assert_instr(vpsubsb))] | |
1258 | pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
1259 | transmute(vpsubsb128( | |
1260 | a.as_i8x16(), | |
1261 | b.as_i8x16(), | |
1262 | _mm_setzero_si128().as_i8x16(), | |
1263 | k, | |
1264 | )) | |
1265 | } | |
1266 | ||
1267 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. | |
1268 | /// | |
1269 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epu16&expand=3973) | |
1270 | #[inline] | |
1271 | #[target_feature(enable = "avx512bw")] | |
1272 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1273 | pub unsafe fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i { | |
1274 | transmute(vpmulhuw(a.as_u16x32(), b.as_u16x32())) | |
1275 | } | |
1276 | ||
1277 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1278 | /// | |
1279 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epu16&expand=3971) | |
1280 | #[inline] | |
1281 | #[target_feature(enable = "avx512bw")] | |
1282 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1283 | pub unsafe fn _mm512_mask_mulhi_epu16( | |
1284 | src: __m512i, | |
1285 | k: __mmask32, | |
1286 | a: __m512i, | |
1287 | b: __m512i, | |
1288 | ) -> __m512i { | |
1289 | let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); | |
1290 | transmute(simd_select_bitmask(k, mul, src.as_u16x32())) | |
1291 | } | |
1292 | ||
1293 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1294 | /// | |
1295 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epu16&expand=3972) | |
1296 | #[inline] | |
1297 | #[target_feature(enable = "avx512bw")] | |
1298 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1299 | pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1300 | let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); | |
1301 | let zero = _mm512_setzero_si512().as_u16x32(); | |
1302 | transmute(simd_select_bitmask(k, mul, zero)) | |
1303 | } | |
1304 | ||
1305 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1306 | /// | |
1307 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhi_epu16&expand=3968) | |
1308 | #[inline] | |
1309 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1310 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1311 | pub unsafe fn _mm256_mask_mulhi_epu16( | |
1312 | src: __m256i, | |
1313 | k: __mmask16, | |
1314 | a: __m256i, | |
1315 | b: __m256i, | |
1316 | ) -> __m256i { | |
1317 | let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); | |
1318 | transmute(simd_select_bitmask(k, mul, src.as_u16x16())) | |
1319 | } | |
1320 | ||
1321 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1322 | /// | |
1323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhi_epu16&expand=3969) | |
1324 | #[inline] | |
1325 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1326 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1327 | pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1328 | let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); | |
1329 | let zero = _mm256_setzero_si256().as_u16x16(); | |
1330 | transmute(simd_select_bitmask(k, mul, zero)) | |
1331 | } | |
1332 | ||
1333 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1334 | /// | |
1335 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhi_epu16&expand=3965) | |
1336 | #[inline] | |
1337 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1338 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1339 | pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1340 | let mul = _mm_mulhi_epu16(a, b).as_u16x8(); | |
1341 | transmute(simd_select_bitmask(k, mul, src.as_u16x8())) | |
1342 | } | |
1343 | ||
1344 | /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1345 | /// | |
1346 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhi_epu16&expand=3966) | |
1347 | #[inline] | |
1348 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1349 | #[cfg_attr(test, assert_instr(vpmulhuw))] | |
1350 | pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1351 | let mul = _mm_mulhi_epu16(a, b).as_u16x8(); | |
1352 | let zero = _mm_setzero_si128().as_u16x8(); | |
1353 | transmute(simd_select_bitmask(k, mul, zero)) | |
1354 | } | |
1355 | ||
1356 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. | |
1357 | /// | |
1358 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhi_epi16&expand=3962) | |
1359 | #[inline] | |
1360 | #[target_feature(enable = "avx512bw")] | |
1361 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1362 | pub unsafe fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i { | |
1363 | transmute(vpmulhw(a.as_i16x32(), b.as_i16x32())) | |
1364 | } | |
1365 | ||
1366 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1367 | /// | |
1368 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhi_epi16&expand=3960) | |
1369 | #[inline] | |
1370 | #[target_feature(enable = "avx512bw")] | |
1371 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1372 | pub unsafe fn _mm512_mask_mulhi_epi16( | |
1373 | src: __m512i, | |
1374 | k: __mmask32, | |
1375 | a: __m512i, | |
1376 | b: __m512i, | |
1377 | ) -> __m512i { | |
1378 | let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); | |
1379 | transmute(simd_select_bitmask(k, mul, src.as_i16x32())) | |
1380 | } | |
1381 | ||
1382 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1383 | /// | |
1384 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhi_epi16&expand=3961) | |
1385 | #[inline] | |
1386 | #[target_feature(enable = "avx512bw")] | |
1387 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1388 | pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1389 | let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); | |
1390 | let zero = _mm512_setzero_si512().as_i16x32(); | |
1391 | transmute(simd_select_bitmask(k, mul, zero)) | |
1392 | } | |
1393 | ||
1394 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1395 | /// | |
1396 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhi_epi16&expand=3957) | |
1397 | #[inline] | |
1398 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1399 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1400 | pub unsafe fn _mm256_mask_mulhi_epi16( | |
1401 | src: __m256i, | |
1402 | k: __mmask16, | |
1403 | a: __m256i, | |
1404 | b: __m256i, | |
1405 | ) -> __m256i { | |
1406 | let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); | |
1407 | transmute(simd_select_bitmask(k, mul, src.as_i16x16())) | |
1408 | } | |
1409 | ||
1410 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1411 | /// | |
1412 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhi_epi16&expand=3958) | |
1413 | #[inline] | |
1414 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1415 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1416 | pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1417 | let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); | |
1418 | let zero = _mm256_setzero_si256().as_i16x16(); | |
1419 | transmute(simd_select_bitmask(k, mul, zero)) | |
1420 | } | |
1421 | ||
1422 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1423 | /// | |
1424 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhi_epi16&expand=3954) | |
1425 | #[inline] | |
1426 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1427 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1428 | pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1429 | let mul = _mm_mulhi_epi16(a, b).as_i16x8(); | |
1430 | transmute(simd_select_bitmask(k, mul, src.as_i16x8())) | |
1431 | } | |
1432 | ||
1433 | /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1434 | /// | |
1435 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhi_epi16&expand=3955) | |
1436 | #[inline] | |
1437 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1438 | #[cfg_attr(test, assert_instr(vpmulhw))] | |
1439 | pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1440 | let mul = _mm_mulhi_epi16(a, b).as_i16x8(); | |
1441 | let zero = _mm_setzero_si128().as_i16x8(); | |
1442 | transmute(simd_select_bitmask(k, mul, zero)) | |
1443 | } | |
1444 | ||
1445 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst. | |
1446 | /// | |
1447 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mulhrs_epi16&expand=3986) | |
1448 | #[inline] | |
1449 | #[target_feature(enable = "avx512bw")] | |
1450 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1451 | pub unsafe fn _mm512_mulhrs_epi16(a: __m512i, b: __m512i) -> __m512i { | |
1452 | transmute(vpmulhrsw(a.as_i16x32(), b.as_i16x32())) | |
1453 | } | |
1454 | ||
1455 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1456 | /// | |
1457 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mulhrs_epi16&expand=3984) | |
1458 | #[inline] | |
1459 | #[target_feature(enable = "avx512bw")] | |
1460 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1461 | pub unsafe fn _mm512_mask_mulhrs_epi16( | |
1462 | src: __m512i, | |
1463 | k: __mmask32, | |
1464 | a: __m512i, | |
1465 | b: __m512i, | |
1466 | ) -> __m512i { | |
1467 | let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); | |
1468 | transmute(simd_select_bitmask(k, mul, src.as_i16x32())) | |
1469 | } | |
1470 | ||
1471 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1472 | /// | |
1473 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mulhrs_epi16&expand=3985) | |
1474 | #[inline] | |
1475 | #[target_feature(enable = "avx512bw")] | |
1476 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1477 | pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1478 | let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); | |
1479 | let zero = _mm512_setzero_si512().as_i16x32(); | |
1480 | transmute(simd_select_bitmask(k, mul, zero)) | |
1481 | } | |
1482 | ||
1483 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1484 | /// | |
1485 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mulhrs_epi16&expand=3981) | |
1486 | #[inline] | |
1487 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1488 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1489 | pub unsafe fn _mm256_mask_mulhrs_epi16( | |
1490 | src: __m256i, | |
1491 | k: __mmask16, | |
1492 | a: __m256i, | |
1493 | b: __m256i, | |
1494 | ) -> __m256i { | |
1495 | let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); | |
1496 | transmute(simd_select_bitmask(k, mul, src.as_i16x16())) | |
1497 | } | |
1498 | ||
1499 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1500 | /// | |
1501 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mulhrs_epi16&expand=3982) | |
1502 | #[inline] | |
1503 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1504 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1505 | pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1506 | let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); | |
1507 | let zero = _mm256_setzero_si256().as_i16x16(); | |
1508 | transmute(simd_select_bitmask(k, mul, zero)) | |
1509 | } | |
1510 | ||
1511 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1512 | /// | |
1513 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mulhrs_epi16&expand=3978) | |
1514 | #[inline] | |
1515 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1516 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1517 | pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1518 | let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); | |
1519 | transmute(simd_select_bitmask(k, mul, src.as_i16x8())) | |
1520 | } | |
1521 | ||
1522 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1523 | /// | |
1524 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mulhrs_epi16&expand=3979) | |
1525 | #[inline] | |
1526 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1527 | #[cfg_attr(test, assert_instr(vpmulhrsw))] | |
1528 | pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1529 | let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); | |
1530 | let zero = _mm_setzero_si128().as_i16x8(); | |
1531 | transmute(simd_select_bitmask(k, mul, zero)) | |
1532 | } | |
1533 | ||
1534 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst. | |
1535 | /// | |
1536 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi16&expand=3996) | |
1537 | #[inline] | |
1538 | #[target_feature(enable = "avx512bw")] | |
1539 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1540 | pub unsafe fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i { | |
1541 | transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) | |
1542 | } | |
1543 | ||
1544 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1545 | /// | |
1546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi16&expand=3994) | |
1547 | #[inline] | |
1548 | #[target_feature(enable = "avx512bw")] | |
1549 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1550 | pub unsafe fn _mm512_mask_mullo_epi16( | |
1551 | src: __m512i, | |
1552 | k: __mmask32, | |
1553 | a: __m512i, | |
1554 | b: __m512i, | |
1555 | ) -> __m512i { | |
1556 | let mul = _mm512_mullo_epi16(a, b).as_i16x32(); | |
1557 | transmute(simd_select_bitmask(k, mul, src.as_i16x32())) | |
1558 | } | |
1559 | ||
1560 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1561 | /// | |
1562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi16&expand=3995) | |
1563 | #[inline] | |
1564 | #[target_feature(enable = "avx512bw")] | |
1565 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1566 | pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1567 | let mul = _mm512_mullo_epi16(a, b).as_i16x32(); | |
1568 | let zero = _mm512_setzero_si512().as_i16x32(); | |
1569 | transmute(simd_select_bitmask(k, mul, zero)) | |
1570 | } | |
1571 | ||
1572 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1573 | /// | |
1574 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mullo_epi16&expand=3991) | |
1575 | #[inline] | |
1576 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1577 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1578 | pub unsafe fn _mm256_mask_mullo_epi16( | |
1579 | src: __m256i, | |
1580 | k: __mmask16, | |
1581 | a: __m256i, | |
1582 | b: __m256i, | |
1583 | ) -> __m256i { | |
1584 | let mul = _mm256_mullo_epi16(a, b).as_i16x16(); | |
1585 | transmute(simd_select_bitmask(k, mul, src.as_i16x16())) | |
1586 | } | |
1587 | ||
1588 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1589 | /// | |
1590 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mullo_epi16&expand=3992) | |
1591 | #[inline] | |
1592 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1593 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1594 | pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1595 | let mul = _mm256_mullo_epi16(a, b).as_i16x16(); | |
1596 | let zero = _mm256_setzero_si256().as_i16x16(); | |
1597 | transmute(simd_select_bitmask(k, mul, zero)) | |
1598 | } | |
1599 | ||
1600 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1601 | /// | |
1602 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mullo_epi16&expand=3988) | |
1603 | #[inline] | |
1604 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1605 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1606 | pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1607 | let mul = _mm_mullo_epi16(a, b).as_i16x8(); | |
1608 | transmute(simd_select_bitmask(k, mul, src.as_i16x8())) | |
1609 | } | |
1610 | ||
1611 | /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1612 | /// | |
1613 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mullo_epi16&expand=3989) | |
1614 | #[inline] | |
1615 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1616 | #[cfg_attr(test, assert_instr(vpmullw))] | |
1617 | pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1618 | let mul = _mm_mullo_epi16(a, b).as_i16x8(); | |
1619 | let zero = _mm_setzero_si128().as_i16x8(); | |
1620 | transmute(simd_select_bitmask(k, mul, zero)) | |
1621 | } | |
1622 | ||
1623 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst. | |
1624 | /// | |
1625 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu16&expand=3609) | |
1626 | #[inline] | |
1627 | #[target_feature(enable = "avx512bw")] | |
1628 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1629 | pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i { | |
1630 | transmute(vpmaxuw(a.as_u16x32(), b.as_u16x32())) | |
1631 | } | |
1632 | ||
1633 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1634 | /// | |
1635 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu16&expand=3607) | |
1636 | #[inline] | |
1637 | #[target_feature(enable = "avx512bw")] | |
1638 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1639 | pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1640 | let max = _mm512_max_epu16(a, b).as_u16x32(); | |
1641 | transmute(simd_select_bitmask(k, max, src.as_u16x32())) | |
1642 | } | |
1643 | ||
1644 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1645 | /// | |
1646 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu16&expand=3608) | |
1647 | #[inline] | |
1648 | #[target_feature(enable = "avx512bw")] | |
1649 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1650 | pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1651 | let max = _mm512_max_epu16(a, b).as_u16x32(); | |
1652 | let zero = _mm512_setzero_si512().as_u16x32(); | |
1653 | transmute(simd_select_bitmask(k, max, zero)) | |
1654 | } | |
1655 | ||
1656 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1657 | /// | |
1658 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epu16&expand=3604) | |
1659 | #[inline] | |
1660 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1661 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1662 | pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1663 | let max = _mm256_max_epu16(a, b).as_u16x16(); | |
1664 | transmute(simd_select_bitmask(k, max, src.as_u16x16())) | |
1665 | } | |
1666 | ||
1667 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1668 | /// | |
1669 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epu16&expand=3605) | |
1670 | #[inline] | |
1671 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1672 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1673 | pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1674 | let max = _mm256_max_epu16(a, b).as_u16x16(); | |
1675 | let zero = _mm256_setzero_si256().as_u16x16(); | |
1676 | transmute(simd_select_bitmask(k, max, zero)) | |
1677 | } | |
1678 | ||
1679 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1680 | /// | |
1681 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epu16&expand=3601) | |
1682 | #[inline] | |
1683 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1684 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1685 | pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1686 | let max = _mm_max_epu16(a, b).as_u16x8(); | |
1687 | transmute(simd_select_bitmask(k, max, src.as_u16x8())) | |
1688 | } | |
1689 | ||
1690 | /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1691 | /// | |
1692 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epu16&expand=3602) | |
1693 | #[inline] | |
1694 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1695 | #[cfg_attr(test, assert_instr(vpmaxuw))] | |
1696 | pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1697 | let max = _mm_max_epu16(a, b).as_u16x8(); | |
1698 | let zero = _mm_setzero_si128().as_u16x8(); | |
1699 | transmute(simd_select_bitmask(k, max, zero)) | |
1700 | } | |
1701 | ||
1702 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst. | |
1703 | /// | |
1704 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu8&expand=3636) | |
1705 | #[inline] | |
1706 | #[target_feature(enable = "avx512bw")] | |
1707 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1708 | pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i { | |
1709 | transmute(vpmaxub(a.as_u8x64(), b.as_u8x64())) | |
1710 | } | |
1711 | ||
1712 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1713 | /// | |
1714 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu8&expand=3634) | |
1715 | #[inline] | |
1716 | #[target_feature(enable = "avx512bw")] | |
1717 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1718 | pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
1719 | let max = _mm512_max_epu8(a, b).as_u8x64(); | |
1720 | transmute(simd_select_bitmask(k, max, src.as_u8x64())) | |
1721 | } | |
1722 | ||
1723 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1724 | /// | |
1725 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu8&expand=3635) | |
1726 | #[inline] | |
1727 | #[target_feature(enable = "avx512bw")] | |
1728 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1729 | pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
1730 | let max = _mm512_max_epu8(a, b).as_u8x64(); | |
1731 | let zero = _mm512_setzero_si512().as_u8x64(); | |
1732 | transmute(simd_select_bitmask(k, max, zero)) | |
1733 | } | |
1734 | ||
1735 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1736 | /// | |
1737 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epu8&expand=3631) | |
1738 | #[inline] | |
1739 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1740 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1741 | pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
1742 | let max = _mm256_max_epu8(a, b).as_u8x32(); | |
1743 | transmute(simd_select_bitmask(k, max, src.as_u8x32())) | |
1744 | } | |
1745 | ||
1746 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1747 | /// | |
1748 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epu8&expand=3632) | |
1749 | #[inline] | |
1750 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1751 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1752 | pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
1753 | let max = _mm256_max_epu8(a, b).as_u8x32(); | |
1754 | let zero = _mm256_setzero_si256().as_u8x32(); | |
1755 | transmute(simd_select_bitmask(k, max, zero)) | |
1756 | } | |
1757 | ||
1758 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1759 | /// | |
1760 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epu8&expand=3628) | |
1761 | #[inline] | |
1762 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1763 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1764 | pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
1765 | let max = _mm_max_epu8(a, b).as_u8x16(); | |
1766 | transmute(simd_select_bitmask(k, max, src.as_u8x16())) | |
1767 | } | |
1768 | ||
1769 | /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1770 | /// | |
1771 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epu8&expand=3629) | |
1772 | #[inline] | |
1773 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1774 | #[cfg_attr(test, assert_instr(vpmaxub))] | |
1775 | pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
1776 | let max = _mm_max_epu8(a, b).as_u8x16(); | |
1777 | let zero = _mm_setzero_si128().as_u8x16(); | |
1778 | transmute(simd_select_bitmask(k, max, zero)) | |
1779 | } | |
1780 | ||
1781 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst. | |
1782 | /// | |
1783 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi16&expand=3573) | |
1784 | #[inline] | |
1785 | #[target_feature(enable = "avx512bw")] | |
1786 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1787 | pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i { | |
1788 | transmute(vpmaxsw(a.as_i16x32(), b.as_i16x32())) | |
1789 | } | |
1790 | ||
1791 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1792 | /// | |
1793 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi16&expand=3571) | |
1794 | #[inline] | |
1795 | #[target_feature(enable = "avx512bw")] | |
1796 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1797 | pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1798 | let max = _mm512_max_epi16(a, b).as_i16x32(); | |
1799 | transmute(simd_select_bitmask(k, max, src.as_i16x32())) | |
1800 | } | |
1801 | ||
1802 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1803 | /// | |
1804 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi16&expand=3572) | |
1805 | #[inline] | |
1806 | #[target_feature(enable = "avx512bw")] | |
1807 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1808 | pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1809 | let max = _mm512_max_epi16(a, b).as_i16x32(); | |
1810 | let zero = _mm512_setzero_si512().as_i16x32(); | |
1811 | transmute(simd_select_bitmask(k, max, zero)) | |
1812 | } | |
1813 | ||
1814 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1815 | /// | |
1816 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epi16&expand=3568) | |
1817 | #[inline] | |
1818 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1819 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1820 | pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1821 | let max = _mm256_max_epi16(a, b).as_i16x16(); | |
1822 | transmute(simd_select_bitmask(k, max, src.as_i16x16())) | |
1823 | } | |
1824 | ||
1825 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1826 | /// | |
1827 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epi16&expand=3569) | |
1828 | #[inline] | |
1829 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1830 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1831 | pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1832 | let max = _mm256_max_epi16(a, b).as_i16x16(); | |
1833 | let zero = _mm256_setzero_si256().as_i16x16(); | |
1834 | transmute(simd_select_bitmask(k, max, zero)) | |
1835 | } | |
1836 | ||
1837 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1838 | /// | |
1839 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epi16&expand=3565) | |
1840 | #[inline] | |
1841 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1842 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1843 | pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1844 | let max = _mm_max_epi16(a, b).as_i16x8(); | |
1845 | transmute(simd_select_bitmask(k, max, src.as_i16x8())) | |
1846 | } | |
1847 | ||
1848 | /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1849 | /// | |
1850 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epi16&expand=3566) | |
1851 | #[inline] | |
1852 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1853 | #[cfg_attr(test, assert_instr(vpmaxsw))] | |
1854 | pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
1855 | let max = _mm_max_epi16(a, b).as_i16x8(); | |
1856 | let zero = _mm_setzero_si128().as_i16x8(); | |
1857 | transmute(simd_select_bitmask(k, max, zero)) | |
1858 | } | |
1859 | ||
1860 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst. | |
1861 | /// | |
1862 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi8&expand=3600) | |
1863 | #[inline] | |
1864 | #[target_feature(enable = "avx512bw")] | |
1865 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1866 | pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i { | |
1867 | transmute(vpmaxsb(a.as_i8x64(), b.as_i8x64())) | |
1868 | } | |
1869 | ||
1870 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1871 | /// | |
1872 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi8&expand=3598) | |
1873 | #[inline] | |
1874 | #[target_feature(enable = "avx512bw")] | |
1875 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1876 | pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
1877 | let max = _mm512_max_epi8(a, b).as_i8x64(); | |
1878 | transmute(simd_select_bitmask(k, max, src.as_i8x64())) | |
1879 | } | |
1880 | ||
1881 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1882 | /// | |
1883 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi8&expand=3599) | |
1884 | #[inline] | |
1885 | #[target_feature(enable = "avx512bw")] | |
1886 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1887 | pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
1888 | let max = _mm512_max_epi8(a, b).as_i8x64(); | |
1889 | let zero = _mm512_setzero_si512().as_i8x64(); | |
1890 | transmute(simd_select_bitmask(k, max, zero)) | |
1891 | } | |
1892 | ||
1893 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1894 | /// | |
1895 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_max_epi8&expand=3595) | |
1896 | #[inline] | |
1897 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1898 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1899 | pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
1900 | let max = _mm256_max_epi8(a, b).as_i8x32(); | |
1901 | transmute(simd_select_bitmask(k, max, src.as_i8x32())) | |
1902 | } | |
1903 | ||
1904 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1905 | /// | |
1906 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_max_epi8&expand=3596) | |
1907 | #[inline] | |
1908 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1909 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1910 | pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
1911 | let max = _mm256_max_epi8(a, b).as_i8x32(); | |
1912 | let zero = _mm256_setzero_si256().as_i8x32(); | |
1913 | transmute(simd_select_bitmask(k, max, zero)) | |
1914 | } | |
1915 | ||
1916 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1917 | /// | |
1918 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_epi8&expand=3592) | |
1919 | #[inline] | |
1920 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1921 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1922 | pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
1923 | let max = _mm_max_epi8(a, b).as_i8x16(); | |
1924 | transmute(simd_select_bitmask(k, max, src.as_i8x16())) | |
1925 | } | |
1926 | ||
1927 | /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1928 | /// | |
1929 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_epi8&expand=3593) | |
1930 | #[inline] | |
1931 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1932 | #[cfg_attr(test, assert_instr(vpmaxsb))] | |
1933 | pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
1934 | let max = _mm_max_epi8(a, b).as_i8x16(); | |
1935 | let zero = _mm_setzero_si128().as_i8x16(); | |
1936 | transmute(simd_select_bitmask(k, max, zero)) | |
1937 | } | |
1938 | ||
1939 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst. | |
1940 | /// | |
1941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu16&expand=3723) | |
1942 | #[inline] | |
1943 | #[target_feature(enable = "avx512bw")] | |
1944 | #[cfg_attr(test, assert_instr(vpminuw))] | |
1945 | pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i { | |
1946 | transmute(vpminuw(a.as_u16x32(), b.as_u16x32())) | |
1947 | } | |
1948 | ||
1949 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1950 | /// | |
1951 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu16&expand=3721) | |
1952 | #[inline] | |
1953 | #[target_feature(enable = "avx512bw")] | |
1954 | #[cfg_attr(test, assert_instr(vpminuw))] | |
1955 | pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1956 | let min = _mm512_min_epu16(a, b).as_u16x32(); | |
1957 | transmute(simd_select_bitmask(k, min, src.as_u16x32())) | |
1958 | } | |
1959 | ||
1960 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1961 | /// | |
1962 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu16&expand=3722) | |
1963 | #[inline] | |
1964 | #[target_feature(enable = "avx512bw")] | |
1965 | #[cfg_attr(test, assert_instr(vpminuw))] | |
1966 | pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
1967 | let min = _mm512_min_epu16(a, b).as_u16x32(); | |
1968 | let zero = _mm512_setzero_si512().as_u16x32(); | |
1969 | transmute(simd_select_bitmask(k, min, zero)) | |
1970 | } | |
1971 | ||
1972 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1973 | /// | |
1974 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epu16&expand=3718) | |
1975 | #[inline] | |
1976 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1977 | #[cfg_attr(test, assert_instr(vpminuw))] | |
1978 | pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1979 | let min = _mm256_min_epu16(a, b).as_u16x16(); | |
1980 | transmute(simd_select_bitmask(k, min, src.as_u16x16())) | |
1981 | } | |
1982 | ||
1983 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
1984 | /// | |
1985 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epu16&expand=3719) | |
1986 | #[inline] | |
1987 | #[target_feature(enable = "avx512bw,avx512vl")] | |
1988 | #[cfg_attr(test, assert_instr(vpminuw))] | |
1989 | pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
1990 | let min = _mm256_min_epu16(a, b).as_u16x16(); | |
1991 | let zero = _mm256_setzero_si256().as_u16x16(); | |
1992 | transmute(simd_select_bitmask(k, min, zero)) | |
1993 | } | |
1994 | ||
1995 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
1996 | /// | |
1997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epu16&expand=3715) | |
1998 | #[inline] | |
1999 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2000 | #[cfg_attr(test, assert_instr(vpminuw))] | |
2001 | pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
2002 | let min = _mm_min_epu16(a, b).as_u16x8(); | |
2003 | transmute(simd_select_bitmask(k, min, src.as_u16x8())) | |
2004 | } | |
2005 | ||
2006 | /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
2007 | /// | |
2008 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epu16&expand=3716) | |
2009 | #[inline] | |
2010 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2011 | #[cfg_attr(test, assert_instr(vpminuw))] | |
2012 | pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
2013 | let min = _mm_min_epu16(a, b).as_u16x8(); | |
2014 | let zero = _mm_setzero_si128().as_u16x8(); | |
2015 | transmute(simd_select_bitmask(k, min, zero)) | |
2016 | } | |
2017 | ||
2018 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst. | |
2019 | /// | |
2020 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu8&expand=3750) | |
2021 | #[inline] | |
2022 | #[target_feature(enable = "avx512bw")] | |
2023 | #[cfg_attr(test, assert_instr(vpminub))] | |
2024 | pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i { | |
2025 | transmute(vpminub(a.as_u8x64(), b.as_u8x64())) | |
2026 | } | |
2027 | ||
2028 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
2029 | /// | |
2030 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu8&expand=3748) | |
2031 | #[inline] | |
2032 | #[target_feature(enable = "avx512bw")] | |
2033 | #[cfg_attr(test, assert_instr(vpminub))] | |
2034 | pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
2035 | let min = _mm512_min_epu8(a, b).as_u8x64(); | |
2036 | transmute(simd_select_bitmask(k, min, src.as_u8x64())) | |
2037 | } | |
2038 | ||
2039 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
2040 | /// | |
2041 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu8&expand=3749) | |
2042 | #[inline] | |
2043 | #[target_feature(enable = "avx512bw")] | |
2044 | #[cfg_attr(test, assert_instr(vpminub))] | |
2045 | pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
2046 | let min = _mm512_min_epu8(a, b).as_u8x64(); | |
2047 | let zero = _mm512_setzero_si512().as_u8x64(); | |
2048 | transmute(simd_select_bitmask(k, min, zero)) | |
2049 | } | |
2050 | ||
2051 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
2052 | /// | |
2053 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epu8&expand=3745) | |
2054 | #[inline] | |
2055 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2056 | #[cfg_attr(test, assert_instr(vpminub))] | |
2057 | pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
2058 | let min = _mm256_min_epu8(a, b).as_u8x32(); | |
2059 | transmute(simd_select_bitmask(k, min, src.as_u8x32())) | |
2060 | } | |
2061 | ||
2062 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
2063 | /// | |
2064 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epu8&expand=3746) | |
2065 | #[inline] | |
2066 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2067 | #[cfg_attr(test, assert_instr(vpminub))] | |
2068 | pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
2069 | let min = _mm256_min_epu8(a, b).as_u8x32(); | |
2070 | let zero = _mm256_setzero_si256().as_u8x32(); | |
2071 | transmute(simd_select_bitmask(k, min, zero)) | |
2072 | } | |
2073 | ||
2074 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
2075 | /// | |
2076 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epu8&expand=3742) | |
2077 | #[inline] | |
2078 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2079 | #[cfg_attr(test, assert_instr(vpminub))] | |
2080 | pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
2081 | let min = _mm_min_epu8(a, b).as_u8x16(); | |
2082 | transmute(simd_select_bitmask(k, min, src.as_u8x16())) | |
2083 | } | |
2084 | ||
2085 | /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
2086 | /// | |
2087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epu8&expand=3743) | |
2088 | #[inline] | |
2089 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2090 | #[cfg_attr(test, assert_instr(vpminub))] | |
2091 | pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
2092 | let min = _mm_min_epu8(a, b).as_u8x16(); | |
2093 | let zero = _mm_setzero_si128().as_u8x16(); | |
2094 | transmute(simd_select_bitmask(k, min, zero)) | |
2095 | } | |
2096 | ||
2097 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst. | |
2098 | /// | |
2099 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi16&expand=3687) | |
2100 | #[inline] | |
2101 | #[target_feature(enable = "avx512bw")] | |
2102 | #[cfg_attr(test, assert_instr(vpminsw))] | |
2103 | pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i { | |
2104 | transmute(vpminsw(a.as_i16x32(), b.as_i16x32())) | |
2105 | } | |
2106 | ||
2107 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
2108 | /// | |
2109 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi16&expand=3685) | |
2110 | #[inline] | |
2111 | #[target_feature(enable = "avx512bw")] | |
2112 | #[cfg_attr(test, assert_instr(vpminsw))] | |
2113 | pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
2114 | let min = _mm512_min_epi16(a, b).as_i16x32(); | |
2115 | transmute(simd_select_bitmask(k, min, src.as_i16x32())) | |
2116 | } | |
2117 | ||
2118 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
2119 | /// | |
2120 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi16&expand=3686) | |
2121 | #[inline] | |
2122 | #[target_feature(enable = "avx512bw")] | |
2123 | #[cfg_attr(test, assert_instr(vpminsw))] | |
2124 | pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
2125 | let min = _mm512_min_epi16(a, b).as_i16x32(); | |
2126 | let zero = _mm512_setzero_si512().as_i16x32(); | |
2127 | transmute(simd_select_bitmask(k, min, zero)) | |
2128 | } | |
2129 | ||
2130 | /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
2131 | /// | |
2132 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epi16&expand=3682) | |
2133 | #[inline] | |
2134 | #[target_feature(enable = "avx512bw,avx512vl")] | |
2135 | #[cfg_attr(test, assert_instr(vpminsw))] | |
2136 | pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
2137 | let min = _mm256_min_epi16(a, b).as_i16x16(); | |
2138 | transmute(simd_select_bitmask(k, min, src.as_i16x16())) | |
2139 | } | |
2140 | ||
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epi16&expand=3683)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi16(a, b).as_i16x16();
    let zero = _mm256_setzero_si256().as_i16x16();
    // Lanes whose mask bit is clear are zeroed rather than copied from a source operand.
    transmute(simd_select_bitmask(k, min, zero))
}
2152 | ||
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi16&expand=3679)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi16(a, b).as_i16x8();
    // Bit i of k set -> take the min lane, otherwise keep the src lane.
    transmute(simd_select_bitmask(k, min, src.as_i16x8()))
}
2163 | ||
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi16&expand=3680)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsw))]
pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi16(a, b).as_i16x8();
    let zero = _mm_setzero_si128().as_i16x8();
    // Lanes whose mask bit is clear are zeroed rather than copied from a source operand.
    transmute(simd_select_bitmask(k, min, zero))
}
2175 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi8&expand=3714)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
    // `vpminsb` is the LLVM intrinsic binding declared elsewhere in this file;
    // it maps directly to the instruction asserted above.
    transmute(vpminsb(a.as_i8x64(), b.as_i8x64()))
}
2185 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi8&expand=3712)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi8(a, b).as_i8x64();
    // Bit i of k set -> take the min lane, otherwise keep the src lane.
    transmute(simd_select_bitmask(k, min, src.as_i8x64()))
}
2196 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi8&expand=3713)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
    let min = _mm512_min_epi8(a, b).as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    // Lanes whose mask bit is clear are zeroed rather than copied from a source operand.
    transmute(simd_select_bitmask(k, min, zero))
}
2208 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_min_epi8&expand=3709)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi8(a, b).as_i8x32();
    // Bit i of k set -> take the min lane, otherwise keep the src lane.
    transmute(simd_select_bitmask(k, min, src.as_i8x32()))
}
2219 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_min_epi8&expand=3710)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
    let min = _mm256_min_epi8(a, b).as_i8x32();
    let zero = _mm256_setzero_si256().as_i8x32();
    // Lanes whose mask bit is clear are zeroed rather than copied from a source operand.
    transmute(simd_select_bitmask(k, min, zero))
}
2231 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_epi8&expand=3706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi8(a, b).as_i8x16();
    // Bit i of k set -> take the min lane, otherwise keep the src lane.
    transmute(simd_select_bitmask(k, min, src.as_i8x16()))
}
2242 | ||
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_epi8&expand=3707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsb))]
pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
    let min = _mm_min_epi8(a, b).as_i8x16();
    let zero = _mm_setzero_si128().as_i8x16();
    // Lanes whose mask bit is clear are zeroed rather than copied from a source operand.
    transmute(simd_select_bitmask(k, min, zero))
}
2254 | ||
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32()))
}
2264 | ||
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu16_mask&expand=1051)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm512_cmplt_epu16_mask(a, b) & k1
}
2274 | ||
cdc7bbd5 XL |
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epu16_mask&expand=1050)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16()))
}
2284 | ||
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epu16_mask&expand=1049)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm256_cmplt_epu16_mask(a, b) & k1
}
2294 | ||
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epu16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8()))
}
2304 | ||
/// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epu16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm_cmplt_epu16_mask(a, b) & k1
}
2314 | ||
fc512014 XL |
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu8_mask&expand=1068)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64()))
}
2324 | ||
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu8_mask&expand=1069)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm512_cmplt_epu8_mask(a, b) & k1
}
2334 | ||
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epu8_mask&expand=1066)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32()))
}
2344 | ||
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epu8_mask&expand=1067)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm256_cmplt_epu8_mask(a, b) & k1
}
2354 | ||
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epu8_mask&expand=1064)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16()))
}
2364 | ||
/// Compare packed unsigned 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epu8_mask&expand=1065)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm_cmplt_epu8_mask(a, b) & k1
}
2374 | ||
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi16_mask&expand=1022)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32()))
}
2384 | ||
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi16_mask&expand=1023)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm512_cmplt_epi16_mask(a, b) & k1
}
2394 | ||
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epi16_mask&expand=1020)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16()))
}
2404 | ||
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epi16_mask&expand=1021)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm256_cmplt_epi16_mask(a, b) & k1
}
2414 | ||
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16_mask&expand=1018)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}
2424 | ||
/// Compare packed signed 16-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epi16_mask&expand=1019)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm_cmplt_epi16_mask(a, b) & k1
}
2434 | ||
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi8_mask&expand=1044)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64()))
}
2444 | ||
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi8_mask&expand=1045)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm512_cmplt_epi8_mask(a, b) & k1
}
2454 | ||
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmplt_epi8_mask&expand=1042)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32()))
}
2464 | ||
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmplt_epi8_mask&expand=1043)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm256_cmplt_epi8_mask(a, b) & k1
}
2474 | ||
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8_mask&expand=1040)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // simd_lt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}
2484 | ||
/// Compare packed signed 8-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmplt_epi8_mask&expand=1041)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm_cmplt_epi8_mask(a, b) & k1
}
2494 | ||
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu16_mask&expand=927)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32()))
}
2504 | ||
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu16_mask&expand=928)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm512_cmpgt_epu16_mask(a, b) & k1
}
2514 | ||
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epu16_mask&expand=925)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16()))
}
2524 | ||
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epu16_mask&expand=926)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm256_cmpgt_epu16_mask(a, b) & k1
}
2534 | ||
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epu16_mask&expand=923)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8()))
}
2544 | ||
/// Compare packed unsigned 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epu16_mask&expand=924)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm_cmpgt_epu16_mask(a, b) & k1
}
2554 | ||
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu8_mask&expand=945)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64()))
}
2564 | ||
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu8_mask&expand=946)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm512_cmpgt_epu8_mask(a, b) & k1
}
2574 | ||
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epu8_mask&expand=943)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32()))
}
2584 | ||
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epu8_mask&expand=944)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm256_cmpgt_epu8_mask(a, b) & k1
}
2594 | ||
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epu8_mask&expand=941)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16()))
}
2604 | ||
/// Compare packed unsigned 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epu8_mask&expand=942)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
    // ANDing with k1 clears result bits whose k1 bit is not set.
    _mm_cmpgt_epu8_mask(a, b) & k1
}
2614 | ||
/// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi16_mask&expand=897)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
    // simd_gt yields a lane-wise boolean vector; simd_bitmask packs it into one bit per lane.
    simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32()))
}
2624 | ||
cdc7bbd5 | 2625 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2626 | /// |
cdc7bbd5 | 2627 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi16_mask&expand=898) |
fc512014 XL |
2628 | #[inline] |
2629 | #[target_feature(enable = "avx512bw")] | |
2630 | #[cfg_attr(test, assert_instr(vpcmp))] | |
cdc7bbd5 XL |
2631 | pub unsafe fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2632 | _mm512_cmpgt_epi16_mask(a, b) & k1 | |
fc512014 XL |
2633 | } |
2634 | ||
cdc7bbd5 | 2635 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
fc512014 | 2636 | /// |
cdc7bbd5 | 2637 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16_mask&expand=895) |
fc512014 | 2638 | #[inline] |
cdc7bbd5 | 2639 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2640 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2641 | pub unsafe fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
2642 | simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) | |
fc512014 XL |
2643 | } |
2644 | ||
cdc7bbd5 | 2645 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2646 | /// |
cdc7bbd5 | 2647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epi16_mask&expand=896) |
fc512014 | 2648 | #[inline] |
cdc7bbd5 | 2649 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2650 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2651 | pub unsafe fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2652 | _mm256_cmpgt_epi16_mask(a, b) & k1 | |
fc512014 XL |
2653 | } |
2654 | ||
cdc7bbd5 | 2655 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k. |
fc512014 | 2656 | /// |
cdc7bbd5 | 2657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16_mask&expand=893) |
fc512014 | 2658 | #[inline] |
cdc7bbd5 | 2659 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2660 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2661 | pub unsafe fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { |
2662 | simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) | |
fc512014 XL |
2663 | } |
2664 | ||
cdc7bbd5 | 2665 | /// Compare packed signed 16-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2666 | /// |
cdc7bbd5 | 2667 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epi16_mask&expand=894) |
fc512014 | 2668 | #[inline] |
cdc7bbd5 | 2669 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2670 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2671 | pub unsafe fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { |
2672 | _mm_cmpgt_epi16_mask(a, b) & k1 | |
fc512014 XL |
2673 | } |
2674 | ||
cdc7bbd5 | 2675 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
fc512014 | 2676 | /// |
cdc7bbd5 | 2677 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi8_mask&expand=921) |
fc512014 XL |
2678 | #[inline] |
2679 | #[target_feature(enable = "avx512bw")] | |
2680 | #[cfg_attr(test, assert_instr(vpcmp))] | |
cdc7bbd5 XL |
2681 | pub unsafe fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { |
2682 | simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) | |
fc512014 XL |
2683 | } |
2684 | ||
cdc7bbd5 | 2685 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2686 | /// |
cdc7bbd5 | 2687 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi8_mask&expand=922) |
fc512014 XL |
2688 | #[inline] |
2689 | #[target_feature(enable = "avx512bw")] | |
2690 | #[cfg_attr(test, assert_instr(vpcmp))] | |
cdc7bbd5 XL |
2691 | pub unsafe fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { |
2692 | _mm512_cmpgt_epi8_mask(a, b) & k1 | |
fc512014 XL |
2693 | } |
2694 | ||
cdc7bbd5 | 2695 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
fc512014 | 2696 | /// |
cdc7bbd5 | 2697 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8_mask&expand=919) |
fc512014 | 2698 | #[inline] |
cdc7bbd5 | 2699 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2700 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2701 | pub unsafe fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { |
2702 | simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) | |
fc512014 XL |
2703 | } |
2704 | ||
cdc7bbd5 | 2705 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2706 | /// |
cdc7bbd5 | 2707 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpgt_epi8_mask&expand=920) |
fc512014 | 2708 | #[inline] |
cdc7bbd5 | 2709 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2710 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2711 | pub unsafe fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { |
2712 | _mm256_cmpgt_epi8_mask(a, b) & k1 | |
fc512014 XL |
2713 | } |
2714 | ||
cdc7bbd5 | 2715 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k. |
fc512014 | 2716 | /// |
cdc7bbd5 | 2717 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8_mask&expand=917) |
fc512014 | 2718 | #[inline] |
cdc7bbd5 | 2719 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2720 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2721 | pub unsafe fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { |
2722 | simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) | |
fc512014 XL |
2723 | } |
2724 | ||
cdc7bbd5 | 2725 | /// Compare packed signed 8-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2726 | /// |
cdc7bbd5 | 2727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpgt_epi8_mask&expand=918) |
fc512014 | 2728 | #[inline] |
cdc7bbd5 | 2729 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2730 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2731 | pub unsafe fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { |
2732 | _mm_cmpgt_epi8_mask(a, b) & k1 | |
fc512014 XL |
2733 | } |
2734 | ||
cdc7bbd5 | 2735 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2736 | /// |
cdc7bbd5 | 2737 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu16_mask&expand=989) |
fc512014 XL |
2738 | #[inline] |
2739 | #[target_feature(enable = "avx512bw")] | |
2740 | #[cfg_attr(test, assert_instr(vpcmp))] | |
cdc7bbd5 XL |
2741 | pub unsafe fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { |
2742 | simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) | |
fc512014 XL |
2743 | } |
2744 | ||
cdc7bbd5 | 2745 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2746 | /// |
cdc7bbd5 | 2747 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu16_mask&expand=990) |
fc512014 XL |
2748 | #[inline] |
2749 | #[target_feature(enable = "avx512bw")] | |
2750 | #[cfg_attr(test, assert_instr(vpcmp))] | |
cdc7bbd5 XL |
2751 | pub unsafe fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { |
2752 | _mm512_cmple_epu16_mask(a, b) & k1 | |
fc512014 XL |
2753 | } |
2754 | ||
cdc7bbd5 | 2755 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2756 | /// |
cdc7bbd5 | 2757 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epu16_mask&expand=987) |
fc512014 | 2758 | #[inline] |
cdc7bbd5 | 2759 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2760 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2761 | pub unsafe fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { |
2762 | simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) | |
fc512014 XL |
2763 | } |
2764 | ||
cdc7bbd5 | 2765 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2766 | /// |
cdc7bbd5 | 2767 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epu16_mask&expand=988) |
fc512014 | 2768 | #[inline] |
cdc7bbd5 | 2769 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 2770 | #[cfg_attr(test, assert_instr(vpcmp))] |
cdc7bbd5 XL |
2771 | pub unsafe fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { |
2772 | _mm256_cmple_epu16_mask(a, b) & k1 | |
fc512014 XL |
2773 | } |
2774 | ||
cdc7bbd5 | 2775 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2776 | /// |
cdc7bbd5 | 2777 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epu16_mask&expand=985) |
fc512014 | 2778 | #[inline] |
cdc7bbd5 XL |
2779 | #[target_feature(enable = "avx512bw,avx512vl")] |
2780 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2781 | pub unsafe fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
2782 | simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) | |
fc512014 XL |
2783 | } |
2784 | ||
cdc7bbd5 | 2785 | /// Compare packed unsigned 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2786 | /// |
cdc7bbd5 | 2787 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epu16_mask&expand=986) |
fc512014 | 2788 | #[inline] |
cdc7bbd5 XL |
2789 | #[target_feature(enable = "avx512bw,avx512vl")] |
2790 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2791 | pub unsafe fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
2792 | _mm_cmple_epu16_mask(a, b) & k1 | |
fc512014 XL |
2793 | } |
2794 | ||
cdc7bbd5 | 2795 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2796 | /// |
cdc7bbd5 | 2797 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu8_mask&expand=1007) |
fc512014 XL |
2798 | #[inline] |
2799 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2800 | #[cfg_attr(test, assert_instr(vpcmp))] |
2801 | pub unsafe fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
2802 | simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) | |
fc512014 XL |
2803 | } |
2804 | ||
cdc7bbd5 | 2805 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2806 | /// |
cdc7bbd5 | 2807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu8_mask&expand=1008) |
fc512014 XL |
2808 | #[inline] |
2809 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2810 | #[cfg_attr(test, assert_instr(vpcmp))] |
2811 | pub unsafe fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
2812 | _mm512_cmple_epu8_mask(a, b) & k1 | |
fc512014 XL |
2813 | } |
2814 | ||
cdc7bbd5 | 2815 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2816 | /// |
cdc7bbd5 | 2817 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epu8_mask&expand=1005) |
fc512014 | 2818 | #[inline] |
cdc7bbd5 XL |
2819 | #[target_feature(enable = "avx512bw,avx512vl")] |
2820 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2821 | pub unsafe fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
2822 | simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) | |
fc512014 XL |
2823 | } |
2824 | ||
cdc7bbd5 | 2825 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2826 | /// |
cdc7bbd5 | 2827 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epu8_mask&expand=1006) |
fc512014 | 2828 | #[inline] |
cdc7bbd5 XL |
2829 | #[target_feature(enable = "avx512bw,avx512vl")] |
2830 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2831 | pub unsafe fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
2832 | _mm256_cmple_epu8_mask(a, b) & k1 | |
fc512014 XL |
2833 | } |
2834 | ||
cdc7bbd5 | 2835 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2836 | /// |
cdc7bbd5 | 2837 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epu8_mask&expand=1003) |
fc512014 | 2838 | #[inline] |
cdc7bbd5 XL |
2839 | #[target_feature(enable = "avx512bw,avx512vl")] |
2840 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2841 | pub unsafe fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
2842 | simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) | |
fc512014 XL |
2843 | } |
2844 | ||
cdc7bbd5 | 2845 | /// Compare packed unsigned 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2846 | /// |
cdc7bbd5 | 2847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epu8_mask&expand=1004) |
fc512014 | 2848 | #[inline] |
cdc7bbd5 XL |
2849 | #[target_feature(enable = "avx512bw,avx512vl")] |
2850 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2851 | pub unsafe fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
2852 | _mm_cmple_epu8_mask(a, b) & k1 | |
fc512014 XL |
2853 | } |
2854 | ||
cdc7bbd5 | 2855 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2856 | /// |
cdc7bbd5 | 2857 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi16_mask&expand=965) |
fc512014 XL |
2858 | #[inline] |
2859 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2860 | #[cfg_attr(test, assert_instr(vpcmp))] |
2861 | pub unsafe fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
2862 | simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) | |
fc512014 XL |
2863 | } |
2864 | ||
cdc7bbd5 | 2865 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2866 | /// |
cdc7bbd5 | 2867 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi16_mask&expand=966) |
fc512014 XL |
2868 | #[inline] |
2869 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2870 | #[cfg_attr(test, assert_instr(vpcmp))] |
2871 | pub unsafe fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
2872 | _mm512_cmple_epi16_mask(a, b) & k1 | |
fc512014 XL |
2873 | } |
2874 | ||
cdc7bbd5 | 2875 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2876 | /// |
cdc7bbd5 | 2877 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epi16_mask&expand=963) |
fc512014 | 2878 | #[inline] |
cdc7bbd5 XL |
2879 | #[target_feature(enable = "avx512bw,avx512vl")] |
2880 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2881 | pub unsafe fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
2882 | simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) | |
fc512014 XL |
2883 | } |
2884 | ||
cdc7bbd5 | 2885 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2886 | /// |
cdc7bbd5 | 2887 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epi16_mask&expand=964) |
fc512014 | 2888 | #[inline] |
cdc7bbd5 XL |
2889 | #[target_feature(enable = "avx512bw,avx512vl")] |
2890 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2891 | pub unsafe fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
2892 | _mm256_cmple_epi16_mask(a, b) & k1 | |
fc512014 XL |
2893 | } |
2894 | ||
cdc7bbd5 | 2895 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2896 | /// |
cdc7bbd5 | 2897 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epi16_mask&expand=961) |
fc512014 | 2898 | #[inline] |
cdc7bbd5 XL |
2899 | #[target_feature(enable = "avx512bw,avx512vl")] |
2900 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2901 | pub unsafe fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
2902 | simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) | |
fc512014 XL |
2903 | } |
2904 | ||
cdc7bbd5 | 2905 | /// Compare packed signed 16-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2906 | /// |
cdc7bbd5 | 2907 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epi16_mask&expand=962) |
fc512014 | 2908 | #[inline] |
cdc7bbd5 XL |
2909 | #[target_feature(enable = "avx512bw,avx512vl")] |
2910 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2911 | pub unsafe fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
2912 | _mm_cmple_epi16_mask(a, b) & k1 | |
fc512014 XL |
2913 | } |
2914 | ||
cdc7bbd5 | 2915 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2916 | /// |
cdc7bbd5 | 2917 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi8_mask&expand=983) |
fc512014 XL |
2918 | #[inline] |
2919 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2920 | #[cfg_attr(test, assert_instr(vpcmp))] |
2921 | pub unsafe fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
2922 | simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) | |
fc512014 XL |
2923 | } |
2924 | ||
cdc7bbd5 | 2925 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2926 | /// |
cdc7bbd5 | 2927 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi8_mask&expand=984) |
fc512014 | 2928 | #[inline] |
cdc7bbd5 XL |
2929 | #[target_feature(enable = "avx512bw")] |
2930 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2931 | pub unsafe fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
2932 | _mm512_cmple_epi8_mask(a, b) & k1 | |
fc512014 XL |
2933 | } |
2934 | ||
cdc7bbd5 | 2935 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2936 | /// |
cdc7bbd5 | 2937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmple_epi8_mask&expand=981) |
fc512014 XL |
2938 | #[inline] |
2939 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
2940 | #[cfg_attr(test, assert_instr(vpcmp))] |
2941 | pub unsafe fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
2942 | simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) | |
fc512014 XL |
2943 | } |
2944 | ||
cdc7bbd5 | 2945 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2946 | /// |
cdc7bbd5 | 2947 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmple_epi8_mask&expand=982) |
fc512014 XL |
2948 | #[inline] |
2949 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
2950 | #[cfg_attr(test, assert_instr(vpcmp))] |
2951 | pub unsafe fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
2952 | _mm256_cmple_epi8_mask(a, b) & k1 | |
fc512014 XL |
2953 | } |
2954 | ||
cdc7bbd5 | 2955 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k. |
fc512014 | 2956 | /// |
cdc7bbd5 | 2957 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_epi8_mask&expand=979) |
fc512014 XL |
2958 | #[inline] |
2959 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
2960 | #[cfg_attr(test, assert_instr(vpcmp))] |
2961 | pub unsafe fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
2962 | simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) | |
fc512014 XL |
2963 | } |
2964 | ||
cdc7bbd5 | 2965 | /// Compare packed signed 8-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2966 | /// |
cdc7bbd5 | 2967 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmple_epi8_mask&expand=980) |
fc512014 | 2968 | #[inline] |
cdc7bbd5 XL |
2969 | #[target_feature(enable = "avx512bw,avx512vl")] |
2970 | #[cfg_attr(test, assert_instr(vpcmp))] | |
2971 | pub unsafe fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
2972 | _mm_cmple_epi8_mask(a, b) & k1 | |
fc512014 XL |
2973 | } |
2974 | ||
cdc7bbd5 | 2975 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
fc512014 | 2976 | /// |
cdc7bbd5 | 2977 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu16_mask&expand=867) |
fc512014 XL |
2978 | #[inline] |
2979 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2980 | #[cfg_attr(test, assert_instr(vpcmp))] |
2981 | pub unsafe fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
2982 | simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) | |
fc512014 XL |
2983 | } |
2984 | ||
cdc7bbd5 | 2985 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 2986 | /// |
cdc7bbd5 | 2987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu16_mask&expand=868) |
fc512014 XL |
2988 | #[inline] |
2989 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
2990 | #[cfg_attr(test, assert_instr(vpcmp))] |
2991 | pub unsafe fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
2992 | _mm512_cmpge_epu16_mask(a, b) & k1 | |
fc512014 XL |
2993 | } |
2994 | ||
cdc7bbd5 | 2995 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
fc512014 | 2996 | /// |
cdc7bbd5 | 2997 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epu16_mask&expand=865) |
fc512014 XL |
2998 | #[inline] |
2999 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
3000 | #[cfg_attr(test, assert_instr(vpcmp))] |
3001 | pub unsafe fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
3002 | simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) | |
fc512014 XL |
3003 | } |
3004 | ||
cdc7bbd5 | 3005 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 3006 | /// |
cdc7bbd5 | 3007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epu16_mask&expand=866) |
fc512014 XL |
3008 | #[inline] |
3009 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
3010 | #[cfg_attr(test, assert_instr(vpcmp))] |
3011 | pub unsafe fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
3012 | _mm256_cmpge_epu16_mask(a, b) & k1 | |
fc512014 XL |
3013 | } |
3014 | ||
cdc7bbd5 | 3015 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
fc512014 | 3016 | /// |
cdc7bbd5 | 3017 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epu16_mask&expand=863) |
fc512014 XL |
3018 | #[inline] |
3019 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
3020 | #[cfg_attr(test, assert_instr(vpcmp))] |
3021 | pub unsafe fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
3022 | simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) | |
fc512014 XL |
3023 | } |
3024 | ||
cdc7bbd5 | 3025 | /// Compare packed unsigned 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 3026 | /// |
cdc7bbd5 | 3027 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epu16_mask&expand=864) |
fc512014 XL |
3028 | #[inline] |
3029 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
3030 | #[cfg_attr(test, assert_instr(vpcmp))] |
3031 | pub unsafe fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
3032 | _mm_cmpge_epu16_mask(a, b) & k1 | |
fc512014 XL |
3033 | } |
3034 | ||
cdc7bbd5 | 3035 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
fc512014 | 3036 | /// |
cdc7bbd5 | 3037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu8_mask&expand=885) |
fc512014 XL |
3038 | #[inline] |
3039 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
3040 | #[cfg_attr(test, assert_instr(vpcmp))] |
3041 | pub unsafe fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
3042 | simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) | |
fc512014 XL |
3043 | } |
3044 | ||
cdc7bbd5 | 3045 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 3046 | /// |
cdc7bbd5 | 3047 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu8_mask&expand=886) |
fc512014 XL |
3048 | #[inline] |
3049 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
3050 | #[cfg_attr(test, assert_instr(vpcmp))] |
3051 | pub unsafe fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
3052 | _mm512_cmpge_epu8_mask(a, b) & k1 | |
fc512014 XL |
3053 | } |
3054 | ||
cdc7bbd5 | 3055 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
fc512014 | 3056 | /// |
cdc7bbd5 | 3057 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epu8_mask&expand=883) |
fc512014 | 3058 | #[inline] |
cdc7bbd5 XL |
3059 | #[target_feature(enable = "avx512bw,avx512vl")] |
3060 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3061 | pub unsafe fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
3062 | simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) | |
fc512014 XL |
3063 | } |
3064 | ||
cdc7bbd5 | 3065 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 3066 | /// |
cdc7bbd5 | 3067 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epu8_mask&expand=884) |
fc512014 XL |
3068 | #[inline] |
3069 | #[target_feature(enable = "avx512bw,avx512vl")] | |
cdc7bbd5 XL |
3070 | #[cfg_attr(test, assert_instr(vpcmp))] |
3071 | pub unsafe fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
3072 | _mm256_cmpge_epu8_mask(a, b) & k1 | |
fc512014 XL |
3073 | } |
3074 | ||
cdc7bbd5 | 3075 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. |
fc512014 | 3076 | /// |
cdc7bbd5 XL |
3077 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epu8_mask&expand=881) |
3078 | #[inline] | |
3079 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3080 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3081 | pub unsafe fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
3082 | simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) | |
3083 | } | |
3084 | ||
3085 | /// Compare packed unsigned 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3086 | /// | |
3087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epu8_mask&expand=882) | |
3088 | #[inline] | |
3089 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3090 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3091 | pub unsafe fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
3092 | _mm_cmpge_epu8_mask(a, b) & k1 | |
3093 | } | |
3094 | ||
3095 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. | |
3096 | /// | |
3097 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi16_mask&expand=843) | |
3098 | #[inline] | |
3099 | #[target_feature(enable = "avx512bw")] | |
3100 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3101 | pub unsafe fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
3102 | simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) | |
3103 | } | |
3104 | ||
3105 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3106 | /// | |
3107 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi16_mask&expand=844) | |
3108 | #[inline] | |
3109 | #[target_feature(enable = "avx512bw")] | |
3110 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3111 | pub unsafe fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
3112 | _mm512_cmpge_epi16_mask(a, b) & k1 | |
3113 | } | |
3114 | ||
3115 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. | |
3116 | /// | |
3117 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epi16_mask&expand=841) | |
3118 | #[inline] | |
3119 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3120 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3121 | pub unsafe fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
3122 | simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) | |
3123 | } | |
3124 | ||
3125 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3126 | /// | |
3127 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epi16_mask&expand=842) | |
3128 | #[inline] | |
3129 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3130 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3131 | pub unsafe fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
3132 | _mm256_cmpge_epi16_mask(a, b) & k1 | |
3133 | } | |
3134 | ||
3135 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. | |
3136 | /// | |
3137 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epi16_mask&expand=839) | |
3138 | #[inline] | |
3139 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3140 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3141 | pub unsafe fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
3142 | simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) | |
3143 | } | |
3144 | ||
3145 | /// Compare packed signed 16-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3146 | /// | |
3147 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epi16_mask&expand=840) | |
3148 | #[inline] | |
3149 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3150 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3151 | pub unsafe fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
3152 | _mm_cmpge_epi16_mask(a, b) & k1 | |
3153 | } | |
3154 | ||
3155 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. | |
3156 | /// | |
3157 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi8_mask&expand=861) | |
3158 | #[inline] | |
3159 | #[target_feature(enable = "avx512bw")] | |
3160 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3161 | pub unsafe fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
3162 | simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) | |
3163 | } | |
3164 | ||
3165 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3166 | /// | |
3167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi8_mask&expand=862) | |
3168 | #[inline] | |
3169 | #[target_feature(enable = "avx512bw")] | |
3170 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3171 | pub unsafe fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
3172 | _mm512_cmpge_epi8_mask(a, b) & k1 | |
3173 | } | |
3174 | ||
3175 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. | |
3176 | /// | |
3177 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpge_epi8_mask&expand=859) | |
3178 | #[inline] | |
3179 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3180 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3181 | pub unsafe fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
3182 | simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) | |
3183 | } | |
3184 | ||
3185 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3186 | /// | |
3187 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpge_epi8_mask&expand=860) | |
3188 | #[inline] | |
3189 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3190 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3191 | pub unsafe fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
3192 | _mm256_cmpge_epi8_mask(a, b) & k1 | |
3193 | } | |
3194 | ||
3195 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k. | |
3196 | /// | |
3197 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_epi8_mask&expand=857) | |
3198 | #[inline] | |
3199 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3200 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3201 | pub unsafe fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
3202 | simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) | |
3203 | } | |
3204 | ||
3205 | /// Compare packed signed 8-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3206 | /// | |
3207 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpge_epi8_mask&expand=858) | |
3208 | #[inline] | |
3209 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3210 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3211 | pub unsafe fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
3212 | _mm_cmpge_epi8_mask(a, b) & k1 | |
3213 | } | |
3214 | ||
3215 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. | |
3216 | /// | |
3217 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu16_mask&expand=801) | |
3218 | #[inline] | |
3219 | #[target_feature(enable = "avx512bw")] | |
3220 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3221 | pub unsafe fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
3222 | simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) | |
3223 | } | |
3224 | ||
3225 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3226 | /// | |
3227 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu16_mask&expand=802) | |
3228 | #[inline] | |
3229 | #[target_feature(enable = "avx512bw")] | |
3230 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3231 | pub unsafe fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
3232 | _mm512_cmpeq_epu16_mask(a, b) & k1 | |
3233 | } | |
3234 | ||
3235 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. | |
3236 | /// | |
3237 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epu16_mask&expand=799) | |
3238 | #[inline] | |
3239 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3240 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3241 | pub unsafe fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
3242 | simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) | |
3243 | } | |
3244 | ||
3245 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3246 | /// | |
3247 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epu16_mask&expand=800) | |
3248 | #[inline] | |
3249 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3250 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3251 | pub unsafe fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
3252 | _mm256_cmpeq_epu16_mask(a, b) & k1 | |
3253 | } | |
3254 | ||
3255 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k. | |
3256 | /// | |
3257 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epu16_mask&expand=797) | |
3258 | #[inline] | |
3259 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3260 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3261 | pub unsafe fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
3262 | simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) | |
3263 | } | |
3264 | ||
3265 | /// Compare packed unsigned 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3266 | /// | |
3267 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epu16_mask&expand=798) | |
3268 | #[inline] | |
3269 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3270 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3271 | pub unsafe fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
3272 | _mm_cmpeq_epu16_mask(a, b) & k1 | |
3273 | } | |
3274 | ||
3275 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. | |
3276 | /// | |
3277 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu8_mask&expand=819) | |
3278 | #[inline] | |
3279 | #[target_feature(enable = "avx512bw")] | |
3280 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3281 | pub unsafe fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
3282 | simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) | |
3283 | } | |
3284 | ||
3285 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3286 | /// | |
3287 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu8_mask&expand=820) | |
3288 | #[inline] | |
3289 | #[target_feature(enable = "avx512bw")] | |
3290 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3291 | pub unsafe fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
3292 | _mm512_cmpeq_epu8_mask(a, b) & k1 | |
3293 | } | |
3294 | ||
3295 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. | |
3296 | /// | |
3297 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epu8_mask&expand=817) | |
3298 | #[inline] | |
3299 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3300 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3301 | pub unsafe fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
3302 | simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) | |
3303 | } | |
3304 | ||
3305 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3306 | /// | |
3307 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epu8_mask&expand=818) | |
3308 | #[inline] | |
3309 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3310 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3311 | pub unsafe fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
3312 | _mm256_cmpeq_epu8_mask(a, b) & k1 | |
3313 | } | |
3314 | ||
3315 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k. | |
3316 | /// | |
3317 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epu8_mask&expand=815) | |
3318 | #[inline] | |
3319 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3320 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3321 | pub unsafe fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
3322 | simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) | |
3323 | } | |
3324 | ||
3325 | /// Compare packed unsigned 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3326 | /// | |
3327 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epu8_mask&expand=816) | |
3328 | #[inline] | |
3329 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3330 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3331 | pub unsafe fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
3332 | _mm_cmpeq_epu8_mask(a, b) & k1 | |
3333 | } | |
3334 | ||
3335 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. | |
3336 | /// | |
3337 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi16_mask&expand=771) | |
3338 | #[inline] | |
3339 | #[target_feature(enable = "avx512bw")] | |
3340 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3341 | pub unsafe fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
3342 | simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) | |
3343 | } | |
3344 | ||
3345 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3346 | /// | |
3347 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi16_mask&expand=772) | |
3348 | #[inline] | |
3349 | #[target_feature(enable = "avx512bw")] | |
3350 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3351 | pub unsafe fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
3352 | _mm512_cmpeq_epi16_mask(a, b) & k1 | |
3353 | } | |
3354 | ||
3355 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. | |
3356 | /// | |
3357 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16_mask&expand=769) | |
3358 | #[inline] | |
3359 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3360 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3361 | pub unsafe fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
3362 | simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) | |
3363 | } | |
3364 | ||
3365 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3366 | /// | |
3367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epi16_mask&expand=770) | |
3368 | #[inline] | |
3369 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3370 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3371 | pub unsafe fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
3372 | _mm256_cmpeq_epi16_mask(a, b) & k1 | |
3373 | } | |
3374 | ||
3375 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k. | |
3376 | /// | |
3377 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16_mask&expand=767) | |
3378 | #[inline] | |
3379 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3380 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3381 | pub unsafe fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
3382 | simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) | |
3383 | } | |
3384 | ||
3385 | /// Compare packed signed 16-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3386 | /// | |
3387 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epi16_mask&expand=768) | |
3388 | #[inline] | |
3389 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3390 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3391 | pub unsafe fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
3392 | _mm_cmpeq_epi16_mask(a, b) & k1 | |
3393 | } | |
3394 | ||
3395 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. | |
3396 | /// | |
3397 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi8_mask&expand=795) | |
3398 | #[inline] | |
3399 | #[target_feature(enable = "avx512bw")] | |
3400 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3401 | pub unsafe fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
3402 | simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) | |
3403 | } | |
3404 | ||
3405 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3406 | /// | |
3407 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi8_mask&expand=796) | |
3408 | #[inline] | |
3409 | #[target_feature(enable = "avx512bw")] | |
3410 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3411 | pub unsafe fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
3412 | _mm512_cmpeq_epi8_mask(a, b) & k1 | |
3413 | } | |
3414 | ||
3415 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. | |
3416 | /// | |
3417 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8_mask&expand=793) | |
3418 | #[inline] | |
3419 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3420 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3421 | pub unsafe fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
3422 | simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) | |
3423 | } | |
3424 | ||
3425 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3426 | /// | |
3427 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpeq_epi8_mask&expand=794) | |
3428 | #[inline] | |
3429 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3430 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3431 | pub unsafe fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
3432 | _mm256_cmpeq_epi8_mask(a, b) & k1 | |
3433 | } | |
3434 | ||
3435 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k. | |
3436 | /// | |
3437 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8_mask&expand=791) | |
3438 | #[inline] | |
3439 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3440 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3441 | pub unsafe fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
3442 | simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) | |
3443 | } | |
3444 | ||
3445 | /// Compare packed signed 8-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3446 | /// | |
3447 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpeq_epi8_mask&expand=792) | |
3448 | #[inline] | |
3449 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3450 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3451 | pub unsafe fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
3452 | _mm_cmpeq_epi8_mask(a, b) & k1 | |
3453 | } | |
3454 | ||
3455 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3456 | /// | |
3457 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu16_mask&expand=1106) | |
3458 | #[inline] | |
3459 | #[target_feature(enable = "avx512bw")] | |
3460 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3461 | pub unsafe fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
3462 | simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) | |
3463 | } | |
3464 | ||
3465 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3466 | /// | |
3467 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu16_mask&expand=1107) | |
3468 | #[inline] | |
3469 | #[target_feature(enable = "avx512bw")] | |
3470 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3471 | pub unsafe fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
3472 | _mm512_cmpneq_epu16_mask(a, b) & k1 | |
3473 | } | |
3474 | ||
3475 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3476 | /// | |
3477 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epu16_mask&expand=1104) | |
3478 | #[inline] | |
3479 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3480 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3481 | pub unsafe fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
3482 | simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) | |
3483 | } | |
3484 | ||
3485 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3486 | /// | |
3487 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epu16_mask&expand=1105) | |
3488 | #[inline] | |
3489 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3490 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3491 | pub unsafe fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
3492 | _mm256_cmpneq_epu16_mask(a, b) & k1 | |
3493 | } | |
3494 | ||
3495 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3496 | /// | |
3497 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epu16_mask&expand=1102) | |
3498 | #[inline] | |
3499 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3500 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3501 | pub unsafe fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
3502 | simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) | |
3503 | } | |
3504 | ||
3505 | /// Compare packed unsigned 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3506 | /// | |
3507 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epu16_mask&expand=1103) | |
3508 | #[inline] | |
3509 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3510 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3511 | pub unsafe fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
3512 | _mm_cmpneq_epu16_mask(a, b) & k1 | |
3513 | } | |
3514 | ||
3515 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3516 | /// | |
3517 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu8_mask&expand=1124) | |
3518 | #[inline] | |
3519 | #[target_feature(enable = "avx512bw")] | |
3520 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3521 | pub unsafe fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
3522 | simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) | |
3523 | } | |
3524 | ||
3525 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3526 | /// | |
3527 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu8_mask&expand=1125) | |
3528 | #[inline] | |
3529 | #[target_feature(enable = "avx512bw")] | |
3530 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3531 | pub unsafe fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
3532 | _mm512_cmpneq_epu8_mask(a, b) & k1 | |
3533 | } | |
3534 | ||
3535 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3536 | /// | |
3537 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epu8_mask&expand=1122) | |
3538 | #[inline] | |
3539 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3540 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3541 | pub unsafe fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
3542 | simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) | |
3543 | } | |
3544 | ||
3545 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3546 | /// | |
3547 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epu8_mask&expand=1123) | |
3548 | #[inline] | |
3549 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3550 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3551 | pub unsafe fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
3552 | _mm256_cmpneq_epu8_mask(a, b) & k1 | |
3553 | } | |
3554 | ||
3555 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3556 | /// | |
3557 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epu8_mask&expand=1120) | |
3558 | #[inline] | |
3559 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3560 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3561 | pub unsafe fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
3562 | simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) | |
3563 | } | |
3564 | ||
3565 | /// Compare packed unsigned 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3566 | /// | |
3567 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epu8_mask&expand=1121) | |
3568 | #[inline] | |
3569 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3570 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3571 | pub unsafe fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
3572 | _mm_cmpneq_epu8_mask(a, b) & k1 | |
3573 | } | |
3574 | ||
3575 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3576 | /// | |
3577 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi16_mask&expand=1082) | |
3578 | #[inline] | |
3579 | #[target_feature(enable = "avx512bw")] | |
3580 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3581 | pub unsafe fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
3582 | simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) | |
3583 | } | |
3584 | ||
3585 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3586 | /// | |
3587 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi16_mask&expand=1083) | |
3588 | #[inline] | |
3589 | #[target_feature(enable = "avx512bw")] | |
3590 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3591 | pub unsafe fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
3592 | _mm512_cmpneq_epi16_mask(a, b) & k1 | |
3593 | } | |
3594 | ||
3595 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3596 | /// | |
3597 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epi16_mask&expand=1080) | |
3598 | #[inline] | |
3599 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3600 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3601 | pub unsafe fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
3602 | simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) | |
3603 | } | |
3604 | ||
3605 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3606 | /// | |
3607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epi16_mask&expand=1081) | |
3608 | #[inline] | |
3609 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3610 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3611 | pub unsafe fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
3612 | _mm256_cmpneq_epi16_mask(a, b) & k1 | |
3613 | } | |
3614 | ||
3615 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3616 | /// | |
3617 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epi16_mask&expand=1078) | |
3618 | #[inline] | |
3619 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3620 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3621 | pub unsafe fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
3622 | simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) | |
3623 | } | |
3624 | ||
3625 | /// Compare packed signed 16-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3626 | /// | |
3627 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epi16_mask&expand=1079) | |
3628 | #[inline] | |
3629 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3630 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3631 | pub unsafe fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
3632 | _mm_cmpneq_epi16_mask(a, b) & k1 | |
3633 | } | |
3634 | ||
3635 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3636 | /// | |
3637 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi8_mask&expand=1100) | |
3638 | #[inline] | |
3639 | #[target_feature(enable = "avx512bw")] | |
3640 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3641 | pub unsafe fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
3642 | simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) | |
3643 | } | |
3644 | ||
3645 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3646 | /// | |
3647 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi8_mask&expand=1101) | |
3648 | #[inline] | |
3649 | #[target_feature(enable = "avx512bw")] | |
3650 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3651 | pub unsafe fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
3652 | _mm512_cmpneq_epi8_mask(a, b) & k1 | |
3653 | } | |
3654 | ||
3655 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3656 | /// | |
3657 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpneq_epi8_mask&expand=1098) | |
3658 | #[inline] | |
3659 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3660 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3661 | pub unsafe fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
3662 | simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) | |
3663 | } | |
3664 | ||
3665 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3666 | /// | |
3667 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmpneq_epi8_mask&expand=1099) | |
3668 | #[inline] | |
3669 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3670 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3671 | pub unsafe fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
3672 | _mm256_cmpneq_epi8_mask(a, b) & k1 | |
3673 | } | |
3674 | ||
3675 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k. | |
3676 | /// | |
3677 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_epi8_mask&expand=1096) | |
3678 | #[inline] | |
3679 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3680 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3681 | pub unsafe fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
3682 | simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) | |
3683 | } | |
3684 | ||
3685 | /// Compare packed signed 8-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set). | |
3686 | /// | |
3687 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmpneq_epi8_mask&expand=1097) | |
3688 | #[inline] | |
3689 | #[target_feature(enable = "avx512bw,avx512vl")] | |
3690 | #[cfg_attr(test, assert_instr(vpcmp))] | |
3691 | pub unsafe fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
3692 | _mm_cmpneq_epi8_mask(a, b) & k1 | |
3693 | } | |
3694 | ||
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by `IMM8`, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu16_mask&expand=715)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    // IMM8 selects the comparison predicate; only the low 3 bits are valid
    // (see Intel docs for the predicate encodings), enforced at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    // All-ones writemask: compare every one of the 32 lanes.
    let r = vpcmpuw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}
3709 | ||
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu16_mask&expand=716)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u16x32();
    let b = b.as_u16x32();
    // k1 is passed through as the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpuw(a, b, IMM8, k1);
    transmute(r)
}
3728 | ||
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu16_mask&expand=713)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u16x16();
    let b = b.as_u16x16();
    // All-ones writemask: compare every one of the 16 lanes.
    let r = vpcmpuw256(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}
3743 | ||
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu16_mask&expand=714)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u16x16();
    let b = b.as_u16x16();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpuw256(a, b, IMM8, k1);
    transmute(r)
}
3762 | ||
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu16_mask&expand=711)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    // All-ones writemask: compare every one of the 8 lanes.
    let r = vpcmpuw128(a, b, IMM8, 0b11111111);
    transmute(r)
}
3777 | ||
/// Compare packed unsigned 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu16_mask&expand=712)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpuw128(a, b, IMM8, k1);
    transmute(r)
}
3796 | ||
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu8_mask&expand=733)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    // All-ones writemask: compare every one of the 64 lanes.
    let r = vpcmpub(
        a,
        b,
        IMM8,
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    );
    transmute(r)
}
3816 | ||
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu8_mask&expand=734)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpub(a, b, IMM8, k1);
    transmute(r)
}
3835 | ||
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epu8_mask&expand=731)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    // All-ones writemask: compare every one of the 32 lanes.
    let r = vpcmpub256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}
3850 | ||
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epu8_mask&expand=732)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpub256(a, b, IMM8, k1);
    transmute(r)
}
3869 | ||
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epu8_mask&expand=729)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    // All-ones writemask: compare every one of the 16 lanes.
    let r = vpcmpub128(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}
3884 | ||
/// Compare packed unsigned 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epu8_mask&expand=730)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpub128(a, b, IMM8, k1);
    transmute(r)
}
3903 | ||
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi16_mask&expand=691)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    // All-ones writemask: compare every one of the 32 lanes.
    let r = vpcmpw(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}
3918 | ||
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi16_mask&expand=692)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i16x32();
    let b = b.as_i16x32();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpw(a, b, IMM8, k1);
    transmute(r)
}
3937 | ||
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi16_mask&expand=689)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    // All-ones writemask: compare every one of the 16 lanes.
    let r = vpcmpw256(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}
3952 | ||
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi16_mask&expand=690)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i16x16();
    let b = b.as_i16x16();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpw256(a, b, IMM8, k1);
    transmute(r)
}
3971 | ||
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi16_mask&expand=687)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    // All-ones writemask: compare every one of the 8 lanes.
    let r = vpcmpw128(a, b, IMM8, 0b11111111);
    transmute(r)
}
3986 | ||
/// Compare packed signed 16-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi16_mask&expand=688)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i16x8();
    let b = b.as_i16x8();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpw128(a, b, IMM8, k1);
    transmute(r)
}
4005 | ||
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi8_mask&expand=709)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    // All-ones writemask: compare every one of the 64 lanes.
    let r = vpcmpb(
        a,
        b,
        IMM8,
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
    );
    transmute(r)
}
4025 | ||
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi8_mask&expand=710)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask64,
    a: __m512i,
    b: __m512i,
) -> __mmask64 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i8x64();
    let b = b.as_i8x64();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpb(a, b, IMM8, k1);
    transmute(r)
}
4044 | ||
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_epi8_mask&expand=707)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i8x32();
    let b = b.as_i8x32();
    // All-ones writemask: compare every one of the 32 lanes.
    let r = vpcmpb256(a, b, IMM8, 0b11111111_11111111_11111111_11111111);
    transmute(r)
}
4059 | ||
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cmp_epi8_mask&expand=708)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask32,
    a: __m256i,
    b: __m256i,
) -> __mmask32 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i8x32();
    let b = b.as_i8x32();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpb256(a, b, IMM8, k1);
    transmute(r)
}
4078 | ||
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_epi8_mask&expand=705)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    // All-ones writemask: compare every one of the 16 lanes.
    let r = vpcmpb128(a, b, IMM8, 0b11111111_11111111);
    transmute(r)
}
4093 | ||
/// Compare packed signed 8-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_epi8_mask&expand=706)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
pub unsafe fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
    k1: __mmask16,
    a: __m128i,
    b: __m128i,
) -> __mmask16 {
    // IMM8 is a 3-bit comparison predicate, checked at compile time.
    static_assert_imm3!(IMM8);
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    // k1 is the zeroing writemask: disabled lanes yield 0 bits.
    let r = vpcmpb128(a, b, IMM8, k1);
    transmute(r)
}
4112 | ||
4113 | /// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. | |
4114 | /// | |
4115 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi16&expand=3368) | |
4116 | #[inline] | |
4117 | #[target_feature(enable = "avx512bw")] | |
4118 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 | |
4119 | pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i { | |
4120 | ptr::read_unaligned(mem_addr as *const __m512i) | |
4121 | } | |
4122 | ||
4123 | /// Load 256-bits (composed of 16 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. | |
4124 | /// | |
4125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_epi16&expand=3365) | |
4126 | #[inline] | |
4127 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4128 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 | |
4129 | pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i { | |
4130 | ptr::read_unaligned(mem_addr as *const __m256i) | |
4131 | } | |
4132 | ||
4133 | /// Load 128-bits (composed of 8 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. | |
4134 | /// | |
4135 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_epi16&expand=3362) | |
4136 | #[inline] | |
4137 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4138 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 | |
4139 | pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i { | |
4140 | ptr::read_unaligned(mem_addr as *const __m128i) | |
4141 | } | |
4142 | ||
4143 | /// Load 512-bits (composed of 64 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. | |
4144 | /// | |
4145 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_epi8&expand=3395) | |
4146 | #[inline] | |
4147 | #[target_feature(enable = "avx512bw")] | |
4148 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 | |
4149 | pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i { | |
4150 | ptr::read_unaligned(mem_addr as *const __m512i) | |
4151 | } | |
4152 | ||
4153 | /// Load 256-bits (composed of 32 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. | |
4154 | /// | |
4155 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_epi8&expand=3392) | |
4156 | #[inline] | |
4157 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4158 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 | |
4159 | pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i { | |
4160 | ptr::read_unaligned(mem_addr as *const __m256i) | |
4161 | } | |
4162 | ||
4163 | /// Load 128-bits (composed of 16 packed 8-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. | |
4164 | /// | |
4165 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_epi8&expand=3389) | |
4166 | #[inline] | |
4167 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4168 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 | |
4169 | pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i { | |
4170 | ptr::read_unaligned(mem_addr as *const __m128i) | |
4171 | } | |
4172 | ||
4173 | /// Store 512-bits (composed of 32 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. | |
4174 | /// | |
4175 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi16&expand=5622) | |
4176 | #[inline] | |
4177 | #[target_feature(enable = "avx512bw")] | |
4178 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 | |
4179 | pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) { | |
4180 | ptr::write_unaligned(mem_addr as *mut __m512i, a); | |
4181 | } | |
4182 | ||
4183 | /// Store 256-bits (composed of 16 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. | |
4184 | /// | |
4185 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_epi16&expand=5620) | |
4186 | #[inline] | |
4187 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4188 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 | |
4189 | pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) { | |
4190 | ptr::write_unaligned(mem_addr as *mut __m256i, a); | |
4191 | } | |
4192 | ||
4193 | /// Store 128-bits (composed of 8 packed 16-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. | |
4194 | /// | |
4195 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_epi16&expand=5618) | |
4196 | #[inline] | |
4197 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4198 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16 | |
4199 | pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) { | |
4200 | ptr::write_unaligned(mem_addr as *mut __m128i, a); | |
4201 | } | |
4202 | ||
4203 | /// Store 512-bits (composed of 64 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. | |
4204 | /// | |
4205 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi8&expand=5640) | |
4206 | #[inline] | |
4207 | #[target_feature(enable = "avx512bw")] | |
4208 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 | |
4209 | pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) { | |
4210 | ptr::write_unaligned(mem_addr as *mut __m512i, a); | |
4211 | } | |
4212 | ||
4213 | /// Store 256-bits (composed of 32 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. | |
4214 | /// | |
4215 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_epi8&expand=5638) | |
4216 | #[inline] | |
4217 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4218 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 | |
4219 | pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) { | |
4220 | ptr::write_unaligned(mem_addr as *mut __m256i, a); | |
4221 | } | |
4222 | ||
4223 | /// Store 128-bits (composed of 16 packed 8-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. | |
4224 | /// | |
4225 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_epi8&expand=5636) | |
4226 | #[inline] | |
4227 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4228 | #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8 | |
4229 | pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) { | |
4230 | ptr::write_unaligned(mem_addr as *mut __m128i, a); | |
4231 | } | |
4232 | ||
a2a8927a XL |
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Seed dst with src: the merging (non-zeroing) masked load below only
    // writes lanes whose bit in k is set, so the remaining lanes keep src's
    // values.
    let mut dst: __m512i = src;
    // `vpl!` (imported from avx512f) presumably splices the memory operand
    // `p` into the instruction text -- confirm against its definition.
    // In the asm template, `{{` / `}}` are literal braces, so
    // `{dst}{{{k}}}` renders as e.g. `zmm0{k1}` (opmask-suffixed operand).
    asm!(
        vpl!("vmovdqu16 {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(zmm_reg) dst,
        // pure + readonly: the asm only reads memory and produces its
        // outputs; nostack: it does not touch the stack.
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
    // No src operand here: the trailing `{z}` selects the zeroing form of
    // the masked load, so masked-off lanes become 0 and dst is write-only.
    let mut dst: __m512i;
    asm!(
        vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Same merging-load pattern as _mm512_mask_loadu_epi16 above, with a
    // byte-granular instruction and a 64-bit mask (one bit per byte lane).
    let mut dst: __m512i = src;
    asm!(
        vpl!("vmovdqu8 {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Zeroing (`{z}`) byte-granular variant: masked-off lanes become 0.
    let mut dst: __m512i;
    asm!(
        vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(zmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}
4308 | ||
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    // 256-bit merging masked load: dst starts as src, and the instruction
    // only overwrites lanes whose bit in k is set. `vpl!` (from avx512f)
    // presumably appends the memory operand `p` -- confirm against its
    // definition; `{{{k}}}` renders the `{kN}` opmask suffix.
    let mut dst: __m256i = src;
    asm!(
        vpl!("vmovdqu16 {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(ymm_reg) dst,
        // Read-only, side-effect-free asm; safe for the compiler to CSE.
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Zeroing form (`{z}` suffix): masked-off lanes become 0, so dst is a
    // pure output and needs no initial value.
    let mut dst: __m256i;
    asm!(
        vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Byte-granular merging load: same pattern as the epi16 variant above,
    // with one mask bit per byte lane (32 lanes in 256 bits).
    let mut dst: __m256i = src;
    asm!(
        vpl!("vmovdqu8 {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Byte-granular zeroing load: masked-off lanes become 0.
    let mut dst: __m256i;
    asm!(
        vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(ymm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}
4384 | ||
/// Load packed 16-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    // 128-bit merging masked load: dst is seeded with src so that lanes
    // whose bit in k is clear keep their original values. `vpl!` (from
    // avx512f) presumably appends the memory operand `p` -- confirm
    // against its definition.
    let mut dst: __m128i = src;
    asm!(
        vpl!("vmovdqu16 {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        // Read-only, side-effect-free asm; no stack usage.
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 16-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_loadu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Zeroing form (`{z}`): masked-off lanes become 0; dst is write-only.
    let mut dst: __m128i;
    asm!(
        vpl!("vmovdqu16 {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 8-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Byte-granular merging load; one mask bit per byte lane (16 lanes).
    let mut dst: __m128i = src;
    asm!(
        vpl!("vmovdqu8 {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}

/// Load packed 8-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_loadu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Byte-granular zeroing load: masked-off lanes become 0.
    let mut dst: __m128i;
    asm!(
        vpl!("vmovdqu8 {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack)
    );
    dst
}
4460 | ||
/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Masked store: only lanes whose bit in `mask` is set are written to
    // memory; other destination words are left untouched. `vps!` (imported
    // from avx512f) presumably joins the mnemonic, the memory operand `p`,
    // and the remaining operand text -- confirm against its definition.
    asm!(
        vps!("vmovdqu16", "{{{mask}}}, {a}"),
        p = in(reg) mem_addr,
        mask = in(kreg) mask,
        a = in(zmm_reg) a,
        // Writes memory, so no pure/readonly here -- only nostack.
        options(nostack)
    );
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Byte-granular masked store: one mask bit per byte lane (64 lanes).
    asm!(
        vps!("vmovdqu8", "{{{mask}}}, {a}"),
        p = in(reg) mem_addr,
        mask = in(kreg) mask,
        a = in(zmm_reg) a,
        options(nostack)
    );
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // 256-bit masked store; same pattern as the 512-bit variant above.
    asm!(
        vps!("vmovdqu16", "{{{mask}}}, {a}"),
        p = in(reg) mem_addr,
        mask = in(kreg) mask,
        a = in(ymm_reg) a,
        options(nostack)
    );
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // 256-bit byte-granular masked store (32 byte lanes).
    asm!(
        vps!("vmovdqu8", "{{{mask}}}, {a}"),
        p = in(reg) mem_addr,
        mask = in(kreg) mask,
        a = in(ymm_reg) a,
        options(nostack)
    );
}

/// Store packed 16-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_storeu_epi16)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // 128-bit masked store (8 word lanes).
    asm!(
        vps!("vmovdqu16", "{{{mask}}}, {a}"),
        p = in(reg) mem_addr,
        mask = in(kreg) mask,
        a = in(xmm_reg) a,
        options(nostack)
    );
}

/// Store packed 8-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_storeu_epi8)
#[inline]
#[target_feature(enable = "avx512f,avx512bw,avx512vl,avx,sse")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // 128-bit byte-granular masked store (16 byte lanes).
    asm!(
        vps!("vmovdqu8", "{{{mask}}}, {a}"),
        p = in(reg) mem_addr,
        mask = in(kreg) mask,
        a = in(xmm_reg) a,
        options(nostack)
    );
}
4556 | ||
cdc7bbd5 XL |
4557 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst. |
4558 | /// | |
4559 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_madd_epi16&expand=3511) | |
4560 | #[inline] | |
4561 | #[target_feature(enable = "avx512bw")] | |
4562 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4563 | pub unsafe fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i { | |
4564 | transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) | |
4565 | } | |
4566 | ||
4567 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4568 | /// | |
4569 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_madd_epi16&expand=3512) | |
4570 | #[inline] | |
4571 | #[target_feature(enable = "avx512bw")] | |
4572 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4573 | pub unsafe fn _mm512_mask_madd_epi16( | |
4574 | src: __m512i, | |
4575 | k: __mmask16, | |
4576 | a: __m512i, | |
4577 | b: __m512i, | |
4578 | ) -> __m512i { | |
4579 | let madd = _mm512_madd_epi16(a, b).as_i32x16(); | |
4580 | transmute(simd_select_bitmask(k, madd, src.as_i32x16())) | |
4581 | } | |
4582 | ||
4583 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4584 | /// | |
4585 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_madd_epi16&expand=3513) | |
4586 | #[inline] | |
4587 | #[target_feature(enable = "avx512bw")] | |
4588 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4589 | pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { | |
4590 | let madd = _mm512_madd_epi16(a, b).as_i32x16(); | |
4591 | let zero = _mm512_setzero_si512().as_i32x16(); | |
4592 | transmute(simd_select_bitmask(k, madd, zero)) | |
4593 | } | |
4594 | ||
4595 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4596 | /// | |
4597 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_madd_epi16&expand=3509) | |
4598 | #[inline] | |
4599 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4600 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4601 | pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i { | |
4602 | let madd = _mm256_madd_epi16(a, b).as_i32x8(); | |
4603 | transmute(simd_select_bitmask(k, madd, src.as_i32x8())) | |
4604 | } | |
4605 | ||
4606 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4607 | /// | |
4608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_madd_epi16&expand=3510) | |
4609 | #[inline] | |
4610 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4611 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4612 | pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { | |
4613 | let madd = _mm256_madd_epi16(a, b).as_i32x8(); | |
4614 | let zero = _mm256_setzero_si256().as_i32x8(); | |
4615 | transmute(simd_select_bitmask(k, madd, zero)) | |
4616 | } | |
4617 | ||
4618 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4619 | /// | |
4620 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_madd_epi16&expand=3506) | |
4621 | #[inline] | |
4622 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4623 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4624 | pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4625 | let madd = _mm_madd_epi16(a, b).as_i32x4(); | |
4626 | transmute(simd_select_bitmask(k, madd, src.as_i32x4())) | |
4627 | } | |
4628 | ||
4629 | /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4630 | /// | |
4631 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_madd_epi16&expand=3507) | |
4632 | #[inline] | |
4633 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4634 | #[cfg_attr(test, assert_instr(vpmaddwd))] | |
4635 | pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4636 | let madd = _mm_madd_epi16(a, b).as_i32x4(); | |
4637 | let zero = _mm_setzero_si128().as_i32x4(); | |
4638 | transmute(simd_select_bitmask(k, madd, zero)) | |
4639 | } | |
4640 | ||
4641 | /// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst. | |
4642 | /// | |
4643 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maddubs_epi16&expand=3539) | |
4644 | #[inline] | |
4645 | #[target_feature(enable = "avx512bw")] | |
4646 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4647 | pub unsafe fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i { | |
4648 | transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) | |
4649 | } | |
4650 | ||
4651 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4652 | /// | |
4653 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_maddubs_epi16&expand=3540) | |
4654 | #[inline] | |
4655 | #[target_feature(enable = "avx512bw")] | |
4656 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4657 | pub unsafe fn _mm512_mask_maddubs_epi16( | |
4658 | src: __m512i, | |
4659 | k: __mmask32, | |
4660 | a: __m512i, | |
4661 | b: __m512i, | |
4662 | ) -> __m512i { | |
4663 | let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); | |
4664 | transmute(simd_select_bitmask(k, madd, src.as_i16x32())) | |
4665 | } | |
4666 | ||
4667 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4668 | /// | |
4669 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_maddubs_epi16&expand=3541) | |
4670 | #[inline] | |
4671 | #[target_feature(enable = "avx512bw")] | |
4672 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4673 | pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
4674 | let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); | |
4675 | let zero = _mm512_setzero_si512().as_i16x32(); | |
4676 | transmute(simd_select_bitmask(k, madd, zero)) | |
4677 | } | |
4678 | ||
4679 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4680 | /// | |
4681 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_maddubs_epi16&expand=3537) | |
4682 | #[inline] | |
4683 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4684 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4685 | pub unsafe fn _mm256_mask_maddubs_epi16( | |
4686 | src: __m256i, | |
4687 | k: __mmask16, | |
4688 | a: __m256i, | |
4689 | b: __m256i, | |
4690 | ) -> __m256i { | |
4691 | let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); | |
4692 | transmute(simd_select_bitmask(k, madd, src.as_i16x16())) | |
4693 | } | |
4694 | ||
4695 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4696 | /// | |
4697 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_maddubs_epi16&expand=3538) | |
4698 | #[inline] | |
4699 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4700 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4701 | pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
4702 | let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); | |
4703 | let zero = _mm256_setzero_si256().as_i16x16(); | |
4704 | transmute(simd_select_bitmask(k, madd, zero)) | |
4705 | } | |
4706 | ||
4707 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4708 | /// | |
4709 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_maddubs_epi16&expand=3534) | |
4710 | #[inline] | |
4711 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4712 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4713 | pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4714 | let madd = _mm_maddubs_epi16(a, b).as_i16x8(); | |
4715 | transmute(simd_select_bitmask(k, madd, src.as_i16x8())) | |
4716 | } | |
4717 | ||
4718 | /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4719 | /// | |
4720 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_maddubs_epi16&expand=3535) | |
4721 | #[inline] | |
4722 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4723 | #[cfg_attr(test, assert_instr(vpmaddubsw))] | |
4724 | pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4725 | let madd = _mm_maddubs_epi16(a, b).as_i16x8(); | |
4726 | let zero = _mm_setzero_si128().as_i16x8(); | |
4727 | transmute(simd_select_bitmask(k, madd, zero)) | |
4728 | } | |
4729 | ||
/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi32&expand=4091)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackssdw))]
pub unsafe fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
    // Forward directly to the LLVM `vpackssdw` binding; the interleaved lane
    // ordering of the packed result is defined by the instruction itself.
    transmute(vpackssdw(a.as_i32x16(), b.as_i32x16()))
}
4739 | ||
4740 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4741 | /// | |
4742 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi32&expand=4089) | |
4743 | #[inline] | |
4744 | #[target_feature(enable = "avx512bw")] | |
4745 | #[cfg_attr(test, assert_instr(vpackssdw))] | |
4746 | pub unsafe fn _mm512_mask_packs_epi32( | |
4747 | src: __m512i, | |
4748 | k: __mmask32, | |
4749 | a: __m512i, | |
4750 | b: __m512i, | |
4751 | ) -> __m512i { | |
4752 | let pack = _mm512_packs_epi32(a, b).as_i16x32(); | |
4753 | transmute(simd_select_bitmask(k, pack, src.as_i16x32())) | |
4754 | } | |
4755 | ||
4756 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4757 | /// | |
4758 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi32&expand=4090) | |
4759 | #[inline] | |
4760 | #[target_feature(enable = "avx512bw")] | |
4761 | #[cfg_attr(test, assert_instr(vpackssdw))] | |
4762 | pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
4763 | let pack = _mm512_packs_epi32(a, b).as_i16x32(); | |
4764 | let zero = _mm512_setzero_si512().as_i16x32(); | |
4765 | transmute(simd_select_bitmask(k, pack, zero)) | |
4766 | } | |
4767 | ||
4768 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4769 | /// | |
4770 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packs_epi32&expand=4086) | |
4771 | #[inline] | |
4772 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4773 | #[cfg_attr(test, assert_instr(vpackssdw))] | |
4774 | pub unsafe fn _mm256_mask_packs_epi32( | |
4775 | src: __m256i, | |
4776 | k: __mmask16, | |
4777 | a: __m256i, | |
4778 | b: __m256i, | |
4779 | ) -> __m256i { | |
4780 | let pack = _mm256_packs_epi32(a, b).as_i16x16(); | |
4781 | transmute(simd_select_bitmask(k, pack, src.as_i16x16())) | |
4782 | } | |
4783 | ||
4784 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4785 | /// | |
4786 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packs_epi32&expand=4087) | |
fc512014 XL |
4787 | #[inline] |
4788 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4789 | #[cfg_attr(test, assert_instr(vpackssdw))] | |
4790 | pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
4791 | let pack = _mm256_packs_epi32(a, b).as_i16x16(); | |
4792 | let zero = _mm256_setzero_si256().as_i16x16(); | |
4793 | transmute(simd_select_bitmask(k, pack, zero)) | |
4794 | } | |
4795 | ||
4796 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4797 | /// | |
4798 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packs_epi32&expand=4083) | |
4799 | #[inline] | |
4800 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4801 | #[cfg_attr(test, assert_instr(vpackssdw))] | |
4802 | pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4803 | let pack = _mm_packs_epi32(a, b).as_i16x8(); | |
4804 | transmute(simd_select_bitmask(k, pack, src.as_i16x8())) | |
4805 | } | |
4806 | ||
4807 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4808 | /// | |
4809 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packs_epi32&expand=4084) | |
4810 | #[inline] | |
4811 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4812 | #[cfg_attr(test, assert_instr(vpackssdw))] | |
4813 | pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4814 | let pack = _mm_packs_epi32(a, b).as_i16x8(); | |
4815 | let zero = _mm_setzero_si128().as_i16x8(); | |
4816 | transmute(simd_select_bitmask(k, pack, zero)) | |
4817 | } | |
4818 | ||
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packs_epi16&expand=4082)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpacksswb))]
pub unsafe fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Forward directly to the LLVM `vpacksswb` binding; the interleaved lane
    // ordering of the packed result is defined by the instruction itself.
    transmute(vpacksswb(a.as_i16x32(), b.as_i16x32()))
}
4828 | ||
4829 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4830 | /// | |
4831 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packs_epi16&expand=4080) | |
4832 | #[inline] | |
4833 | #[target_feature(enable = "avx512bw")] | |
4834 | #[cfg_attr(test, assert_instr(vpacksswb))] | |
4835 | pub unsafe fn _mm512_mask_packs_epi16( | |
4836 | src: __m512i, | |
4837 | k: __mmask64, | |
4838 | a: __m512i, | |
4839 | b: __m512i, | |
4840 | ) -> __m512i { | |
4841 | let pack = _mm512_packs_epi16(a, b).as_i8x64(); | |
4842 | transmute(simd_select_bitmask(k, pack, src.as_i8x64())) | |
4843 | } | |
4844 | ||
4845 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4846 | /// | |
4847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packs_epi16&expand=4081) | |
4848 | #[inline] | |
4849 | #[target_feature(enable = "avx512bw")] | |
4850 | #[cfg_attr(test, assert_instr(vpacksswb))] | |
4851 | pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
4852 | let pack = _mm512_packs_epi16(a, b).as_i8x64(); | |
4853 | let zero = _mm512_setzero_si512().as_i8x64(); | |
4854 | transmute(simd_select_bitmask(k, pack, zero)) | |
4855 | } | |
4856 | ||
4857 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4858 | /// | |
4859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packs_epi16&expand=4077) | |
4860 | #[inline] | |
4861 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4862 | #[cfg_attr(test, assert_instr(vpacksswb))] | |
4863 | pub unsafe fn _mm256_mask_packs_epi16( | |
4864 | src: __m256i, | |
4865 | k: __mmask32, | |
4866 | a: __m256i, | |
4867 | b: __m256i, | |
4868 | ) -> __m256i { | |
4869 | let pack = _mm256_packs_epi16(a, b).as_i8x32(); | |
4870 | transmute(simd_select_bitmask(k, pack, src.as_i8x32())) | |
4871 | } | |
4872 | ||
4873 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4874 | /// | |
4875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=#text=_mm256_maskz_packs_epi16&expand=4078) | |
4876 | #[inline] | |
4877 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4878 | #[cfg_attr(test, assert_instr(vpacksswb))] | |
4879 | pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
4880 | let pack = _mm256_packs_epi16(a, b).as_i8x32(); | |
4881 | let zero = _mm256_setzero_si256().as_i8x32(); | |
4882 | transmute(simd_select_bitmask(k, pack, zero)) | |
4883 | } | |
4884 | ||
4885 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4886 | /// | |
4887 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packs_epi16&expand=4074) | |
4888 | #[inline] | |
4889 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4890 | #[cfg_attr(test, assert_instr(vpacksswb))] | |
4891 | pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
4892 | let pack = _mm_packs_epi16(a, b).as_i8x16(); | |
4893 | transmute(simd_select_bitmask(k, pack, src.as_i8x16())) | |
4894 | } | |
4895 | ||
4896 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4897 | /// | |
4898 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packs_epi16&expand=4075) | |
4899 | #[inline] | |
4900 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4901 | #[cfg_attr(test, assert_instr(vpacksswb))] | |
4902 | pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
4903 | let pack = _mm_packs_epi16(a, b).as_i8x16(); | |
4904 | let zero = _mm_setzero_si128().as_i8x16(); | |
4905 | transmute(simd_select_bitmask(k, pack, zero)) | |
4906 | } | |
4907 | ||
/// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi32&expand=4130)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackusdw))]
pub unsafe fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
    // Forward directly to the LLVM `vpackusdw` binding; inputs are signed,
    // saturation to the unsigned 16-bit range is done by the instruction.
    transmute(vpackusdw(a.as_i32x16(), b.as_i32x16()))
}
4917 | ||
4918 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4919 | /// | |
4920 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi32&expand=4128) | |
4921 | #[inline] | |
4922 | #[target_feature(enable = "avx512bw")] | |
4923 | #[cfg_attr(test, assert_instr(vpackusdw))] | |
4924 | pub unsafe fn _mm512_mask_packus_epi32( | |
4925 | src: __m512i, | |
4926 | k: __mmask32, | |
4927 | a: __m512i, | |
4928 | b: __m512i, | |
4929 | ) -> __m512i { | |
4930 | let pack = _mm512_packus_epi32(a, b).as_i16x32(); | |
4931 | transmute(simd_select_bitmask(k, pack, src.as_i16x32())) | |
4932 | } | |
4933 | ||
4934 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4935 | /// | |
4936 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi32&expand=4129) | |
4937 | #[inline] | |
4938 | #[target_feature(enable = "avx512bw")] | |
4939 | #[cfg_attr(test, assert_instr(vpackusdw))] | |
4940 | pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
4941 | let pack = _mm512_packus_epi32(a, b).as_i16x32(); | |
4942 | let zero = _mm512_setzero_si512().as_i16x32(); | |
4943 | transmute(simd_select_bitmask(k, pack, zero)) | |
4944 | } | |
4945 | ||
4946 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4947 | /// | |
4948 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packus_epi32&expand=4125) | |
4949 | #[inline] | |
4950 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4951 | #[cfg_attr(test, assert_instr(vpackusdw))] | |
4952 | pub unsafe fn _mm256_mask_packus_epi32( | |
4953 | src: __m256i, | |
4954 | k: __mmask16, | |
4955 | a: __m256i, | |
4956 | b: __m256i, | |
4957 | ) -> __m256i { | |
4958 | let pack = _mm256_packus_epi32(a, b).as_i16x16(); | |
4959 | transmute(simd_select_bitmask(k, pack, src.as_i16x16())) | |
4960 | } | |
4961 | ||
4962 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4963 | /// | |
4964 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packus_epi32&expand=4126) | |
4965 | #[inline] | |
4966 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4967 | #[cfg_attr(test, assert_instr(vpackusdw))] | |
4968 | pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
4969 | let pack = _mm256_packus_epi32(a, b).as_i16x16(); | |
4970 | let zero = _mm256_setzero_si256().as_i16x16(); | |
4971 | transmute(simd_select_bitmask(k, pack, zero)) | |
4972 | } | |
4973 | ||
4974 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
4975 | /// | |
4976 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packus_epi32&expand=4122) | |
4977 | #[inline] | |
4978 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4979 | #[cfg_attr(test, assert_instr(vpackusdw))] | |
4980 | pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4981 | let pack = _mm_packus_epi32(a, b).as_i16x8(); | |
4982 | transmute(simd_select_bitmask(k, pack, src.as_i16x8())) | |
4983 | } | |
4984 | ||
4985 | /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
4986 | /// | |
4987 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packus_epi32&expand=4123) | |
4988 | #[inline] | |
4989 | #[target_feature(enable = "avx512bw,avx512vl")] | |
4990 | #[cfg_attr(test, assert_instr(vpackusdw))] | |
4991 | pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
4992 | let pack = _mm_packus_epi32(a, b).as_i16x8(); | |
4993 | let zero = _mm_setzero_si128().as_i16x8(); | |
4994 | transmute(simd_select_bitmask(k, pack, zero)) | |
4995 | } | |
4996 | ||
/// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_packus_epi16&expand=4121)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpackuswb))]
pub unsafe fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
    // Forward directly to the LLVM `vpackuswb` binding; inputs are signed,
    // saturation to the unsigned 8-bit range is done by the instruction.
    transmute(vpackuswb(a.as_i16x32(), b.as_i16x32()))
}
5006 | ||
5007 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5008 | /// | |
5009 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_packus_epi16&expand=4119) | |
5010 | #[inline] | |
5011 | #[target_feature(enable = "avx512bw")] | |
5012 | #[cfg_attr(test, assert_instr(vpackuswb))] | |
5013 | pub unsafe fn _mm512_mask_packus_epi16( | |
5014 | src: __m512i, | |
5015 | k: __mmask64, | |
5016 | a: __m512i, | |
5017 | b: __m512i, | |
5018 | ) -> __m512i { | |
5019 | let pack = _mm512_packus_epi16(a, b).as_i8x64(); | |
5020 | transmute(simd_select_bitmask(k, pack, src.as_i8x64())) | |
5021 | } | |
5022 | ||
5023 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5024 | /// | |
5025 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_packus_epi16&expand=4120) | |
5026 | #[inline] | |
5027 | #[target_feature(enable = "avx512bw")] | |
5028 | #[cfg_attr(test, assert_instr(vpackuswb))] | |
5029 | pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
5030 | let pack = _mm512_packus_epi16(a, b).as_i8x64(); | |
5031 | let zero = _mm512_setzero_si512().as_i8x64(); | |
5032 | transmute(simd_select_bitmask(k, pack, zero)) | |
5033 | } | |
5034 | ||
5035 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5036 | /// | |
5037 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_packus_epi16&expand=4116) | |
5038 | #[inline] | |
5039 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5040 | #[cfg_attr(test, assert_instr(vpackuswb))] | |
5041 | pub unsafe fn _mm256_mask_packus_epi16( | |
5042 | src: __m256i, | |
5043 | k: __mmask32, | |
5044 | a: __m256i, | |
5045 | b: __m256i, | |
5046 | ) -> __m256i { | |
5047 | let pack = _mm256_packus_epi16(a, b).as_i8x32(); | |
5048 | transmute(simd_select_bitmask(k, pack, src.as_i8x32())) | |
5049 | } | |
5050 | ||
5051 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5052 | /// | |
5053 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_packus_epi16&expand=4117) | |
5054 | #[inline] | |
5055 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5056 | #[cfg_attr(test, assert_instr(vpackuswb))] | |
5057 | pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
5058 | let pack = _mm256_packus_epi16(a, b).as_i8x32(); | |
5059 | let zero = _mm256_setzero_si256().as_i8x32(); | |
5060 | transmute(simd_select_bitmask(k, pack, zero)) | |
5061 | } | |
5062 | ||
5063 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5064 | /// | |
5065 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_packus_epi16&expand=4113) | |
5066 | #[inline] | |
5067 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5068 | #[cfg_attr(test, assert_instr(vpackuswb))] | |
5069 | pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
5070 | let pack = _mm_packus_epi16(a, b).as_i8x16(); | |
5071 | transmute(simd_select_bitmask(k, pack, src.as_i8x16())) | |
5072 | } | |
5073 | ||
5074 | /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5075 | /// | |
5076 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_packus_epi16&expand=4114) | |
5077 | #[inline] | |
5078 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5079 | #[cfg_attr(test, assert_instr(vpackuswb))] | |
5080 | pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
5081 | let pack = _mm_packus_epi16(a, b).as_i8x16(); | |
5082 | let zero = _mm_setzero_si128().as_i8x16(); | |
5083 | transmute(simd_select_bitmask(k, pack, zero)) | |
5084 | } | |
5085 | ||
/// Average packed unsigned 16-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu16&expand=388)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgw))]
pub unsafe fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
    // Forward directly to the LLVM `vpavgw` binding; the rounding-up average
    // ((a + b + 1) >> 1 per the ISA) is performed by the instruction.
    transmute(vpavgw(a.as_u16x32(), b.as_u16x32()))
}
5095 | ||
5096 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5097 | /// | |
5098 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu16&expand=389) | |
5099 | #[inline] | |
5100 | #[target_feature(enable = "avx512bw")] | |
5101 | #[cfg_attr(test, assert_instr(vpavgw))] | |
5102 | pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
5103 | let avg = _mm512_avg_epu16(a, b).as_u16x32(); | |
5104 | transmute(simd_select_bitmask(k, avg, src.as_u16x32())) | |
5105 | } | |
5106 | ||
5107 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5108 | /// | |
5109 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu16&expand=390) | |
5110 | #[inline] | |
5111 | #[target_feature(enable = "avx512bw")] | |
5112 | #[cfg_attr(test, assert_instr(vpavgw))] | |
5113 | pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
5114 | let avg = _mm512_avg_epu16(a, b).as_u16x32(); | |
5115 | let zero = _mm512_setzero_si512().as_u16x32(); | |
5116 | transmute(simd_select_bitmask(k, avg, zero)) | |
5117 | } | |
5118 | ||
5119 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5120 | /// | |
5121 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_avg_epu16&expand=386) | |
5122 | #[inline] | |
5123 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5124 | #[cfg_attr(test, assert_instr(vpavgw))] | |
5125 | pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
5126 | let avg = _mm256_avg_epu16(a, b).as_u16x16(); | |
5127 | transmute(simd_select_bitmask(k, avg, src.as_u16x16())) | |
5128 | } | |
5129 | ||
5130 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5131 | /// | |
5132 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_avg_epu16&expand=387) | |
5133 | #[inline] | |
5134 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5135 | #[cfg_attr(test, assert_instr(vpavgw))] | |
5136 | pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
5137 | let avg = _mm256_avg_epu16(a, b).as_u16x16(); | |
5138 | let zero = _mm256_setzero_si256().as_u16x16(); | |
5139 | transmute(simd_select_bitmask(k, avg, zero)) | |
5140 | } | |
5141 | ||
5142 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5143 | /// | |
5144 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_avg_epu16&expand=383) | |
5145 | #[inline] | |
5146 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5147 | #[cfg_attr(test, assert_instr(vpavgw))] | |
5148 | pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
5149 | let avg = _mm_avg_epu16(a, b).as_u16x8(); | |
5150 | transmute(simd_select_bitmask(k, avg, src.as_u16x8())) | |
5151 | } | |
5152 | ||
5153 | /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5154 | /// | |
5155 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_avg_epu16&expand=384) | |
5156 | #[inline] | |
5157 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5158 | #[cfg_attr(test, assert_instr(vpavgw))] | |
5159 | pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
5160 | let avg = _mm_avg_epu16(a, b).as_u16x8(); | |
5161 | let zero = _mm_setzero_si128().as_u16x8(); | |
5162 | transmute(simd_select_bitmask(k, avg, zero)) | |
5163 | } | |
5164 | ||
/// Average packed unsigned 8-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_avg_epu8&expand=397)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpavgb))]
pub unsafe fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
    // Forward directly to the LLVM `vpavgb` binding; the rounding-up average
    // ((a + b + 1) >> 1 per the ISA) is performed by the instruction.
    transmute(vpavgb(a.as_u8x64(), b.as_u8x64()))
}
5174 | ||
5175 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5176 | /// | |
5177 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_avg_epu8&expand=398) | |
5178 | #[inline] | |
5179 | #[target_feature(enable = "avx512bw")] | |
5180 | #[cfg_attr(test, assert_instr(vpavgb))] | |
5181 | pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
5182 | let avg = _mm512_avg_epu8(a, b).as_u8x64(); | |
5183 | transmute(simd_select_bitmask(k, avg, src.as_u8x64())) | |
5184 | } | |
5185 | ||
5186 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5187 | /// | |
5188 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_avg_epu8&expand=399) | |
5189 | #[inline] | |
5190 | #[target_feature(enable = "avx512bw")] | |
5191 | #[cfg_attr(test, assert_instr(vpavgb))] | |
5192 | pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
5193 | let avg = _mm512_avg_epu8(a, b).as_u8x64(); | |
5194 | let zero = _mm512_setzero_si512().as_u8x64(); | |
5195 | transmute(simd_select_bitmask(k, avg, zero)) | |
5196 | } | |
5197 | ||
5198 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5199 | /// | |
5200 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_avg_epu8&expand=395) | |
5201 | #[inline] | |
5202 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5203 | #[cfg_attr(test, assert_instr(vpavgb))] | |
5204 | pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
5205 | let avg = _mm256_avg_epu8(a, b).as_u8x32(); | |
5206 | transmute(simd_select_bitmask(k, avg, src.as_u8x32())) | |
5207 | } | |
5208 | ||
5209 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5210 | /// | |
5211 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_avg_epu8&expand=396) | |
5212 | #[inline] | |
5213 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5214 | #[cfg_attr(test, assert_instr(vpavgb))] | |
5215 | pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
5216 | let avg = _mm256_avg_epu8(a, b).as_u8x32(); | |
5217 | let zero = _mm256_setzero_si256().as_u8x32(); | |
5218 | transmute(simd_select_bitmask(k, avg, zero)) | |
5219 | } | |
5220 | ||
5221 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5222 | /// | |
5223 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_avg_epu8&expand=392) | |
5224 | #[inline] | |
5225 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5226 | #[cfg_attr(test, assert_instr(vpavgb))] | |
5227 | pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
5228 | let avg = _mm_avg_epu8(a, b).as_u8x16(); | |
5229 | transmute(simd_select_bitmask(k, avg, src.as_u8x16())) | |
5230 | } | |
5231 | ||
5232 | /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5233 | /// | |
5234 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_avg_epu8&expand=393) | |
5235 | #[inline] | |
5236 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5237 | #[cfg_attr(test, assert_instr(vpavgb))] | |
5238 | pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
5239 | let avg = _mm_avg_epu8(a, b).as_u8x16(); | |
5240 | let zero = _mm_setzero_si128().as_u8x16(); | |
5241 | transmute(simd_select_bitmask(k, avg, zero)) | |
5242 | } | |
5243 | ||
/// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi16&expand=5271)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpsllw))]
pub unsafe fn _mm512_sll_epi16(a: __m512i, count: __m128i) -> __m512i {
    // Forward directly to the LLVM `vpsllw` binding; all lanes are shifted by
    // the same amount taken from `count` (behavior for out-of-range counts is
    // defined by the instruction, which zeroes the result).
    transmute(vpsllw(a.as_i16x32(), count.as_i16x8()))
}
5253 | ||
5254 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5255 | /// | |
5256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi16&expand=5269) | |
5257 | #[inline] | |
5258 | #[target_feature(enable = "avx512bw")] | |
5259 | #[cfg_attr(test, assert_instr(vpsllw))] | |
5260 | pub unsafe fn _mm512_mask_sll_epi16( | |
5261 | src: __m512i, | |
5262 | k: __mmask32, | |
5263 | a: __m512i, | |
5264 | count: __m128i, | |
5265 | ) -> __m512i { | |
5266 | let shf = _mm512_sll_epi16(a, count).as_i16x32(); | |
5267 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) | |
5268 | } | |
5269 | ||
5270 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5271 | /// | |
5272 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi16&expand=5270) | |
5273 | #[inline] | |
5274 | #[target_feature(enable = "avx512bw")] | |
5275 | #[cfg_attr(test, assert_instr(vpsllw))] | |
5276 | pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { | |
5277 | let shf = _mm512_sll_epi16(a, count).as_i16x32(); | |
5278 | let zero = _mm512_setzero_si512().as_i16x32(); | |
5279 | transmute(simd_select_bitmask(k, shf, zero)) | |
5280 | } | |
5281 | ||
5282 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5283 | /// | |
5284 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sll_epi16&expand=5266) | |
5285 | #[inline] | |
5286 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5287 | #[cfg_attr(test, assert_instr(vpsllw))] | |
5288 | pub unsafe fn _mm256_mask_sll_epi16( | |
5289 | src: __m256i, | |
5290 | k: __mmask16, | |
5291 | a: __m256i, | |
5292 | count: __m128i, | |
5293 | ) -> __m256i { | |
5294 | let shf = _mm256_sll_epi16(a, count).as_i16x16(); | |
5295 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) | |
5296 | } | |
5297 | ||
5298 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5299 | /// | |
5300 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sll_epi16&expand=5267) | |
5301 | #[inline] | |
5302 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5303 | #[cfg_attr(test, assert_instr(vpsllw))] | |
5304 | pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { | |
5305 | let shf = _mm256_sll_epi16(a, count).as_i16x16(); | |
5306 | let zero = _mm256_setzero_si256().as_i16x16(); | |
5307 | transmute(simd_select_bitmask(k, shf, zero)) | |
5308 | } | |
5309 | ||
5310 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5311 | /// | |
5312 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sll_epi16&expand=5263) | |
5313 | #[inline] | |
5314 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5315 | #[cfg_attr(test, assert_instr(vpsllw))] | |
5316 | pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5317 | let shf = _mm_sll_epi16(a, count).as_i16x8(); | |
5318 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) | |
5319 | } | |
5320 | ||
5321 | /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5322 | /// | |
5323 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sll_epi16&expand=5264) | |
5324 | #[inline] | |
5325 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5326 | #[cfg_attr(test, assert_instr(vpsllw))] | |
5327 | pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5328 | let shf = _mm_sll_epi16(a, count).as_i16x8(); | |
5329 | let zero = _mm_setzero_si128().as_i16x8(); | |
5330 | transmute(simd_select_bitmask(k, shf, zero)) | |
5331 | } | |
5332 | ||
5333 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. | |
5334 | /// | |
5335 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi16&expand=5301) | |
5336 | #[inline] | |
5337 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5338 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5339 | #[rustc_legacy_const_generics(1)] | |
5340 | pub unsafe fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i { | |
5341 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5342 | let a = a.as_i16x32(); |
17df50a5 | 5343 | let r = vpslliw(a, IMM8); |
fc512014 XL |
5344 | transmute(r) |
5345 | } | |
5346 | ||
5347 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5348 | /// | |
5349 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi16&expand=5299) | |
5350 | #[inline] | |
5351 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5352 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5353 | #[rustc_legacy_const_generics(3)] | |
5354 | pub unsafe fn _mm512_mask_slli_epi16<const IMM8: u32>( | |
5355 | src: __m512i, | |
5356 | k: __mmask32, | |
5357 | a: __m512i, | |
5358 | ) -> __m512i { | |
5359 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5360 | let a = a.as_i16x32(); |
17df50a5 | 5361 | let shf = vpslliw(a, IMM8); |
fc512014 XL |
5362 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) |
5363 | } | |
5364 | ||
5365 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5366 | /// | |
5367 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi16&expand=5300) | |
5368 | #[inline] | |
5369 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5370 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5371 | #[rustc_legacy_const_generics(2)] | |
5372 | pub unsafe fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i { | |
5373 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5374 | let a = a.as_i16x32(); |
17df50a5 | 5375 | let shf = vpslliw(a, IMM8); |
fc512014 XL |
5376 | let zero = _mm512_setzero_si512().as_i16x32(); |
5377 | transmute(simd_select_bitmask(k, shf, zero)) | |
5378 | } | |
5379 | ||
5380 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5381 | /// | |
5382 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_slli_epi16&expand=5296) | |
5383 | #[inline] | |
5384 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5385 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5386 | #[rustc_legacy_const_generics(3)] | |
5387 | pub unsafe fn _mm256_mask_slli_epi16<const IMM8: u32>( | |
5388 | src: __m256i, | |
5389 | k: __mmask16, | |
5390 | a: __m256i, | |
5391 | ) -> __m256i { | |
5392 | static_assert_imm_u8!(IMM8); | |
5393 | let imm8 = IMM8 as i32; | |
5394 | let r = pslliw256(a.as_i16x16(), imm8); | |
5395 | transmute(simd_select_bitmask(k, r, src.as_i16x16())) | |
fc512014 XL |
5396 | } |
5397 | ||
5398 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5399 | /// | |
5400 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_slli_epi16&expand=5297) | |
5401 | #[inline] | |
5402 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5403 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5404 | #[rustc_legacy_const_generics(2)] | |
5405 | pub unsafe fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i { | |
5406 | static_assert_imm_u8!(IMM8); | |
5407 | let imm8 = IMM8 as i32; | |
5408 | let r = pslliw256(a.as_i16x16(), imm8); | |
fc512014 | 5409 | let zero = _mm256_setzero_si256().as_i16x16(); |
17df50a5 | 5410 | transmute(simd_select_bitmask(k, r, zero)) |
fc512014 XL |
5411 | } |
5412 | ||
5413 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5414 | /// | |
5415 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_slli_epi16&expand=5293) | |
5416 | #[inline] | |
5417 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5418 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5419 | #[rustc_legacy_const_generics(3)] | |
5420 | pub unsafe fn _mm_mask_slli_epi16<const IMM8: u32>( | |
5421 | src: __m128i, | |
5422 | k: __mmask8, | |
5423 | a: __m128i, | |
5424 | ) -> __m128i { | |
5425 | static_assert_imm_u8!(IMM8); | |
5426 | let imm8 = IMM8 as i32; | |
5427 | let r = pslliw128(a.as_i16x8(), imm8); | |
5428 | transmute(simd_select_bitmask(k, r, src.as_i16x8())) | |
fc512014 XL |
5429 | } |
5430 | ||
5431 | /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5432 | /// | |
5433 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_slli_epi16&expand=5294) | |
5434 | #[inline] | |
5435 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5436 | #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))] |
5437 | #[rustc_legacy_const_generics(2)] | |
5438 | pub unsafe fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i { | |
5439 | static_assert_imm_u8!(IMM8); | |
5440 | let imm8 = IMM8 as i32; | |
5441 | let r = pslliw128(a.as_i16x8(), imm8); | |
fc512014 | 5442 | let zero = _mm_setzero_si128().as_i16x8(); |
17df50a5 | 5443 | transmute(simd_select_bitmask(k, r, zero)) |
fc512014 XL |
5444 | } |
5445 | ||
5446 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. | |
5447 | /// | |
5448 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi16&expand=5333) | |
5449 | #[inline] | |
5450 | #[target_feature(enable = "avx512bw")] | |
5451 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5452 | pub unsafe fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i { | |
5453 | transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) | |
5454 | } | |
5455 | ||
5456 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5457 | /// | |
5458 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi16&expand=5331) | |
5459 | #[inline] | |
5460 | #[target_feature(enable = "avx512bw")] | |
5461 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5462 | pub unsafe fn _mm512_mask_sllv_epi16( | |
5463 | src: __m512i, | |
5464 | k: __mmask32, | |
5465 | a: __m512i, | |
5466 | count: __m512i, | |
5467 | ) -> __m512i { | |
5468 | let shf = _mm512_sllv_epi16(a, count).as_i16x32(); | |
5469 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) | |
5470 | } | |
5471 | ||
5472 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5473 | /// | |
5474 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi16&expand=5332) | |
5475 | #[inline] | |
5476 | #[target_feature(enable = "avx512bw")] | |
5477 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5478 | pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { | |
5479 | let shf = _mm512_sllv_epi16(a, count).as_i16x32(); | |
5480 | let zero = _mm512_setzero_si512().as_i16x32(); | |
5481 | transmute(simd_select_bitmask(k, shf, zero)) | |
5482 | } | |
5483 | ||
5484 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. | |
5485 | /// | |
5486 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi16&expand=5330) | |
5487 | #[inline] | |
5488 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5489 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5490 | pub unsafe fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i { | |
5491 | transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) | |
5492 | } | |
5493 | ||
5494 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5495 | /// | |
5496 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sllv_epi16&expand=5328) | |
5497 | #[inline] | |
5498 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5499 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5500 | pub unsafe fn _mm256_mask_sllv_epi16( | |
5501 | src: __m256i, | |
5502 | k: __mmask16, | |
5503 | a: __m256i, | |
5504 | count: __m256i, | |
5505 | ) -> __m256i { | |
5506 | let shf = _mm256_sllv_epi16(a, count).as_i16x16(); | |
5507 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) | |
5508 | } | |
5509 | ||
5510 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5511 | /// | |
5512 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sllv_epi16&expand=5329) | |
5513 | #[inline] | |
5514 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5515 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5516 | pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { | |
5517 | let shf = _mm256_sllv_epi16(a, count).as_i16x16(); | |
5518 | let zero = _mm256_setzero_si256().as_i16x16(); | |
5519 | transmute(simd_select_bitmask(k, shf, zero)) | |
5520 | } | |
5521 | ||
5522 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. | |
5523 | /// | |
5524 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi16&expand=5327) | |
5525 | #[inline] | |
5526 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5527 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5528 | pub unsafe fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i { | |
5529 | transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) | |
5530 | } | |
5531 | ||
5532 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5533 | /// | |
5534 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sllv_epi16&expand=5325) | |
5535 | #[inline] | |
5536 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5537 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5538 | pub unsafe fn _mm_mask_sllv_epi16( | |
5539 | src: __m128i, | |
5540 | k: __mmask8, | |
5541 | a: __m128i, | |
5542 | count: __m128i, | |
5543 | ) -> __m128i { | |
5544 | let shf = _mm_sllv_epi16(a, count).as_i16x8(); | |
5545 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) | |
5546 | } | |
5547 | ||
5548 | /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5549 | /// | |
5550 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sllv_epi16&expand=5326) | |
5551 | #[inline] | |
5552 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5553 | #[cfg_attr(test, assert_instr(vpsllvw))] | |
5554 | pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5555 | let shf = _mm_sllv_epi16(a, count).as_i16x8(); | |
5556 | let zero = _mm_setzero_si128().as_i16x8(); | |
5557 | transmute(simd_select_bitmask(k, shf, zero)) | |
5558 | } | |
5559 | ||
5560 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst. | |
5561 | /// | |
5562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi16&expand=5483) | |
5563 | #[inline] | |
5564 | #[target_feature(enable = "avx512bw")] | |
5565 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5566 | pub unsafe fn _mm512_srl_epi16(a: __m512i, count: __m128i) -> __m512i { | |
5567 | transmute(vpsrlw(a.as_i16x32(), count.as_i16x8())) | |
5568 | } | |
5569 | ||
5570 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5571 | /// | |
5572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi16&expand=5481) | |
5573 | #[inline] | |
5574 | #[target_feature(enable = "avx512bw")] | |
5575 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5576 | pub unsafe fn _mm512_mask_srl_epi16( | |
5577 | src: __m512i, | |
5578 | k: __mmask32, | |
5579 | a: __m512i, | |
5580 | count: __m128i, | |
5581 | ) -> __m512i { | |
5582 | let shf = _mm512_srl_epi16(a, count).as_i16x32(); | |
5583 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) | |
5584 | } | |
5585 | ||
5586 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5587 | /// | |
5588 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi16&expand=5482) | |
5589 | #[inline] | |
5590 | #[target_feature(enable = "avx512bw")] | |
5591 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5592 | pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { | |
5593 | let shf = _mm512_srl_epi16(a, count).as_i16x32(); | |
5594 | let zero = _mm512_setzero_si512().as_i16x32(); | |
5595 | transmute(simd_select_bitmask(k, shf, zero)) | |
5596 | } | |
5597 | ||
5598 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5599 | /// | |
5600 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srl_epi16&expand=5478) | |
5601 | #[inline] | |
5602 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5603 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5604 | pub unsafe fn _mm256_mask_srl_epi16( | |
5605 | src: __m256i, | |
5606 | k: __mmask16, | |
5607 | a: __m256i, | |
5608 | count: __m128i, | |
5609 | ) -> __m256i { | |
5610 | let shf = _mm256_srl_epi16(a, count).as_i16x16(); | |
5611 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) | |
5612 | } | |
5613 | ||
5614 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5615 | /// | |
5616 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srl_epi16&expand=5479) | |
5617 | #[inline] | |
5618 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5619 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5620 | pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { | |
5621 | let shf = _mm256_srl_epi16(a, count).as_i16x16(); | |
5622 | let zero = _mm256_setzero_si256().as_i16x16(); | |
5623 | transmute(simd_select_bitmask(k, shf, zero)) | |
5624 | } | |
5625 | ||
5626 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5627 | /// | |
5628 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srl_epi16&expand=5475) | |
5629 | #[inline] | |
5630 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5631 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5632 | pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5633 | let shf = _mm_srl_epi16(a, count).as_i16x8(); | |
5634 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) | |
5635 | } | |
5636 | ||
5637 | /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5638 | /// | |
5639 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srl_epi16&expand=5476) | |
5640 | #[inline] | |
5641 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5642 | #[cfg_attr(test, assert_instr(vpsrlw))] | |
5643 | pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5644 | let shf = _mm_srl_epi16(a, count).as_i16x8(); | |
5645 | let zero = _mm_setzero_si128().as_i16x8(); | |
5646 | transmute(simd_select_bitmask(k, shf, zero)) | |
5647 | } | |
5648 | ||
5649 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. | |
5650 | /// | |
5651 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi16&expand=5513) | |
5652 | #[inline] | |
5653 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5654 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5655 | #[rustc_legacy_const_generics(1)] | |
5656 | pub unsafe fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i { | |
5657 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5658 | let a = a.as_i16x32(); |
17df50a5 | 5659 | let r = vpsrliw(a, IMM8); |
fc512014 XL |
5660 | transmute(r) |
5661 | } | |
5662 | ||
5663 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5664 | /// | |
5665 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi16&expand=5511) | |
5666 | #[inline] | |
5667 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5668 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5669 | #[rustc_legacy_const_generics(3)] | |
5670 | pub unsafe fn _mm512_mask_srli_epi16<const IMM8: u32>( | |
5671 | src: __m512i, | |
5672 | k: __mmask32, | |
5673 | a: __m512i, | |
5674 | ) -> __m512i { | |
5675 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5676 | let a = a.as_i16x32(); |
17df50a5 | 5677 | let shf = vpsrliw(a, IMM8); |
fc512014 XL |
5678 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) |
5679 | } | |
5680 | ||
5681 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5682 | /// | |
5683 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi16&expand=5512) | |
5684 | #[inline] | |
5685 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5686 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5687 | #[rustc_legacy_const_generics(2)] | |
5688 | pub unsafe fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { | |
5689 | static_assert_imm8!(IMM8); | |
fc512014 XL |
5690 | //imm8 should be u32, it seems the document to verify is incorrect |
5691 | let a = a.as_i16x32(); | |
17df50a5 | 5692 | let shf = vpsrliw(a, IMM8 as u32); |
fc512014 XL |
5693 | let zero = _mm512_setzero_si512().as_i16x32(); |
5694 | transmute(simd_select_bitmask(k, shf, zero)) | |
5695 | } | |
5696 | ||
5697 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5698 | /// | |
5699 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srli_epi16&expand=5508) | |
5700 | #[inline] | |
5701 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5702 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5703 | #[rustc_legacy_const_generics(3)] | |
5704 | pub unsafe fn _mm256_mask_srli_epi16<const IMM8: i32>( | |
5705 | src: __m256i, | |
5706 | k: __mmask16, | |
5707 | a: __m256i, | |
5708 | ) -> __m256i { | |
5709 | static_assert_imm8!(IMM8); | |
5710 | let shf = _mm256_srli_epi16::<IMM8>(a); | |
cdc7bbd5 | 5711 | transmute(simd_select_bitmask(k, shf.as_i16x16(), src.as_i16x16())) |
fc512014 XL |
5712 | } |
5713 | ||
5714 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5715 | /// | |
5716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srli_epi16&expand=5509) | |
5717 | #[inline] | |
5718 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5719 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5720 | #[rustc_legacy_const_generics(2)] | |
5721 | pub unsafe fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { | |
5722 | static_assert_imm8!(IMM8); | |
5723 | let shf = _mm256_srli_epi16::<IMM8>(a); | |
fc512014 | 5724 | let zero = _mm256_setzero_si256().as_i16x16(); |
cdc7bbd5 | 5725 | transmute(simd_select_bitmask(k, shf.as_i16x16(), zero)) |
fc512014 XL |
5726 | } |
5727 | ||
5728 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5729 | /// | |
5730 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srli_epi16&expand=5505) | |
5731 | #[inline] | |
5732 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5733 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5734 | #[rustc_legacy_const_generics(3)] | |
5735 | pub unsafe fn _mm_mask_srli_epi16<const IMM8: i32>( | |
5736 | src: __m128i, | |
5737 | k: __mmask8, | |
5738 | a: __m128i, | |
5739 | ) -> __m128i { | |
5740 | static_assert_imm8!(IMM8); | |
5741 | let shf = _mm_srli_epi16::<IMM8>(a); | |
cdc7bbd5 | 5742 | transmute(simd_select_bitmask(k, shf.as_i16x8(), src.as_i16x8())) |
fc512014 XL |
5743 | } |
5744 | ||
5745 | /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5746 | /// | |
5747 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srli_epi16&expand=5506) | |
5748 | #[inline] | |
5749 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
5750 | #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))] |
5751 | #[rustc_legacy_const_generics(2)] | |
5752 | pub unsafe fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { | |
5753 | static_assert_imm8!(IMM8); | |
5754 | let shf = _mm_srli_epi16::<IMM8>(a); | |
fc512014 | 5755 | let zero = _mm_setzero_si128().as_i16x8(); |
cdc7bbd5 | 5756 | transmute(simd_select_bitmask(k, shf.as_i16x8(), zero)) |
fc512014 XL |
5757 | } |
5758 | ||
5759 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. | |
5760 | /// | |
5761 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi16&expand=5545) | |
5762 | #[inline] | |
5763 | #[target_feature(enable = "avx512bw")] | |
5764 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5765 | pub unsafe fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i { | |
5766 | transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) | |
5767 | } | |
5768 | ||
5769 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5770 | /// | |
5771 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi16&expand=5543) | |
5772 | #[inline] | |
5773 | #[target_feature(enable = "avx512bw")] | |
5774 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5775 | pub unsafe fn _mm512_mask_srlv_epi16( | |
5776 | src: __m512i, | |
5777 | k: __mmask32, | |
5778 | a: __m512i, | |
5779 | count: __m512i, | |
5780 | ) -> __m512i { | |
5781 | let shf = _mm512_srlv_epi16(a, count).as_i16x32(); | |
5782 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) | |
5783 | } | |
5784 | ||
5785 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5786 | /// | |
5787 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi16&expand=5544) | |
5788 | #[inline] | |
5789 | #[target_feature(enable = "avx512bw")] | |
5790 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5791 | pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { | |
5792 | let shf = _mm512_srlv_epi16(a, count).as_i16x32(); | |
5793 | let zero = _mm512_setzero_si512().as_i16x32(); | |
5794 | transmute(simd_select_bitmask(k, shf, zero)) | |
5795 | } | |
5796 | ||
5797 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. | |
5798 | /// | |
5799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi16&expand=5542) | |
5800 | #[inline] | |
5801 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5802 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5803 | pub unsafe fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i { | |
5804 | transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) | |
5805 | } | |
5806 | ||
5807 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5808 | /// | |
5809 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srlv_epi16&expand=5540) | |
5810 | #[inline] | |
5811 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5812 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5813 | pub unsafe fn _mm256_mask_srlv_epi16( | |
5814 | src: __m256i, | |
5815 | k: __mmask16, | |
5816 | a: __m256i, | |
5817 | count: __m256i, | |
5818 | ) -> __m256i { | |
5819 | let shf = _mm256_srlv_epi16(a, count).as_i16x16(); | |
5820 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) | |
5821 | } | |
5822 | ||
5823 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5824 | /// | |
5825 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srlv_epi16&expand=5541) | |
5826 | #[inline] | |
5827 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5828 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5829 | pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { | |
5830 | let shf = _mm256_srlv_epi16(a, count).as_i16x16(); | |
5831 | let zero = _mm256_setzero_si256().as_i16x16(); | |
5832 | transmute(simd_select_bitmask(k, shf, zero)) | |
5833 | } | |
5834 | ||
5835 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. | |
5836 | /// | |
5837 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi16&expand=5539) | |
5838 | #[inline] | |
5839 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5840 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5841 | pub unsafe fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i { | |
5842 | transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) | |
5843 | } | |
5844 | ||
5845 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5846 | /// | |
5847 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srlv_epi16&expand=5537) | |
5848 | #[inline] | |
5849 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5850 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5851 | pub unsafe fn _mm_mask_srlv_epi16( | |
5852 | src: __m128i, | |
5853 | k: __mmask8, | |
5854 | a: __m128i, | |
5855 | count: __m128i, | |
5856 | ) -> __m128i { | |
5857 | let shf = _mm_srlv_epi16(a, count).as_i16x8(); | |
5858 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) | |
5859 | } | |
5860 | ||
5861 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5862 | /// | |
5863 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srlv_epi16&expand=5538) | |
5864 | #[inline] | |
5865 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5866 | #[cfg_attr(test, assert_instr(vpsrlvw))] | |
5867 | pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5868 | let shf = _mm_srlv_epi16(a, count).as_i16x8(); | |
5869 | let zero = _mm_setzero_si128().as_i16x8(); | |
5870 | transmute(simd_select_bitmask(k, shf, zero)) | |
5871 | } | |
5872 | ||
5873 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst. | |
5874 | /// | |
5875 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi16&expand=5398) | |
5876 | #[inline] | |
5877 | #[target_feature(enable = "avx512bw")] | |
5878 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5879 | pub unsafe fn _mm512_sra_epi16(a: __m512i, count: __m128i) -> __m512i { | |
5880 | transmute(vpsraw(a.as_i16x32(), count.as_i16x8())) | |
5881 | } | |
5882 | ||
5883 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5884 | /// | |
5885 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi16&expand=5396) | |
5886 | #[inline] | |
5887 | #[target_feature(enable = "avx512bw")] | |
5888 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5889 | pub unsafe fn _mm512_mask_sra_epi16( | |
5890 | src: __m512i, | |
5891 | k: __mmask32, | |
5892 | a: __m512i, | |
5893 | count: __m128i, | |
5894 | ) -> __m512i { | |
5895 | let shf = _mm512_sra_epi16(a, count).as_i16x32(); | |
5896 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) | |
5897 | } | |
5898 | ||
5899 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5900 | /// | |
5901 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi16&expand=5397) | |
5902 | #[inline] | |
5903 | #[target_feature(enable = "avx512bw")] | |
5904 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5905 | pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { | |
5906 | let shf = _mm512_sra_epi16(a, count).as_i16x32(); | |
5907 | let zero = _mm512_setzero_si512().as_i16x32(); | |
5908 | transmute(simd_select_bitmask(k, shf, zero)) | |
5909 | } | |
5910 | ||
5911 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5912 | /// | |
5913 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_sra_epi16&expand=5393) | |
5914 | #[inline] | |
5915 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5916 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5917 | pub unsafe fn _mm256_mask_sra_epi16( | |
5918 | src: __m256i, | |
5919 | k: __mmask16, | |
5920 | a: __m256i, | |
5921 | count: __m128i, | |
5922 | ) -> __m256i { | |
5923 | let shf = _mm256_sra_epi16(a, count).as_i16x16(); | |
5924 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) | |
5925 | } | |
5926 | ||
5927 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5928 | /// | |
5929 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_sra_epi16&expand=5394) | |
5930 | #[inline] | |
5931 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5932 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5933 | pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { | |
5934 | let shf = _mm256_sra_epi16(a, count).as_i16x16(); | |
5935 | let zero = _mm256_setzero_si256().as_i16x16(); | |
5936 | transmute(simd_select_bitmask(k, shf, zero)) | |
5937 | } | |
5938 | ||
5939 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5940 | /// | |
5941 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sra_epi16&expand=5390) | |
5942 | #[inline] | |
5943 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5944 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5945 | pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5946 | let shf = _mm_sra_epi16(a, count).as_i16x8(); | |
5947 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) | |
5948 | } | |
5949 | ||
5950 | /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5951 | /// | |
5952 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sra_epi16&expand=5391) | |
5953 | #[inline] | |
5954 | #[target_feature(enable = "avx512bw,avx512vl")] | |
5955 | #[cfg_attr(test, assert_instr(vpsraw))] | |
5956 | pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
5957 | let shf = _mm_sra_epi16(a, count).as_i16x8(); | |
5958 | let zero = _mm_setzero_si128().as_i16x8(); | |
5959 | transmute(simd_select_bitmask(k, shf, zero)) | |
5960 | } | |
5961 | ||
5962 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. | |
5963 | /// | |
5964 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi16&expand=5427) | |
5965 | #[inline] | |
5966 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5967 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
5968 | #[rustc_legacy_const_generics(1)] | |
5969 | pub unsafe fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i { | |
5970 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5971 | let a = a.as_i16x32(); |
17df50a5 | 5972 | let r = vpsraiw(a, IMM8); |
fc512014 XL |
5973 | transmute(r) |
5974 | } | |
5975 | ||
5976 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
5977 | /// | |
5978 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi16&expand=5425) | |
5979 | #[inline] | |
5980 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5981 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
5982 | #[rustc_legacy_const_generics(3)] | |
5983 | pub unsafe fn _mm512_mask_srai_epi16<const IMM8: u32>( | |
5984 | src: __m512i, | |
5985 | k: __mmask32, | |
5986 | a: __m512i, | |
5987 | ) -> __m512i { | |
5988 | static_assert_imm_u8!(IMM8); | |
fc512014 | 5989 | let a = a.as_i16x32(); |
17df50a5 | 5990 | let shf = vpsraiw(a, IMM8); |
fc512014 XL |
5991 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) |
5992 | } | |
5993 | ||
5994 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
5995 | /// | |
5996 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi16&expand=5426) | |
5997 | #[inline] | |
5998 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
5999 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
6000 | #[rustc_legacy_const_generics(2)] | |
6001 | pub unsafe fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i { | |
6002 | static_assert_imm_u8!(IMM8); | |
fc512014 | 6003 | let a = a.as_i16x32(); |
17df50a5 | 6004 | let shf = vpsraiw(a, IMM8); |
fc512014 XL |
6005 | let zero = _mm512_setzero_si512().as_i16x32(); |
6006 | transmute(simd_select_bitmask(k, shf, zero)) | |
6007 | } | |
6008 | ||
6009 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6010 | /// | |
6011 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srai_epi16&expand=5422) | |
6012 | #[inline] | |
6013 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
6014 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
6015 | #[rustc_legacy_const_generics(3)] | |
6016 | pub unsafe fn _mm256_mask_srai_epi16<const IMM8: u32>( | |
6017 | src: __m256i, | |
6018 | k: __mmask16, | |
6019 | a: __m256i, | |
6020 | ) -> __m256i { | |
6021 | static_assert_imm_u8!(IMM8); | |
6022 | let imm8 = IMM8 as i32; | |
6023 | let r = psraiw256(a.as_i16x16(), imm8); | |
6024 | transmute(simd_select_bitmask(k, r, src.as_i16x16())) | |
fc512014 XL |
6025 | } |
6026 | ||
6027 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6028 | /// | |
6029 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srai_epi16&expand=5423) | |
6030 | #[inline] | |
6031 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
6032 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
6033 | #[rustc_legacy_const_generics(2)] | |
6034 | pub unsafe fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i { | |
6035 | static_assert_imm_u8!(IMM8); | |
6036 | let imm8 = IMM8 as i32; | |
6037 | let r = psraiw256(a.as_i16x16(), imm8); | |
fc512014 | 6038 | let zero = _mm256_setzero_si256().as_i16x16(); |
17df50a5 | 6039 | transmute(simd_select_bitmask(k, r, zero)) |
fc512014 XL |
6040 | } |
6041 | ||
6042 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6043 | /// | |
6044 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srai_epi16&expand=5419) | |
6045 | #[inline] | |
6046 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
6047 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
6048 | #[rustc_legacy_const_generics(3)] | |
6049 | pub unsafe fn _mm_mask_srai_epi16<const IMM8: u32>( | |
6050 | src: __m128i, | |
6051 | k: __mmask8, | |
6052 | a: __m128i, | |
6053 | ) -> __m128i { | |
6054 | static_assert_imm_u8!(IMM8); | |
6055 | let imm8 = IMM8 as i32; | |
6056 | let r = psraiw128(a.as_i16x8(), imm8); | |
6057 | transmute(simd_select_bitmask(k, r, src.as_i16x8())) | |
fc512014 XL |
6058 | } |
6059 | ||
6060 | /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6061 | /// | |
6062 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srai_epi16&expand=5420) | |
6063 | #[inline] | |
6064 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
6065 | #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))] |
6066 | #[rustc_legacy_const_generics(2)] | |
6067 | pub unsafe fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i { | |
6068 | static_assert_imm_u8!(IMM8); | |
6069 | let imm8 = IMM8 as i32; | |
6070 | let r = psraiw128(a.as_i16x8(), imm8); | |
fc512014 | 6071 | let zero = _mm_setzero_si128().as_i16x8(); |
17df50a5 | 6072 | transmute(simd_select_bitmask(k, r, zero)) |
fc512014 XL |
6073 | } |
6074 | ||
6075 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. | |
6076 | /// | |
6077 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi16&expand=5456) | |
6078 | #[inline] | |
6079 | #[target_feature(enable = "avx512bw")] | |
6080 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6081 | pub unsafe fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i { | |
6082 | transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) | |
6083 | } | |
6084 | ||
6085 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6086 | /// | |
6087 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi16&expand=5454) | |
6088 | #[inline] | |
6089 | #[target_feature(enable = "avx512bw")] | |
6090 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6091 | pub unsafe fn _mm512_mask_srav_epi16( | |
6092 | src: __m512i, | |
6093 | k: __mmask32, | |
6094 | a: __m512i, | |
6095 | count: __m512i, | |
6096 | ) -> __m512i { | |
6097 | let shf = _mm512_srav_epi16(a, count).as_i16x32(); | |
6098 | transmute(simd_select_bitmask(k, shf, src.as_i16x32())) | |
6099 | } | |
6100 | ||
6101 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6102 | /// | |
6103 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi16&expand=5455) | |
6104 | #[inline] | |
6105 | #[target_feature(enable = "avx512bw")] | |
6106 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6107 | pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { | |
6108 | let shf = _mm512_srav_epi16(a, count).as_i16x32(); | |
6109 | let zero = _mm512_setzero_si512().as_i16x32(); | |
6110 | transmute(simd_select_bitmask(k, shf, zero)) | |
6111 | } | |
6112 | ||
6113 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. | |
6114 | /// | |
6115 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi16&expand=5453) | |
6116 | #[inline] | |
6117 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6118 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6119 | pub unsafe fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i { | |
6120 | transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) | |
6121 | } | |
6122 | ||
6123 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6124 | /// | |
6125 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_srav_epi16&expand=5451) | |
6126 | #[inline] | |
6127 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6128 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6129 | pub unsafe fn _mm256_mask_srav_epi16( | |
6130 | src: __m256i, | |
6131 | k: __mmask16, | |
6132 | a: __m256i, | |
6133 | count: __m256i, | |
6134 | ) -> __m256i { | |
6135 | let shf = _mm256_srav_epi16(a, count).as_i16x16(); | |
6136 | transmute(simd_select_bitmask(k, shf, src.as_i16x16())) | |
6137 | } | |
6138 | ||
6139 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6140 | /// | |
6141 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_srav_epi16&expand=5452) | |
6142 | #[inline] | |
6143 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6144 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6145 | pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { | |
6146 | let shf = _mm256_srav_epi16(a, count).as_i16x16(); | |
6147 | let zero = _mm256_setzero_si256().as_i16x16(); | |
6148 | transmute(simd_select_bitmask(k, shf, zero)) | |
6149 | } | |
6150 | ||
6151 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. | |
6152 | /// | |
6153 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi16&expand=5450) | |
6154 | #[inline] | |
6155 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6156 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6157 | pub unsafe fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i { | |
6158 | transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) | |
6159 | } | |
6160 | ||
6161 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6162 | /// | |
6163 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_srav_epi16&expand=5448) | |
6164 | #[inline] | |
6165 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6166 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6167 | pub unsafe fn _mm_mask_srav_epi16( | |
6168 | src: __m128i, | |
6169 | k: __mmask8, | |
6170 | a: __m128i, | |
6171 | count: __m128i, | |
6172 | ) -> __m128i { | |
6173 | let shf = _mm_srav_epi16(a, count).as_i16x8(); | |
6174 | transmute(simd_select_bitmask(k, shf, src.as_i16x8())) | |
6175 | } | |
6176 | ||
6177 | /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6178 | /// | |
6179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_srav_epi16&expand=5449) | |
6180 | #[inline] | |
6181 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6182 | #[cfg_attr(test, assert_instr(vpsravw))] | |
6183 | pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { | |
6184 | let shf = _mm_srav_epi16(a, count).as_i16x8(); | |
6185 | let zero = _mm_setzero_si128().as_i16x8(); | |
6186 | transmute(simd_select_bitmask(k, shf, zero)) | |
6187 | } | |
6188 | ||
6189 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. | |
6190 | /// | |
6191 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi16&expand=4226) | |
6192 | #[inline] | |
6193 | #[target_feature(enable = "avx512bw")] | |
6194 | #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w | |
6195 | pub unsafe fn _mm512_permutex2var_epi16(a: __m512i, idx: __m512i, b: __m512i) -> __m512i { | |
6196 | transmute(vpermi2w(a.as_i16x32(), idx.as_i16x32(), b.as_i16x32())) | |
6197 | } | |
6198 | ||
6199 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). | |
6200 | /// | |
6201 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi16&expand=4223) | |
6202 | #[inline] | |
6203 | #[target_feature(enable = "avx512bw")] | |
6204 | #[cfg_attr(test, assert_instr(vpermt2w))] | |
6205 | pub unsafe fn _mm512_mask_permutex2var_epi16( | |
6206 | a: __m512i, | |
6207 | k: __mmask32, | |
6208 | idx: __m512i, | |
6209 | b: __m512i, | |
6210 | ) -> __m512i { | |
6211 | let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); | |
6212 | transmute(simd_select_bitmask(k, permute, a.as_i16x32())) | |
6213 | } | |
6214 | ||
6215 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6216 | /// | |
6217 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi16&expand=4225) | |
6218 | #[inline] | |
6219 | #[target_feature(enable = "avx512bw")] | |
6220 | #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w | |
6221 | pub unsafe fn _mm512_maskz_permutex2var_epi16( | |
6222 | k: __mmask32, | |
6223 | a: __m512i, | |
6224 | idx: __m512i, | |
6225 | b: __m512i, | |
6226 | ) -> __m512i { | |
6227 | let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); | |
6228 | let zero = _mm512_setzero_si512().as_i16x32(); | |
6229 | transmute(simd_select_bitmask(k, permute, zero)) | |
6230 | } | |
6231 | ||
6232 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). | |
6233 | /// | |
6234 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi16&expand=4224) | |
6235 | #[inline] | |
6236 | #[target_feature(enable = "avx512bw")] | |
6237 | #[cfg_attr(test, assert_instr(vpermi2w))] | |
6238 | pub unsafe fn _mm512_mask2_permutex2var_epi16( | |
6239 | a: __m512i, | |
6240 | idx: __m512i, | |
6241 | k: __mmask32, | |
6242 | b: __m512i, | |
6243 | ) -> __m512i { | |
6244 | let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); | |
6245 | transmute(simd_select_bitmask(k, permute, idx.as_i16x32())) | |
6246 | } | |
6247 | ||
6248 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. | |
6249 | /// | |
6250 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_epi16&expand=4222) | |
6251 | #[inline] | |
6252 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6253 | #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w | |
6254 | pub unsafe fn _mm256_permutex2var_epi16(a: __m256i, idx: __m256i, b: __m256i) -> __m256i { | |
6255 | transmute(vpermi2w256(a.as_i16x16(), idx.as_i16x16(), b.as_i16x16())) | |
6256 | } | |
6257 | ||
6258 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). | |
6259 | /// | |
6260 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutex2var_epi16&expand=4219) | |
6261 | #[inline] | |
6262 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6263 | #[cfg_attr(test, assert_instr(vpermt2w))] | |
6264 | pub unsafe fn _mm256_mask_permutex2var_epi16( | |
6265 | a: __m256i, | |
6266 | k: __mmask16, | |
6267 | idx: __m256i, | |
6268 | b: __m256i, | |
6269 | ) -> __m256i { | |
6270 | let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); | |
6271 | transmute(simd_select_bitmask(k, permute, a.as_i16x16())) | |
6272 | } | |
6273 | ||
6274 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6275 | /// | |
6276 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutex2var_epi16&expand=4221) | |
6277 | #[inline] | |
6278 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6279 | #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w | |
6280 | pub unsafe fn _mm256_maskz_permutex2var_epi16( | |
6281 | k: __mmask16, | |
6282 | a: __m256i, | |
6283 | idx: __m256i, | |
6284 | b: __m256i, | |
6285 | ) -> __m256i { | |
6286 | let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); | |
6287 | let zero = _mm256_setzero_si256().as_i16x16(); | |
6288 | transmute(simd_select_bitmask(k, permute, zero)) | |
6289 | } | |
6290 | ||
6291 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). | |
6292 | /// | |
6293 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask2_permutex2var_epi16&expand=4220) | |
6294 | #[inline] | |
6295 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6296 | #[cfg_attr(test, assert_instr(vpermi2w))] | |
6297 | pub unsafe fn _mm256_mask2_permutex2var_epi16( | |
6298 | a: __m256i, | |
6299 | idx: __m256i, | |
6300 | k: __mmask16, | |
6301 | b: __m256i, | |
6302 | ) -> __m256i { | |
6303 | let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); | |
6304 | transmute(simd_select_bitmask(k, permute, idx.as_i16x16())) | |
6305 | } | |
6306 | ||
6307 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. | |
6308 | /// | |
6309 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_epi16&expand=4218) | |
6310 | #[inline] | |
6311 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6312 | #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w | |
6313 | pub unsafe fn _mm_permutex2var_epi16(a: __m128i, idx: __m128i, b: __m128i) -> __m128i { | |
6314 | transmute(vpermi2w128(a.as_i16x8(), idx.as_i16x8(), b.as_i16x8())) | |
6315 | } | |
6316 | ||
6317 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). | |
6318 | /// | |
6319 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutex2var_epi16&expand=4215) | |
6320 | #[inline] | |
6321 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6322 | #[cfg_attr(test, assert_instr(vpermt2w))] | |
6323 | pub unsafe fn _mm_mask_permutex2var_epi16( | |
6324 | a: __m128i, | |
6325 | k: __mmask8, | |
6326 | idx: __m128i, | |
6327 | b: __m128i, | |
6328 | ) -> __m128i { | |
6329 | let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); | |
6330 | transmute(simd_select_bitmask(k, permute, a.as_i16x8())) | |
6331 | } | |
6332 | ||
6333 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6334 | /// | |
6335 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutex2var_epi16&expand=4217) | |
6336 | #[inline] | |
6337 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6338 | #[cfg_attr(test, assert_instr(vperm))] //vpermi2w or vpermt2w | |
6339 | pub unsafe fn _mm_maskz_permutex2var_epi16( | |
6340 | k: __mmask8, | |
6341 | a: __m128i, | |
6342 | idx: __m128i, | |
6343 | b: __m128i, | |
6344 | ) -> __m128i { | |
6345 | let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); | |
6346 | let zero = _mm_setzero_si128().as_i16x8(); | |
6347 | transmute(simd_select_bitmask(k, permute, zero)) | |
6348 | } | |
6349 | ||
6350 | /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). | |
6351 | /// | |
6352 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask2_permutex2var_epi16&expand=4216) | |
6353 | #[inline] | |
6354 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6355 | #[cfg_attr(test, assert_instr(vpermi2w))] | |
6356 | pub unsafe fn _mm_mask2_permutex2var_epi16( | |
6357 | a: __m128i, | |
6358 | idx: __m128i, | |
6359 | k: __mmask8, | |
6360 | b: __m128i, | |
6361 | ) -> __m128i { | |
6362 | let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); | |
6363 | transmute(simd_select_bitmask(k, permute, idx.as_i16x8())) | |
6364 | } | |
6365 | ||
6366 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. | |
6367 | /// | |
6368 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi16&expand=4295) | |
6369 | #[inline] | |
6370 | #[target_feature(enable = "avx512bw")] | |
6371 | #[cfg_attr(test, assert_instr(vpermw))] | |
6372 | pub unsafe fn _mm512_permutexvar_epi16(idx: __m512i, a: __m512i) -> __m512i { | |
6373 | transmute(vpermw(a.as_i16x32(), idx.as_i16x32())) | |
6374 | } | |
6375 | ||
6376 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6377 | /// | |
6378 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi16&expand=4293) | |
6379 | #[inline] | |
6380 | #[target_feature(enable = "avx512bw")] | |
6381 | #[cfg_attr(test, assert_instr(vpermw))] | |
6382 | pub unsafe fn _mm512_mask_permutexvar_epi16( | |
6383 | src: __m512i, | |
6384 | k: __mmask32, | |
6385 | idx: __m512i, | |
6386 | a: __m512i, | |
6387 | ) -> __m512i { | |
6388 | let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); | |
6389 | transmute(simd_select_bitmask(k, permute, src.as_i16x32())) | |
6390 | } | |
6391 | ||
6392 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6393 | /// | |
6394 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi16&expand=4294) | |
6395 | #[inline] | |
6396 | #[target_feature(enable = "avx512bw")] | |
6397 | #[cfg_attr(test, assert_instr(vpermw))] | |
6398 | pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i { | |
6399 | let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); | |
6400 | let zero = _mm512_setzero_si512().as_i16x32(); | |
6401 | transmute(simd_select_bitmask(k, permute, zero)) | |
6402 | } | |
6403 | ||
6404 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. | |
6405 | /// | |
6406 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_epi16&expand=4292) | |
6407 | #[inline] | |
6408 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6409 | #[cfg_attr(test, assert_instr(vpermw))] | |
6410 | pub unsafe fn _mm256_permutexvar_epi16(idx: __m256i, a: __m256i) -> __m256i { | |
6411 | transmute(vpermw256(a.as_i16x16(), idx.as_i16x16())) | |
6412 | } | |
6413 | ||
6414 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6415 | /// | |
6416 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_permutexvar_epi16&expand=4290) | |
6417 | #[inline] | |
6418 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6419 | #[cfg_attr(test, assert_instr(vpermw))] | |
6420 | pub unsafe fn _mm256_mask_permutexvar_epi16( | |
6421 | src: __m256i, | |
6422 | k: __mmask16, | |
6423 | idx: __m256i, | |
6424 | a: __m256i, | |
6425 | ) -> __m256i { | |
6426 | let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); | |
6427 | transmute(simd_select_bitmask(k, permute, src.as_i16x16())) | |
6428 | } | |
6429 | ||
6430 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6431 | /// | |
6432 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_permutexvar_epi16&expand=4291) | |
6433 | #[inline] | |
6434 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6435 | #[cfg_attr(test, assert_instr(vpermw))] | |
6436 | pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i { | |
6437 | let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); | |
6438 | let zero = _mm256_setzero_si256().as_i16x16(); | |
6439 | transmute(simd_select_bitmask(k, permute, zero)) | |
6440 | } | |
6441 | ||
6442 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. | |
6443 | /// | |
6444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_epi16&expand=4289) | |
6445 | #[inline] | |
6446 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6447 | #[cfg_attr(test, assert_instr(vpermw))] | |
6448 | pub unsafe fn _mm_permutexvar_epi16(idx: __m128i, a: __m128i) -> __m128i { | |
6449 | transmute(vpermw128(a.as_i16x8(), idx.as_i16x8())) | |
6450 | } | |
6451 | ||
6452 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6453 | /// | |
6454 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_permutexvar_epi16&expand=4287) | |
6455 | #[inline] | |
6456 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6457 | #[cfg_attr(test, assert_instr(vpermw))] | |
6458 | pub unsafe fn _mm_mask_permutexvar_epi16( | |
6459 | src: __m128i, | |
6460 | k: __mmask8, | |
6461 | idx: __m128i, | |
6462 | a: __m128i, | |
6463 | ) -> __m128i { | |
6464 | let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); | |
6465 | transmute(simd_select_bitmask(k, permute, src.as_i16x8())) | |
6466 | } | |
6467 | ||
6468 | /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6469 | /// | |
6470 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_permutexvar_epi16&expand=4288) | |
6471 | #[inline] | |
6472 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6473 | #[cfg_attr(test, assert_instr(vpermw))] | |
6474 | pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { | |
6475 | let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); | |
6476 | let zero = _mm_setzero_si128().as_i16x8(); | |
6477 | transmute(simd_select_bitmask(k, permute, zero)) | |
6478 | } | |
6479 | ||
6480 | /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. | |
6481 | /// | |
6482 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi16&expand=430) | |
6483 | #[inline] | |
6484 | #[target_feature(enable = "avx512bw")] | |
6485 | #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw | |
6486 | pub unsafe fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
6487 | transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) | |
6488 | } | |
6489 | ||
6490 | /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. | |
6491 | /// | |
6492 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_epi16&expand=429) | |
6493 | #[inline] | |
6494 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6495 | #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw | |
6496 | pub unsafe fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
6497 | transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) | |
6498 | } | |
6499 | ||
6500 | /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. | |
6501 | /// | |
6502 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_epi16&expand=427) | |
6503 | #[inline] | |
6504 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6505 | #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw | |
6506 | pub unsafe fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
6507 | transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) | |
6508 | } | |
6509 | ||
6510 | /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. | |
6511 | /// | |
6512 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi8&expand=441) | |
6513 | #[inline] | |
6514 | #[target_feature(enable = "avx512bw")] | |
6515 | #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb | |
6516 | pub unsafe fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
6517 | transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) | |
6518 | } | |
6519 | ||
6520 | /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. | |
6521 | /// | |
6522 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_epi8&expand=440) | |
6523 | #[inline] | |
6524 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6525 | #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb | |
6526 | pub unsafe fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
6527 | transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) | |
6528 | } | |
6529 | ||
6530 | /// Blend packed 8-bit integers from a and b using control mask k, and store the results in dst. | |
6531 | /// | |
6532 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_epi8&expand=439) | |
6533 | #[inline] | |
6534 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6535 | #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb | |
6536 | pub unsafe fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
6537 | transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) | |
6538 | } | |
6539 | ||
6540 | /// Broadcast the low packed 16-bit integer from a to all elements of dst. | |
6541 | /// | |
6542 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastw_epi16&expand=587) | |
6543 | #[inline] | |
6544 | #[target_feature(enable = "avx512bw")] | |
6545 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6546 | pub unsafe fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i { | |
6547 | let a = _mm512_castsi128_si512(a).as_i16x32(); | |
17df50a5 | 6548 | let ret: i16x32 = simd_shuffle32!( |
fc512014 XL |
6549 | a, |
6550 | a, | |
6551 | [ | |
6552 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
6553 | 0, 0, 0, | |
6554 | ], | |
6555 | ); | |
6556 | transmute(ret) | |
6557 | } | |
6558 | ||
6559 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6560 | /// | |
6561 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastw_epi16&expand=588) | |
6562 | #[inline] | |
6563 | #[target_feature(enable = "avx512bw")] | |
6564 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6565 | pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i { | |
6566 | let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); | |
6567 | transmute(simd_select_bitmask(k, broadcast, src.as_i16x32())) | |
6568 | } | |
6569 | ||
6570 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6571 | /// | |
6572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastw_epi16&expand=589) | |
6573 | #[inline] | |
6574 | #[target_feature(enable = "avx512bw")] | |
6575 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6576 | pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i { | |
6577 | let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); | |
6578 | let zero = _mm512_setzero_si512().as_i16x32(); | |
6579 | transmute(simd_select_bitmask(k, broadcast, zero)) | |
6580 | } | |
6581 | ||
6582 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6583 | /// | |
6584 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_broadcastw_epi16&expand=585) | |
6585 | #[inline] | |
6586 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6587 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6588 | pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { | |
6589 | let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); | |
6590 | transmute(simd_select_bitmask(k, broadcast, src.as_i16x16())) | |
6591 | } | |
6592 | ||
6593 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6594 | /// | |
6595 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_broadcastw_epi16&expand=586) | |
6596 | #[inline] | |
6597 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6598 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6599 | pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i { | |
6600 | let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); | |
6601 | let zero = _mm256_setzero_si256().as_i16x16(); | |
6602 | transmute(simd_select_bitmask(k, broadcast, zero)) | |
6603 | } | |
6604 | ||
6605 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6606 | /// | |
6607 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_broadcastw_epi16&expand=582) | |
6608 | #[inline] | |
6609 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6610 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6611 | pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
6612 | let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); | |
6613 | transmute(simd_select_bitmask(k, broadcast, src.as_i16x8())) | |
6614 | } | |
6615 | ||
6616 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6617 | /// | |
6618 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_broadcastw_epi16&expand=583) | |
6619 | #[inline] | |
6620 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6621 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
6622 | pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { | |
6623 | let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); | |
6624 | let zero = _mm_setzero_si128().as_i16x8(); | |
6625 | transmute(simd_select_bitmask(k, broadcast, zero)) | |
6626 | } | |
6627 | ||
6628 | /// Broadcast the low packed 8-bit integer from a to all elements of dst. | |
6629 | /// | |
6630 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastb_epi8&expand=536) | |
6631 | #[inline] | |
6632 | #[target_feature(enable = "avx512bw")] | |
6633 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6634 | pub unsafe fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i { | |
6635 | let a = _mm512_castsi128_si512(a).as_i8x64(); | |
17df50a5 | 6636 | let ret: i8x64 = simd_shuffle64!( |
fc512014 XL |
6637 | a, |
6638 | a, | |
6639 | [ | |
6640 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
6641 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
6642 | 0, 0, 0, 0, 0, 0, | |
6643 | ], | |
6644 | ); | |
6645 | transmute(ret) | |
6646 | } | |
6647 | ||
6648 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6649 | /// | |
6650 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastb_epi8&expand=537) | |
6651 | #[inline] | |
6652 | #[target_feature(enable = "avx512bw")] | |
6653 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6654 | pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i { | |
6655 | let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); | |
6656 | transmute(simd_select_bitmask(k, broadcast, src.as_i8x64())) | |
6657 | } | |
6658 | ||
6659 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6660 | /// | |
6661 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastb_epi8&expand=538) | |
6662 | #[inline] | |
6663 | #[target_feature(enable = "avx512bw")] | |
6664 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6665 | pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i { | |
6666 | let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); | |
6667 | let zero = _mm512_setzero_si512().as_i8x64(); | |
6668 | transmute(simd_select_bitmask(k, broadcast, zero)) | |
6669 | } | |
6670 | ||
6671 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6672 | /// | |
6673 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_broadcastb_epi8&expand=534) | |
6674 | #[inline] | |
6675 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6676 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6677 | pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i { | |
6678 | let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); | |
6679 | transmute(simd_select_bitmask(k, broadcast, src.as_i8x32())) | |
6680 | } | |
6681 | ||
6682 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6683 | /// | |
6684 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_broadcastb_epi8&expand=535) | |
6685 | #[inline] | |
6686 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6687 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6688 | pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i { | |
6689 | let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); | |
6690 | let zero = _mm256_setzero_si256().as_i8x32(); | |
6691 | transmute(simd_select_bitmask(k, broadcast, zero)) | |
6692 | } | |
6693 | ||
6694 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6695 | /// | |
6696 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_broadcastb_epi8&expand=531) | |
6697 | #[inline] | |
6698 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6699 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6700 | pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { | |
6701 | let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); | |
6702 | transmute(simd_select_bitmask(k, broadcast, src.as_i8x16())) | |
6703 | } | |
6704 | ||
6705 | /// Broadcast the low packed 8-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6706 | /// | |
6707 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_broadcastb_epi8&expand=532) | |
6708 | #[inline] | |
6709 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6710 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
6711 | pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { | |
6712 | let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); | |
6713 | let zero = _mm_setzero_si128().as_i8x16(); | |
6714 | transmute(simd_select_bitmask(k, broadcast, zero)) | |
6715 | } | |
6716 | ||
6717 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. | |
6718 | /// | |
6719 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi16&expand=6012) | |
6720 | #[inline] | |
6721 | #[target_feature(enable = "avx512bw")] | |
6722 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6723 | pub unsafe fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i { | |
6724 | let a = a.as_i16x32(); | |
6725 | let b = b.as_i16x32(); | |
6726 | #[rustfmt::skip] | |
17df50a5 | 6727 | let r: i16x32 = simd_shuffle32!( |
fc512014 XL |
6728 | a, |
6729 | b, | |
6730 | [ | |
6731 | 4, 32 + 4, 5, 32 + 5, | |
6732 | 6, 32 + 6, 7, 32 + 7, | |
6733 | 12, 32 + 12, 13, 32 + 13, | |
6734 | 14, 32 + 14, 15, 32 + 15, | |
6735 | 20, 32 + 20, 21, 32 + 21, | |
6736 | 22, 32 + 22, 23, 32 + 23, | |
6737 | 28, 32 + 28, 29, 32 + 29, | |
6738 | 30, 32 + 30, 31, 32 + 31, | |
6739 | ], | |
6740 | ); | |
6741 | transmute(r) | |
6742 | } | |
6743 | ||
6744 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6745 | /// | |
6746 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi16&expand=6010) | |
6747 | #[inline] | |
6748 | #[target_feature(enable = "avx512bw")] | |
6749 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6750 | pub unsafe fn _mm512_mask_unpackhi_epi16( | |
6751 | src: __m512i, | |
6752 | k: __mmask32, | |
6753 | a: __m512i, | |
6754 | b: __m512i, | |
6755 | ) -> __m512i { | |
6756 | let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); | |
6757 | transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32())) | |
6758 | } | |
6759 | ||
6760 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6761 | /// | |
6762 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi16&expand=6011) | |
6763 | #[inline] | |
6764 | #[target_feature(enable = "avx512bw")] | |
6765 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6766 | pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
6767 | let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); | |
6768 | let zero = _mm512_setzero_si512().as_i16x32(); | |
6769 | transmute(simd_select_bitmask(k, unpackhi, zero)) | |
6770 | } | |
6771 | ||
6772 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6773 | /// | |
6774 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpackhi_epi16&expand=6007) | |
6775 | #[inline] | |
6776 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6777 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6778 | pub unsafe fn _mm256_mask_unpackhi_epi16( | |
6779 | src: __m256i, | |
6780 | k: __mmask16, | |
6781 | a: __m256i, | |
6782 | b: __m256i, | |
6783 | ) -> __m256i { | |
6784 | let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); | |
6785 | transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16())) | |
6786 | } | |
6787 | ||
6788 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6789 | /// | |
6790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpackhi_epi16&expand=6008) | |
6791 | #[inline] | |
6792 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6793 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6794 | pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
6795 | let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); | |
6796 | let zero = _mm256_setzero_si256().as_i16x16(); | |
6797 | transmute(simd_select_bitmask(k, unpackhi, zero)) | |
6798 | } | |
6799 | ||
6800 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6801 | /// | |
6802 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpackhi_epi16&expand=6004) | |
6803 | #[inline] | |
6804 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6805 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6806 | pub unsafe fn _mm_mask_unpackhi_epi16( | |
6807 | src: __m128i, | |
6808 | k: __mmask8, | |
6809 | a: __m128i, | |
6810 | b: __m128i, | |
6811 | ) -> __m128i { | |
6812 | let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); | |
6813 | transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8())) | |
6814 | } | |
6815 | ||
6816 | /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6817 | /// | |
6818 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpackhi_epi16&expand=6005) | |
6819 | #[inline] | |
6820 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6821 | #[cfg_attr(test, assert_instr(vpunpckhwd))] | |
6822 | pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
6823 | let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); | |
6824 | let zero = _mm_setzero_si128().as_i16x8(); | |
6825 | transmute(simd_select_bitmask(k, unpackhi, zero)) | |
6826 | } | |
6827 | ||
6828 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. | |
6829 | /// | |
6830 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi8&expand=6039) | |
6831 | #[inline] | |
6832 | #[target_feature(enable = "avx512bw")] | |
6833 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6834 | pub unsafe fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i { | |
6835 | let a = a.as_i8x64(); | |
6836 | let b = b.as_i8x64(); | |
6837 | #[rustfmt::skip] | |
17df50a5 | 6838 | let r: i8x64 = simd_shuffle64!( |
fc512014 XL |
6839 | a, |
6840 | b, | |
6841 | [ | |
6842 | 8, 64+8, 9, 64+9, | |
6843 | 10, 64+10, 11, 64+11, | |
6844 | 12, 64+12, 13, 64+13, | |
6845 | 14, 64+14, 15, 64+15, | |
6846 | 24, 64+24, 25, 64+25, | |
6847 | 26, 64+26, 27, 64+27, | |
6848 | 28, 64+28, 29, 64+29, | |
6849 | 30, 64+30, 31, 64+31, | |
6850 | 40, 64+40, 41, 64+41, | |
6851 | 42, 64+42, 43, 64+43, | |
6852 | 44, 64+44, 45, 64+45, | |
6853 | 46, 64+46, 47, 64+47, | |
6854 | 56, 64+56, 57, 64+57, | |
6855 | 58, 64+58, 59, 64+59, | |
6856 | 60, 64+60, 61, 64+61, | |
6857 | 62, 64+62, 63, 64+63, | |
6858 | ], | |
6859 | ); | |
6860 | transmute(r) | |
6861 | } | |
6862 | ||
6863 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6864 | /// | |
6865 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi8&expand=6037) | |
6866 | #[inline] | |
6867 | #[target_feature(enable = "avx512bw")] | |
6868 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6869 | pub unsafe fn _mm512_mask_unpackhi_epi8( | |
6870 | src: __m512i, | |
6871 | k: __mmask64, | |
6872 | a: __m512i, | |
6873 | b: __m512i, | |
6874 | ) -> __m512i { | |
6875 | let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); | |
6876 | transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64())) | |
6877 | } | |
6878 | ||
6879 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6880 | /// | |
6881 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi8&expand=6038) | |
6882 | #[inline] | |
6883 | #[target_feature(enable = "avx512bw")] | |
6884 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6885 | pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
6886 | let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); | |
6887 | let zero = _mm512_setzero_si512().as_i8x64(); | |
6888 | transmute(simd_select_bitmask(k, unpackhi, zero)) | |
6889 | } | |
6890 | ||
6891 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6892 | /// | |
6893 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpackhi_epi8&expand=6034) | |
6894 | #[inline] | |
6895 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6896 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6897 | pub unsafe fn _mm256_mask_unpackhi_epi8( | |
6898 | src: __m256i, | |
6899 | k: __mmask32, | |
6900 | a: __m256i, | |
6901 | b: __m256i, | |
6902 | ) -> __m256i { | |
6903 | let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); | |
6904 | transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32())) | |
6905 | } | |
6906 | ||
6907 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6908 | /// | |
6909 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpackhi_epi8&expand=6035) | |
6910 | #[inline] | |
6911 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6912 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6913 | pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
6914 | let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); | |
6915 | let zero = _mm256_setzero_si256().as_i8x32(); | |
6916 | transmute(simd_select_bitmask(k, unpackhi, zero)) | |
6917 | } | |
6918 | ||
6919 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6920 | /// | |
6921 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpackhi_epi8&expand=6031) | |
6922 | #[inline] | |
6923 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6924 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6925 | pub unsafe fn _mm_mask_unpackhi_epi8( | |
6926 | src: __m128i, | |
6927 | k: __mmask16, | |
6928 | a: __m128i, | |
6929 | b: __m128i, | |
6930 | ) -> __m128i { | |
6931 | let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); | |
6932 | transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16())) | |
6933 | } | |
6934 | ||
6935 | /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6936 | /// | |
6937 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpackhi_epi8&expand=6032) | |
6938 | #[inline] | |
6939 | #[target_feature(enable = "avx512bw,avx512vl")] | |
6940 | #[cfg_attr(test, assert_instr(vpunpckhbw))] | |
6941 | pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
6942 | let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); | |
6943 | let zero = _mm_setzero_si128().as_i8x16(); | |
6944 | transmute(simd_select_bitmask(k, unpackhi, zero)) | |
6945 | } | |
6946 | ||
6947 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. | |
6948 | /// | |
6949 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi16&expand=6069) | |
6950 | #[inline] | |
6951 | #[target_feature(enable = "avx512bw")] | |
6952 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
6953 | pub unsafe fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i { | |
6954 | let a = a.as_i16x32(); | |
6955 | let b = b.as_i16x32(); | |
6956 | #[rustfmt::skip] | |
17df50a5 | 6957 | let r: i16x32 = simd_shuffle32!( |
fc512014 XL |
6958 | a, |
6959 | b, | |
6960 | [ | |
6961 | 0, 32+0, 1, 32+1, | |
6962 | 2, 32+2, 3, 32+3, | |
6963 | 8, 32+8, 9, 32+9, | |
6964 | 10, 32+10, 11, 32+11, | |
6965 | 16, 32+16, 17, 32+17, | |
6966 | 18, 32+18, 19, 32+19, | |
6967 | 24, 32+24, 25, 32+25, | |
6968 | 26, 32+26, 27, 32+27 | |
6969 | ], | |
6970 | ); | |
6971 | transmute(r) | |
6972 | } | |
6973 | ||
6974 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
6975 | /// | |
6976 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi16&expand=6067) | |
6977 | #[inline] | |
6978 | #[target_feature(enable = "avx512bw")] | |
6979 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
6980 | pub unsafe fn _mm512_mask_unpacklo_epi16( | |
6981 | src: __m512i, | |
6982 | k: __mmask32, | |
6983 | a: __m512i, | |
6984 | b: __m512i, | |
6985 | ) -> __m512i { | |
6986 | let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); | |
6987 | transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32())) | |
6988 | } | |
6989 | ||
6990 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
6991 | /// | |
6992 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi16&expand=6068) | |
6993 | #[inline] | |
6994 | #[target_feature(enable = "avx512bw")] | |
6995 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
6996 | pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { | |
6997 | let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); | |
6998 | let zero = _mm512_setzero_si512().as_i16x32(); | |
6999 | transmute(simd_select_bitmask(k, unpacklo, zero)) | |
7000 | } | |
7001 | ||
7002 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7003 | /// | |
7004 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpacklo_epi16&expand=6064) | |
7005 | #[inline] | |
7006 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7007 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
7008 | pub unsafe fn _mm256_mask_unpacklo_epi16( | |
7009 | src: __m256i, | |
7010 | k: __mmask16, | |
7011 | a: __m256i, | |
7012 | b: __m256i, | |
7013 | ) -> __m256i { | |
7014 | let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); | |
7015 | transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16())) | |
7016 | } | |
7017 | ||
7018 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7019 | /// | |
7020 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpacklo_epi16&expand=6065) | |
7021 | #[inline] | |
7022 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7023 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
7024 | pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { | |
7025 | let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); | |
7026 | let zero = _mm256_setzero_si256().as_i16x16(); | |
7027 | transmute(simd_select_bitmask(k, unpacklo, zero)) | |
7028 | } | |
7029 | ||
7030 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7031 | /// | |
7032 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpacklo_epi16&expand=6061) | |
7033 | #[inline] | |
7034 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7035 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
7036 | pub unsafe fn _mm_mask_unpacklo_epi16( | |
7037 | src: __m128i, | |
7038 | k: __mmask8, | |
7039 | a: __m128i, | |
7040 | b: __m128i, | |
7041 | ) -> __m128i { | |
7042 | let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); | |
7043 | transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8())) | |
7044 | } | |
7045 | ||
7046 | /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7047 | /// | |
7048 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpacklo_epi16&expand=6062) | |
7049 | #[inline] | |
7050 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7051 | #[cfg_attr(test, assert_instr(vpunpcklwd))] | |
7052 | pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { | |
7053 | let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); | |
7054 | let zero = _mm_setzero_si128().as_i16x8(); | |
7055 | transmute(simd_select_bitmask(k, unpacklo, zero)) | |
7056 | } | |
7057 | ||
7058 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. | |
7059 | /// | |
7060 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi8&expand=6096) | |
7061 | #[inline] | |
7062 | #[target_feature(enable = "avx512bw")] | |
7063 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7064 | pub unsafe fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i { | |
7065 | let a = a.as_i8x64(); | |
7066 | let b = b.as_i8x64(); | |
7067 | #[rustfmt::skip] | |
17df50a5 | 7068 | let r: i8x64 = simd_shuffle64!( |
fc512014 XL |
7069 | a, |
7070 | b, | |
7071 | [ | |
7072 | 0, 64+0, 1, 64+1, | |
7073 | 2, 64+2, 3, 64+3, | |
7074 | 4, 64+4, 5, 64+5, | |
7075 | 6, 64+6, 7, 64+7, | |
7076 | 16, 64+16, 17, 64+17, | |
7077 | 18, 64+18, 19, 64+19, | |
7078 | 20, 64+20, 21, 64+21, | |
7079 | 22, 64+22, 23, 64+23, | |
7080 | 32, 64+32, 33, 64+33, | |
7081 | 34, 64+34, 35, 64+35, | |
7082 | 36, 64+36, 37, 64+37, | |
7083 | 38, 64+38, 39, 64+39, | |
7084 | 48, 64+48, 49, 64+49, | |
7085 | 50, 64+50, 51, 64+51, | |
7086 | 52, 64+52, 53, 64+53, | |
7087 | 54, 64+54, 55, 64+55, | |
7088 | ], | |
7089 | ); | |
7090 | transmute(r) | |
7091 | } | |
7092 | ||
7093 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7094 | /// | |
7095 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi8&expand=6094) | |
7096 | #[inline] | |
7097 | #[target_feature(enable = "avx512bw")] | |
7098 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7099 | pub unsafe fn _mm512_mask_unpacklo_epi8( | |
7100 | src: __m512i, | |
7101 | k: __mmask64, | |
7102 | a: __m512i, | |
7103 | b: __m512i, | |
7104 | ) -> __m512i { | |
7105 | let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); | |
7106 | transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64())) | |
7107 | } | |
7108 | ||
7109 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7110 | /// | |
7111 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi8&expand=6095) | |
7112 | #[inline] | |
7113 | #[target_feature(enable = "avx512bw")] | |
7114 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7115 | pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
7116 | let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); | |
7117 | let zero = _mm512_setzero_si512().as_i8x64(); | |
7118 | transmute(simd_select_bitmask(k, unpacklo, zero)) | |
7119 | } | |
7120 | ||
7121 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7122 | /// | |
7123 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_unpacklo_epi8&expand=6091) | |
7124 | #[inline] | |
7125 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7126 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7127 | pub unsafe fn _mm256_mask_unpacklo_epi8( | |
7128 | src: __m256i, | |
7129 | k: __mmask32, | |
7130 | a: __m256i, | |
7131 | b: __m256i, | |
7132 | ) -> __m256i { | |
7133 | let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); | |
7134 | transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32())) | |
7135 | } | |
7136 | ||
7137 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7138 | /// | |
7139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_unpacklo_epi8&expand=6092) | |
7140 | #[inline] | |
7141 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7142 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7143 | pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { | |
7144 | let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); | |
7145 | let zero = _mm256_setzero_si256().as_i8x32(); | |
7146 | transmute(simd_select_bitmask(k, unpacklo, zero)) | |
7147 | } | |
7148 | ||
7149 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7150 | /// | |
7151 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_unpacklo_epi8&expand=6088) | |
7152 | #[inline] | |
7153 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7154 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7155 | pub unsafe fn _mm_mask_unpacklo_epi8( | |
7156 | src: __m128i, | |
7157 | k: __mmask16, | |
7158 | a: __m128i, | |
7159 | b: __m128i, | |
7160 | ) -> __m128i { | |
7161 | let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); | |
7162 | transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16())) | |
7163 | } | |
7164 | ||
7165 | /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7166 | /// | |
7167 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_unpacklo_epi8&expand=6089) | |
7168 | #[inline] | |
7169 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7170 | #[cfg_attr(test, assert_instr(vpunpcklbw))] | |
7171 | pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { | |
7172 | let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); | |
7173 | let zero = _mm_setzero_si128().as_i8x16(); | |
7174 | transmute(simd_select_bitmask(k, unpacklo, zero)) | |
7175 | } | |
7176 | ||
7177 | /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7178 | /// | |
7179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi16&expand=3795) | |
7180 | #[inline] | |
7181 | #[target_feature(enable = "avx512bw")] | |
7182 | #[cfg_attr(test, assert_instr(vmovdqu16))] | |
7183 | pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { | |
7184 | let mov = a.as_i16x32(); | |
7185 | transmute(simd_select_bitmask(k, mov, src.as_i16x32())) | |
7186 | } | |
7187 | ||
7188 | /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7189 | /// | |
7190 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi16&expand=3796) | |
7191 | #[inline] | |
7192 | #[target_feature(enable = "avx512bw")] | |
7193 | #[cfg_attr(test, assert_instr(vmovdqu16))] | |
7194 | pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { | |
7195 | let mov = a.as_i16x32(); | |
7196 | let zero = _mm512_setzero_si512().as_i16x32(); | |
7197 | transmute(simd_select_bitmask(k, mov, zero)) | |
7198 | } | |
7199 | ||
7200 | /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7201 | /// | |
7202 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mov_epi16&expand=3793) | |
7203 | #[inline] | |
7204 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7205 | #[cfg_attr(test, assert_instr(vmovdqu16))] | |
7206 | pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { | |
7207 | let mov = a.as_i16x16(); | |
7208 | transmute(simd_select_bitmask(k, mov, src.as_i16x16())) | |
7209 | } | |
7210 | ||
7211 | /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7212 | /// | |
7213 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mov_epi16&expand=3794) | |
7214 | #[inline] | |
7215 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7216 | #[cfg_attr(test, assert_instr(vmovdqu16))] | |
7217 | pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { | |
7218 | let mov = a.as_i16x16(); | |
7219 | let zero = _mm256_setzero_si256().as_i16x16(); | |
7220 | transmute(simd_select_bitmask(k, mov, zero)) | |
7221 | } | |
7222 | ||
7223 | /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7224 | /// | |
7225 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mov_epi16&expand=3791) | |
7226 | #[inline] | |
7227 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7228 | #[cfg_attr(test, assert_instr(vmovdqu16))] | |
7229 | pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
7230 | let mov = a.as_i16x8(); | |
7231 | transmute(simd_select_bitmask(k, mov, src.as_i16x8())) | |
7232 | } | |
7233 | ||
7234 | /// Move packed 16-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7235 | /// | |
7236 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mov_epi16&expand=3792) | |
7237 | #[inline] | |
7238 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7239 | #[cfg_attr(test, assert_instr(vmovdqu16))] | |
7240 | pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { | |
7241 | let mov = a.as_i16x8(); | |
7242 | let zero = _mm_setzero_si128().as_i16x8(); | |
7243 | transmute(simd_select_bitmask(k, mov, zero)) | |
7244 | } | |
7245 | ||
7246 | /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7247 | /// | |
7248 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi8&expand=3813) | |
7249 | #[inline] | |
7250 | #[target_feature(enable = "avx512bw")] | |
7251 | #[cfg_attr(test, assert_instr(vmovdqu8))] | |
7252 | pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { | |
7253 | let mov = a.as_i8x64(); | |
7254 | transmute(simd_select_bitmask(k, mov, src.as_i8x64())) | |
7255 | } | |
7256 | ||
7257 | /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7258 | /// | |
7259 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi8&expand=3814) | |
7260 | #[inline] | |
7261 | #[target_feature(enable = "avx512bw")] | |
7262 | #[cfg_attr(test, assert_instr(vmovdqu8))] | |
7263 | pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { | |
7264 | let mov = a.as_i8x64(); | |
7265 | let zero = _mm512_setzero_si512().as_i8x64(); | |
7266 | transmute(simd_select_bitmask(k, mov, zero)) | |
7267 | } | |
7268 | ||
7269 | /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7270 | /// | |
7271 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_mov_epi8&expand=3811) | |
7272 | #[inline] | |
7273 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7274 | #[cfg_attr(test, assert_instr(vmovdqu8))] | |
7275 | pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { | |
7276 | let mov = a.as_i8x32(); | |
7277 | transmute(simd_select_bitmask(k, mov, src.as_i8x32())) | |
7278 | } | |
7279 | ||
7280 | /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7281 | /// | |
7282 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_mov_epi8&expand=3812) | |
7283 | #[inline] | |
7284 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7285 | #[cfg_attr(test, assert_instr(vmovdqu8))] | |
7286 | pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { | |
7287 | let mov = a.as_i8x32(); | |
7288 | let zero = _mm256_setzero_si256().as_i8x32(); | |
7289 | transmute(simd_select_bitmask(k, mov, zero)) | |
7290 | } | |
7291 | ||
7292 | /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7293 | /// | |
7294 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mov_epi8&expand=3809) | |
7295 | #[inline] | |
7296 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7297 | #[cfg_attr(test, assert_instr(vmovdqu8))] | |
7298 | pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { | |
7299 | let mov = a.as_i8x16(); | |
7300 | transmute(simd_select_bitmask(k, mov, src.as_i8x16())) | |
7301 | } | |
7302 | ||
7303 | /// Move packed 8-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7304 | /// | |
7305 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mov_epi8&expand=3810) | |
7306 | #[inline] | |
7307 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7308 | #[cfg_attr(test, assert_instr(vmovdqu8))] | |
7309 | pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { | |
7310 | let mov = a.as_i8x16(); | |
7311 | let zero = _mm_setzero_si128().as_i8x16(); | |
7312 | transmute(simd_select_bitmask(k, mov, zero)) | |
7313 | } | |
7314 | ||
7315 | /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7316 | /// | |
7317 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi16&expand=4942) | |
7318 | #[inline] | |
7319 | #[target_feature(enable = "avx512bw")] | |
7320 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
7321 | pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i { | |
7322 | let r = _mm512_set1_epi16(a).as_i16x32(); | |
7323 | transmute(simd_select_bitmask(k, r, src.as_i16x32())) | |
7324 | } | |
7325 | ||
7326 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7327 | /// | |
7328 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi16&expand=4943) | |
7329 | #[inline] | |
7330 | #[target_feature(enable = "avx512bw")] | |
7331 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
7332 | pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { | |
7333 | let r = _mm512_set1_epi16(a).as_i16x32(); | |
7334 | let zero = _mm512_setzero_si512().as_i16x32(); | |
7335 | transmute(simd_select_bitmask(k, r, zero)) | |
7336 | } | |
7337 | ||
7338 | /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7339 | /// | |
7340 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_set1_epi16&expand=4939) | |
7341 | #[inline] | |
7342 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7343 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
7344 | pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i { | |
7345 | let r = _mm256_set1_epi16(a).as_i16x16(); | |
7346 | transmute(simd_select_bitmask(k, r, src.as_i16x16())) | |
7347 | } | |
7348 | ||
7349 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7350 | /// | |
7351 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_set1_epi16&expand=4940) | |
7352 | #[inline] | |
7353 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7354 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
7355 | pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { | |
7356 | let r = _mm256_set1_epi16(a).as_i16x16(); | |
7357 | let zero = _mm256_setzero_si256().as_i16x16(); | |
7358 | transmute(simd_select_bitmask(k, r, zero)) | |
7359 | } | |
7360 | ||
7361 | /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7362 | /// | |
7363 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_set1_epi16&expand=4936) | |
7364 | #[inline] | |
7365 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7366 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
7367 | pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i { | |
7368 | let r = _mm_set1_epi16(a).as_i16x8(); | |
7369 | transmute(simd_select_bitmask(k, r, src.as_i16x8())) | |
7370 | } | |
7371 | ||
7372 | /// Broadcast the low packed 16-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7373 | /// | |
7374 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_set1_epi16&expand=4937) | |
7375 | #[inline] | |
7376 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7377 | #[cfg_attr(test, assert_instr(vpbroadcastw))] | |
7378 | pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { | |
7379 | let r = _mm_set1_epi16(a).as_i16x8(); | |
7380 | let zero = _mm_setzero_si128().as_i16x8(); | |
7381 | transmute(simd_select_bitmask(k, r, zero)) | |
7382 | } | |
7383 | ||
7384 | /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7385 | /// | |
7386 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi8&expand=4970) | |
7387 | #[inline] | |
7388 | #[target_feature(enable = "avx512bw")] | |
7389 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
7390 | pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i { | |
7391 | let r = _mm512_set1_epi8(a).as_i8x64(); | |
7392 | transmute(simd_select_bitmask(k, r, src.as_i8x64())) | |
7393 | } | |
7394 | ||
7395 | /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7396 | /// | |
7397 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi8&expand=4971) | |
7398 | #[inline] | |
7399 | #[target_feature(enable = "avx512bw")] | |
7400 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
7401 | pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { | |
7402 | let r = _mm512_set1_epi8(a).as_i8x64(); | |
7403 | let zero = _mm512_setzero_si512().as_i8x64(); | |
7404 | transmute(simd_select_bitmask(k, r, zero)) | |
7405 | } | |
7406 | ||
7407 | /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7408 | /// | |
7409 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_set1_epi8&expand=4967) | |
7410 | #[inline] | |
7411 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7412 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
7413 | pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i { | |
7414 | let r = _mm256_set1_epi8(a).as_i8x32(); | |
7415 | transmute(simd_select_bitmask(k, r, src.as_i8x32())) | |
7416 | } | |
7417 | ||
7418 | /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7419 | /// | |
7420 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_set1_epi8&expand=4968) | |
7421 | #[inline] | |
7422 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7423 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
7424 | pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { | |
7425 | let r = _mm256_set1_epi8(a).as_i8x32(); | |
7426 | let zero = _mm256_setzero_si256().as_i8x32(); | |
7427 | transmute(simd_select_bitmask(k, r, zero)) | |
7428 | } | |
7429 | ||
7430 | /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7431 | /// | |
7432 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_set1_epi8&expand=4964) | |
7433 | #[inline] | |
7434 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7435 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
7436 | pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { | |
7437 | let r = _mm_set1_epi8(a).as_i8x16(); | |
7438 | transmute(simd_select_bitmask(k, r, src.as_i8x16())) | |
7439 | } | |
7440 | ||
7441 | /// Broadcast 8-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7442 | /// | |
7443 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_set1_epi8&expand=4965) | |
7444 | #[inline] | |
7445 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7446 | #[cfg_attr(test, assert_instr(vpbroadcastb))] | |
7447 | pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { | |
7448 | let r = _mm_set1_epi8(a).as_i8x16(); | |
7449 | let zero = _mm_setzero_si128().as_i8x16(); | |
7450 | transmute(simd_select_bitmask(k, r, zero)) | |
7451 | } | |
7452 | ||
/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x32();
    // Each consecutive 2-bit field of IMM8 selects one of the four low words of
    // a 128-bit lane. The same four selectors are repeated for every lane
    // (element offsets 0, 8, 16, 24), while the high words of each lane
    // (offsets 4..8, 12..16, 20..24, 28..32) pass through unchanged.
    let r: i16x32 = simd_shuffle32!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
            (IMM8 as u32 & 0b11) + 8,
            ((IMM8 as u32 >> 2) & 0b11) + 8,
            ((IMM8 as u32 >> 4) & 0b11) + 8,
            ((IMM8 as u32 >> 6) & 0b11) + 8,
            12,
            13,
            14,
            15,
            (IMM8 as u32 & 0b11) + 16,
            ((IMM8 as u32 >> 2) & 0b11) + 16,
            ((IMM8 as u32 >> 4) & 0b11) + 16,
            ((IMM8 as u32 >> 6) & 0b11) + 16,
            20,
            21,
            22,
            23,
            (IMM8 as u32 & 0b11) + 24,
            ((IMM8 as u32 >> 2) & 0b11) + 24,
            ((IMM8 as u32 >> 4) & 0b11) + 24,
            ((IMM8 as u32 >> 6) & 0b11) + 24,
            28,
            29,
            30,
            31,
        ],
    );
    transmute(r)
}
7503 | ||
7504 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7505 | /// | |
7506 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219) | |
7507 | #[inline] | |
7508 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
7509 | #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] |
7510 | #[rustc_legacy_const_generics(3)] | |
7511 | pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>( | |
fc512014 XL |
7512 | src: __m512i, |
7513 | k: __mmask32, | |
7514 | a: __m512i, | |
fc512014 | 7515 | ) -> __m512i { |
17df50a5 XL |
7516 | static_assert_imm8!(IMM8); |
7517 | let r = _mm512_shufflelo_epi16::<IMM8>(a); | |
cdc7bbd5 | 7518 | transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) |
fc512014 XL |
7519 | } |
7520 | ||
7521 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7522 | /// | |
7523 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220) | |
7524 | #[inline] | |
7525 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
7526 | #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))] |
7527 | #[rustc_legacy_const_generics(2)] | |
7528 | pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { | |
7529 | static_assert_imm8!(IMM8); | |
7530 | let r = _mm512_shufflelo_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7531 | let zero = _mm512_setzero_si512().as_i16x32(); |
7532 | transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) | |
7533 | } | |
7534 | ||
7535 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7536 | /// | |
7537 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflelo_epi16&expand=5216) | |
7538 | #[inline] | |
7539 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7540 | #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] |
7541 | #[rustc_legacy_const_generics(3)] | |
7542 | pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>( | |
cdc7bbd5 XL |
7543 | src: __m256i, |
7544 | k: __mmask16, | |
7545 | a: __m256i, | |
cdc7bbd5 | 7546 | ) -> __m256i { |
17df50a5 XL |
7547 | static_assert_imm8!(IMM8); |
7548 | let shuffle = _mm256_shufflelo_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7549 | transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) |
7550 | } | |
7551 | ||
7552 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7553 | /// | |
7554 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflelo_epi16&expand=5217) | |
7555 | #[inline] | |
7556 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7557 | #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] |
7558 | #[rustc_legacy_const_generics(2)] | |
7559 | pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { | |
7560 | static_assert_imm8!(IMM8); | |
7561 | let shuffle = _mm256_shufflelo_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7562 | let zero = _mm256_setzero_si256().as_i16x16(); |
7563 | transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) | |
7564 | } | |
7565 | ||
7566 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7567 | /// | |
7568 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflelo_epi16&expand=5213) | |
7569 | #[inline] | |
7570 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7571 | #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] |
7572 | #[rustc_legacy_const_generics(3)] | |
7573 | pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>( | |
cdc7bbd5 XL |
7574 | src: __m128i, |
7575 | k: __mmask8, | |
7576 | a: __m128i, | |
cdc7bbd5 | 7577 | ) -> __m128i { |
17df50a5 XL |
7578 | static_assert_imm8!(IMM8); |
7579 | let shuffle = _mm_shufflelo_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7580 | transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) |
7581 | } | |
7582 | ||
7583 | /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7584 | /// | |
7585 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflelo_epi16&expand=5214) | |
7586 | #[inline] | |
7587 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7588 | #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))] |
7589 | #[rustc_legacy_const_generics(2)] | |
7590 | pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { | |
7591 | static_assert_imm8!(IMM8); | |
7592 | let shuffle = _mm_shufflelo_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7593 | let zero = _mm_setzero_si128().as_i16x8(); |
7594 | transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) | |
fc512014 XL |
7595 | } |
7596 | ||
/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x32();
    // Each consecutive 2-bit field of IMM8 selects one of the four high words
    // of a 128-bit lane. The same four selectors are repeated for every lane
    // (element offsets 4, 12, 20, 28), while the low words of each lane
    // (offsets 0..4, 8..12, 16..20, 24..28) pass through unchanged.
    let r: i16x32 = simd_shuffle32!(
        a,
        a,
        <const IMM8: i32> [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
            8,
            9,
            10,
            11,
            (IMM8 as u32 & 0b11) + 12,
            ((IMM8 as u32 >> 2) & 0b11) + 12,
            ((IMM8 as u32 >> 4) & 0b11) + 12,
            ((IMM8 as u32 >> 6) & 0b11) + 12,
            16,
            17,
            18,
            19,
            (IMM8 as u32 & 0b11) + 20,
            ((IMM8 as u32 >> 2) & 0b11) + 20,
            ((IMM8 as u32 >> 4) & 0b11) + 20,
            ((IMM8 as u32 >> 6) & 0b11) + 20,
            24,
            25,
            26,
            27,
            (IMM8 as u32 & 0b11) + 28,
            ((IMM8 as u32 >> 2) & 0b11) + 28,
            ((IMM8 as u32 >> 4) & 0b11) + 28,
            ((IMM8 as u32 >> 6) & 0b11) + 28,
        ],
    );
    transmute(r)
}
7647 | ||
7648 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7649 | /// | |
7650 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210) | |
7651 | #[inline] | |
7652 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
7653 | #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] |
7654 | #[rustc_legacy_const_generics(3)] | |
7655 | pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>( | |
fc512014 XL |
7656 | src: __m512i, |
7657 | k: __mmask32, | |
7658 | a: __m512i, | |
fc512014 | 7659 | ) -> __m512i { |
17df50a5 XL |
7660 | static_assert_imm8!(IMM8); |
7661 | let r = _mm512_shufflehi_epi16::<IMM8>(a); | |
cdc7bbd5 | 7662 | transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) |
fc512014 XL |
7663 | } |
7664 | ||
7665 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7666 | /// | |
7667 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211) | |
7668 | #[inline] | |
7669 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
7670 | #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))] |
7671 | #[rustc_legacy_const_generics(2)] | |
7672 | pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i { | |
7673 | static_assert_imm8!(IMM8); | |
7674 | let r = _mm512_shufflehi_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7675 | let zero = _mm512_setzero_si512().as_i16x32(); |
7676 | transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) | |
7677 | } | |
7678 | ||
7679 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7680 | /// | |
7681 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflehi_epi16&expand=5207) | |
7682 | #[inline] | |
7683 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7684 | #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] |
7685 | #[rustc_legacy_const_generics(3)] | |
7686 | pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>( | |
cdc7bbd5 XL |
7687 | src: __m256i, |
7688 | k: __mmask16, | |
7689 | a: __m256i, | |
cdc7bbd5 | 7690 | ) -> __m256i { |
17df50a5 XL |
7691 | static_assert_imm8!(IMM8); |
7692 | let shuffle = _mm256_shufflehi_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7693 | transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) |
7694 | } | |
7695 | ||
7696 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7697 | /// | |
7698 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflehi_epi16&expand=5208) | |
7699 | #[inline] | |
7700 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7701 | #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] |
7702 | #[rustc_legacy_const_generics(2)] | |
7703 | pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i { | |
7704 | static_assert_imm8!(IMM8); | |
7705 | let shuffle = _mm256_shufflehi_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7706 | let zero = _mm256_setzero_si256().as_i16x16(); |
7707 | transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) | |
7708 | } | |
7709 | ||
7710 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7711 | /// | |
7712 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflehi_epi16&expand=5204) | |
7713 | #[inline] | |
7714 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7715 | #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] |
7716 | #[rustc_legacy_const_generics(3)] | |
7717 | pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>( | |
cdc7bbd5 XL |
7718 | src: __m128i, |
7719 | k: __mmask8, | |
7720 | a: __m128i, | |
cdc7bbd5 | 7721 | ) -> __m128i { |
17df50a5 XL |
7722 | static_assert_imm8!(IMM8); |
7723 | let shuffle = _mm_shufflehi_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7724 | transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) |
7725 | } | |
7726 | ||
7727 | /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7728 | /// | |
7729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflehi_epi16&expand=5205) | |
7730 | #[inline] | |
7731 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
7732 | #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))] |
7733 | #[rustc_legacy_const_generics(2)] | |
7734 | pub unsafe fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i { | |
7735 | static_assert_imm8!(IMM8); | |
7736 | let shuffle = _mm_shufflehi_epi16::<IMM8>(a); | |
cdc7bbd5 XL |
7737 | let zero = _mm_setzero_si128().as_i16x8(); |
7738 | transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) | |
7739 | } | |
7740 | ||
7741 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst. | |
7742 | /// | |
7743 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_epi8&expand=5159) | |
7744 | #[inline] | |
7745 | #[target_feature(enable = "avx512bw")] | |
7746 | #[cfg_attr(test, assert_instr(vpshufb))] | |
7747 | pub unsafe fn _mm512_shuffle_epi8(a: __m512i, b: __m512i) -> __m512i { | |
7748 | transmute(vpshufb(a.as_i8x64(), b.as_i8x64())) | |
7749 | } | |
7750 | ||
7751 | /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7752 | /// | |
7753 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_epi8&expand=5157) | |
7754 | #[inline] | |
7755 | #[target_feature(enable = "avx512bw")] | |
7756 | #[cfg_attr(test, assert_instr(vpshufb))] | |
7757 | pub unsafe fn _mm512_mask_shuffle_epi8( | |
7758 | src: __m512i, | |
7759 | k: __mmask64, | |
7760 | a: __m512i, | |
7761 | b: __m512i, | |
7762 | ) -> __m512i { | |
7763 | let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); | |
7764 | transmute(simd_select_bitmask(k, shuffle, src.as_i8x64())) | |
7765 | } | |
7766 | ||
7767 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7768 | /// | |
7769 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_epi8&expand=5158) | |
7770 | #[inline] | |
7771 | #[target_feature(enable = "avx512bw")] | |
7772 | #[cfg_attr(test, assert_instr(vpshufb))] | |
7773 | pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { | |
7774 | let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); | |
7775 | let zero = _mm512_setzero_si512().as_i8x64(); | |
7776 | transmute(simd_select_bitmask(k, shuffle, zero)) | |
7777 | } | |
7778 | ||
7779 | /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7780 | /// | |
7781 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shuffle_epi8&expand=5154) | |
7782 | #[inline] | |
7783 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7784 | #[cfg_attr(test, assert_instr(vpshufb))] | |
7785 | pub unsafe fn _mm256_mask_shuffle_epi8( | |
7786 | src: __m256i, | |
7787 | k: __mmask32, | |
7788 | a: __m256i, | |
7789 | b: __m256i, | |
7790 | ) -> __m256i { | |
7791 | let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); | |
7792 | transmute(simd_select_bitmask(k, shuffle, src.as_i8x32())) | |
fc512014 XL |
7793 | } |
7794 | ||
cdc7bbd5 | 7795 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
fc512014 | 7796 | /// |
cdc7bbd5 | 7797 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shuffle_epi8&expand=5155) |
fc512014 | 7798 | #[inline] |
cdc7bbd5 | 7799 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 7800 | #[cfg_attr(test, assert_instr(vpshufb))] |
cdc7bbd5 XL |
7801 | pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { |
7802 | let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); | |
7803 | let zero = _mm256_setzero_si256().as_i8x32(); | |
7804 | transmute(simd_select_bitmask(k, shuffle, zero)) | |
fc512014 XL |
7805 | } |
7806 | ||
7807 | /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
7808 | /// | |
cdc7bbd5 | 7809 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shuffle_epi8&expand=5151) |
fc512014 | 7810 | #[inline] |
cdc7bbd5 | 7811 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 7812 | #[cfg_attr(test, assert_instr(vpshufb))] |
cdc7bbd5 XL |
7813 | pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
7814 | let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); | |
7815 | transmute(simd_select_bitmask(k, shuffle, src.as_i8x16())) | |
fc512014 XL |
7816 | } |
7817 | ||
7818 | /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
7819 | /// | |
cdc7bbd5 | 7820 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shuffle_epi8&expand=5152) |
fc512014 | 7821 | #[inline] |
cdc7bbd5 | 7822 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 7823 | #[cfg_attr(test, assert_instr(vpshufb))] |
cdc7bbd5 XL |
7824 | pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { |
7825 | let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); | |
7826 | let zero = _mm_setzero_si128().as_i8x16(); | |
fc512014 XL |
7827 | transmute(simd_select_bitmask(k, shuffle, zero)) |
7828 | } | |
7829 | ||
7830 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. | |
7831 | /// | |
7832 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi16_mask&expand=5884) | |
7833 | #[inline] | |
7834 | #[target_feature(enable = "avx512bw")] | |
7835 | #[cfg_attr(test, assert_instr(vptestmw))] | |
7836 | pub unsafe fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
7837 | let and = _mm512_and_si512(a, b); | |
7838 | let zero = _mm512_setzero_si512(); | |
7839 | _mm512_cmpneq_epi16_mask(and, zero) | |
7840 | } | |
7841 | ||
7842 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. | |
7843 | /// | |
7844 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi16_mask&expand=5883) | |
7845 | #[inline] | |
7846 | #[target_feature(enable = "avx512bw")] | |
7847 | #[cfg_attr(test, assert_instr(vptestmw))] | |
7848 | pub unsafe fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
7849 | let and = _mm512_and_si512(a, b); | |
7850 | let zero = _mm512_setzero_si512(); | |
7851 | _mm512_mask_cmpneq_epi16_mask(k, and, zero) | |
7852 | } | |
7853 | ||
cdc7bbd5 XL |
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_test_epi16_mask&expand=5882)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi16_mask(and, zero)
}
7865 | ||
7866 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. | |
7867 | /// | |
7868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_test_epi16_mask&expand=5881) | |
7869 | #[inline] | |
7870 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7871 | #[cfg_attr(test, assert_instr(vptestmw))] | |
7872 | pub unsafe fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
7873 | let and = _mm256_and_si256(a, b); | |
7874 | let zero = _mm256_setzero_si256(); | |
7875 | _mm256_mask_cmpneq_epi16_mask(k, and, zero) | |
7876 | } | |
7877 | ||
/// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_epi16_mask&expand=5880)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmw))]
pub unsafe fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi16_mask(and, zero)
}
7889 | ||
7890 | /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. | |
7891 | /// | |
7892 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_test_epi16_mask&expand=5879) | |
7893 | #[inline] | |
7894 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7895 | #[cfg_attr(test, assert_instr(vptestmw))] | |
7896 | pub unsafe fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
7897 | let and = _mm_and_si128(a, b); | |
7898 | let zero = _mm_setzero_si128(); | |
7899 | _mm_mask_cmpneq_epi16_mask(k, and, zero) | |
7900 | } | |
7901 | ||
fc512014 XL |
7902 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7903 | /// | |
7904 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi8_mask&expand=5902) | |
7905 | #[inline] | |
7906 | #[target_feature(enable = "avx512bw")] | |
7907 | #[cfg_attr(test, assert_instr(vptestmb))] | |
7908 | pub unsafe fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
7909 | let and = _mm512_and_si512(a, b); | |
7910 | let zero = _mm512_setzero_si512(); | |
7911 | _mm512_cmpneq_epi8_mask(and, zero) | |
7912 | } | |
7913 | ||
7914 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. | |
7915 | /// | |
7916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi8_mask&expand=5901) | |
7917 | #[inline] | |
7918 | #[target_feature(enable = "avx512bw")] | |
7919 | #[cfg_attr(test, assert_instr(vptestmb))] | |
7920 | pub unsafe fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
7921 | let and = _mm512_and_si512(a, b); | |
7922 | let zero = _mm512_setzero_si512(); | |
7923 | _mm512_mask_cmpneq_epi8_mask(k, and, zero) | |
7924 | } | |
7925 | ||
cdc7bbd5 XL |
7926 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. |
7927 | /// | |
7928 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_test_epi8_mask&expand=5900) | |
7929 | #[inline] | |
7930 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7931 | #[cfg_attr(test, assert_instr(vptestmb))] | |
7932 | pub unsafe fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
7933 | let and = _mm256_and_si256(a, b); | |
7934 | let zero = _mm256_setzero_si256(); | |
7935 | _mm256_cmpneq_epi8_mask(and, zero) | |
7936 | } | |
7937 | ||
7938 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. | |
7939 | /// | |
7940 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_test_epi8_mask&expand=5899) | |
7941 | #[inline] | |
7942 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7943 | #[cfg_attr(test, assert_instr(vptestmb))] | |
7944 | pub unsafe fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
7945 | let and = _mm256_and_si256(a, b); | |
7946 | let zero = _mm256_setzero_si256(); | |
7947 | _mm256_mask_cmpneq_epi8_mask(k, and, zero) | |
7948 | } | |
7949 | ||
7950 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. | |
7951 | /// | |
7952 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_epi8_mask&expand=5898) | |
7953 | #[inline] | |
7954 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7955 | #[cfg_attr(test, assert_instr(vptestmb))] | |
7956 | pub unsafe fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
7957 | let and = _mm_and_si128(a, b); | |
7958 | let zero = _mm_setzero_si128(); | |
7959 | _mm_cmpneq_epi8_mask(and, zero) | |
7960 | } | |
7961 | ||
7962 | /// Compute the bitwise AND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero. | |
7963 | /// | |
7964 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_test_epi8_mask&expand=5897) | |
7965 | #[inline] | |
7966 | #[target_feature(enable = "avx512bw,avx512vl")] | |
7967 | #[cfg_attr(test, assert_instr(vptestmb))] | |
7968 | pub unsafe fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
7969 | let and = _mm_and_si128(a, b); | |
7970 | let zero = _mm_setzero_si128(); | |
7971 | _mm_mask_cmpneq_epi8_mask(k, and, zero) | |
7972 | } | |
7973 | ||
fc512014 XL |
7974 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
7975 | /// | |
7976 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi16_mask&expand=5915) | |
7977 | #[inline] | |
7978 | #[target_feature(enable = "avx512bw")] | |
7979 | #[cfg_attr(test, assert_instr(vptestnmw))] | |
7980 | pub unsafe fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 { | |
7981 | let and = _mm512_and_si512(a, b); | |
7982 | let zero = _mm512_setzero_si512(); | |
7983 | _mm512_cmpeq_epi16_mask(and, zero) | |
7984 | } | |
7985 | ||
7986 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. | |
7987 | /// | |
7988 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi16&expand=5914) | |
7989 | #[inline] | |
7990 | #[target_feature(enable = "avx512bw")] | |
7991 | #[cfg_attr(test, assert_instr(vptestnmw))] | |
7992 | pub unsafe fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 { | |
7993 | let and = _mm512_and_si512(a, b); | |
7994 | let zero = _mm512_setzero_si512(); | |
7995 | _mm512_mask_cmpeq_epi16_mask(k, and, zero) | |
7996 | } | |
7997 | ||
cdc7bbd5 XL |
7998 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
7999 | /// | |
8000 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testn_epi16_mask&expand=5913) | |
8001 | #[inline] | |
8002 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8003 | #[cfg_attr(test, assert_instr(vptestnmw))] | |
8004 | pub unsafe fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 { | |
8005 | let and = _mm256_and_si256(a, b); | |
8006 | let zero = _mm256_setzero_si256(); | |
8007 | _mm256_cmpeq_epi16_mask(and, zero) | |
8008 | } | |
8009 | ||
8010 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. | |
8011 | /// | |
8012 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_testn_epi16_mask&expand=5912) | |
8013 | #[inline] | |
8014 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8015 | #[cfg_attr(test, assert_instr(vptestnmw))] | |
8016 | pub unsafe fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 { | |
8017 | let and = _mm256_and_si256(a, b); | |
8018 | let zero = _mm256_setzero_si256(); | |
8019 | _mm256_mask_cmpeq_epi16_mask(k, and, zero) | |
8020 | } | |
8021 | ||
8022 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. | |
8023 | /// | |
8024 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testn_epi16_mask&expand=5911) | |
8025 | #[inline] | |
8026 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8027 | #[cfg_attr(test, assert_instr(vptestnmw))] | |
8028 | pub unsafe fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 { | |
8029 | let and = _mm_and_si128(a, b); | |
8030 | let zero = _mm_setzero_si128(); | |
8031 | _mm_cmpeq_epi16_mask(and, zero) | |
8032 | } | |
8033 | ||
8034 | /// Compute the bitwise NAND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. | |
8035 | /// | |
8036 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_testn_epi16_mask&expand=5910) | |
8037 | #[inline] | |
8038 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8039 | #[cfg_attr(test, assert_instr(vptestnmw))] | |
8040 | pub unsafe fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 { | |
8041 | let and = _mm_and_si128(a, b); | |
8042 | let zero = _mm_setzero_si128(); | |
8043 | _mm_mask_cmpeq_epi16_mask(k, and, zero) | |
8044 | } | |
8045 | ||
fc512014 XL |
8046 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
8047 | /// | |
8048 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi8_mask&expand=5933) | |
8049 | #[inline] | |
8050 | #[target_feature(enable = "avx512bw")] | |
8051 | #[cfg_attr(test, assert_instr(vptestnmb))] | |
8052 | pub unsafe fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 { | |
8053 | let and = _mm512_and_si512(a, b); | |
8054 | let zero = _mm512_setzero_si512(); | |
8055 | _mm512_cmpeq_epi8_mask(and, zero) | |
8056 | } | |
8057 | ||
8058 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. | |
8059 | /// | |
8060 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi8_mask&expand=5932) | |
8061 | #[inline] | |
8062 | #[target_feature(enable = "avx512bw")] | |
8063 | #[cfg_attr(test, assert_instr(vptestnmb))] | |
8064 | pub unsafe fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 { | |
8065 | let and = _mm512_and_si512(a, b); | |
8066 | let zero = _mm512_setzero_si512(); | |
8067 | _mm512_mask_cmpeq_epi8_mask(k, and, zero) | |
8068 | } | |
8069 | ||
cdc7bbd5 XL |
8070 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. |
8071 | /// | |
8072 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testn_epi8_mask&expand=5931) | |
8073 | #[inline] | |
8074 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8075 | #[cfg_attr(test, assert_instr(vptestnmb))] | |
8076 | pub unsafe fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 { | |
8077 | let and = _mm256_and_si256(a, b); | |
8078 | let zero = _mm256_setzero_si256(); | |
8079 | _mm256_cmpeq_epi8_mask(and, zero) | |
8080 | } | |
8081 | ||
8082 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. | |
8083 | /// | |
8084 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_testn_epi8_mask&expand=5930) | |
8085 | #[inline] | |
8086 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8087 | #[cfg_attr(test, assert_instr(vptestnmb))] | |
8088 | pub unsafe fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 { | |
8089 | let and = _mm256_and_si256(a, b); | |
8090 | let zero = _mm256_setzero_si256(); | |
8091 | _mm256_mask_cmpeq_epi8_mask(k, and, zero) | |
8092 | } | |
8093 | ||
8094 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k if the intermediate value is zero. | |
8095 | /// | |
8096 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testn_epi8_mask&expand=5929) | |
8097 | #[inline] | |
8098 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8099 | #[cfg_attr(test, assert_instr(vptestnmb))] | |
8100 | pub unsafe fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 { | |
8101 | let and = _mm_and_si128(a, b); | |
8102 | let zero = _mm_setzero_si128(); | |
8103 | _mm_cmpeq_epi8_mask(and, zero) | |
8104 | } | |
8105 | ||
8106 | /// Compute the bitwise NAND of packed 8-bit integers in a and b, producing intermediate 8-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero. | |
8107 | /// | |
8108 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_testn_epi8_mask&expand=5928) | |
8109 | #[inline] | |
8110 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8111 | #[cfg_attr(test, assert_instr(vptestnmb))] | |
8112 | pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 { | |
8113 | let and = _mm_and_si128(a, b); | |
8114 | let zero = _mm_setzero_si128(); | |
8115 | _mm_mask_cmpeq_epi8_mask(k, and, zero) | |
8116 | } | |
8117 | ||
fc512014 XL |
8118 | /// Store 64-bit mask from a into memory. |
8119 | /// | |
8120 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask64&expand=5578) | |
8121 | #[inline] | |
8122 | #[target_feature(enable = "avx512bw")] | |
8123 | #[cfg_attr(test, assert_instr(mov))] //should be kmovq | |
8124 | pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) { | |
8125 | ptr::write(mem_addr as *mut __mmask64, a); | |
8126 | } | |
8127 | ||
8128 | /// Store 32-bit mask from a into memory. | |
8129 | /// | |
8130 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_store_mask32&expand=5577) | |
8131 | #[inline] | |
8132 | #[target_feature(enable = "avx512bw")] | |
8133 | #[cfg_attr(test, assert_instr(mov))] //should be kmovd | |
8134 | pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) { | |
8135 | ptr::write(mem_addr as *mut __mmask32, a); | |
8136 | } | |
8137 | ||
8138 | /// Load 64-bit mask from memory into k. | |
8139 | /// | |
8140 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask64&expand=3318) | |
8141 | #[inline] | |
8142 | #[target_feature(enable = "avx512bw")] | |
8143 | #[cfg_attr(test, assert_instr(mov))] //should be kmovq | |
8144 | pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 { | |
8145 | ptr::read(mem_addr as *const __mmask64) | |
8146 | } | |
8147 | ||
8148 | /// Load 32-bit mask from memory into k. | |
8149 | /// | |
8150 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_load_mask32&expand=3317) | |
8151 | #[inline] | |
8152 | #[target_feature(enable = "avx512bw")] | |
8153 | #[cfg_attr(test, assert_instr(mov))] //should be kmovd | |
8154 | pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 { | |
8155 | ptr::read(mem_addr as *const __mmask32) | |
8156 | } | |
8157 | ||
8158 | /// Compute the absolute differences of packed unsigned 8-bit integers in a and b, then horizontally sum each consecutive 8 differences to produce eight unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in dst. | |
8159 | /// | |
8160 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sad_epu8&expand=4855) | |
8161 | #[inline] | |
8162 | #[target_feature(enable = "avx512bw")] | |
8163 | #[cfg_attr(test, assert_instr(vpsadbw))] | |
8164 | pub unsafe fn _mm512_sad_epu8(a: __m512i, b: __m512i) -> __m512i { | |
8165 | transmute(vpsadbw(a.as_u8x64(), b.as_u8x64())) | |
8166 | } | |
8167 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_dbsad_epu8&expand=2114)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_dbsad_epu8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    // Reinterpret both operands as u8 lanes for the LLVM intrinsic.
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    // vdbpsadbw performs the quadruplet-select + SAD described above.
    let r = vdbpsadbw(a, b, IMM8);
    transmute(r)
}
8182 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_dbsad_epu8&expand=2115)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_mask_dbsad_epu8<const IMM8: i32>(
    src: __m512i,
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    // Unmasked SAD result; masking is applied afterwards.
    let r = vdbpsadbw(a, b, IMM8);
    // Per 16-bit lane: keep r where the bit of k is set, else copy from src.
    transmute(simd_select_bitmask(k, r, src.as_u16x32()))
}
8202 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_dbsad_epu8&expand=2116)
#[inline]
#[target_feature(enable = "avx512bw")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm512_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask32,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    let a = a.as_u8x64();
    let b = b.as_u8x64();
    // Unmasked SAD result; zero-masking is applied afterwards.
    let r = vdbpsadbw(a, b, IMM8);
    // Per 16-bit lane: keep r where the bit of k is set, else zero.
    transmute(simd_select_bitmask(
        k,
        r,
        _mm512_setzero_si512().as_u16x32(),
    ))
}
8225 | ||
cdc7bbd5 XL |
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dbsad_epu8&expand=2111)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_dbsad_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    // Reinterpret both operands as u8 lanes for the LLVM intrinsic.
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    // 256-bit variant of vdbpsadbw.
    let r = vdbpsadbw256(a, b, IMM8);
    transmute(r)
}
8240 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_dbsad_epu8&expand=2112)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_mask_dbsad_epu8<const IMM8: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    // Unmasked SAD result; masking is applied afterwards.
    let r = vdbpsadbw256(a, b, IMM8);
    // Per 16-bit lane: keep r where the bit of k is set, else copy from src.
    transmute(simd_select_bitmask(k, r, src.as_u16x16()))
}
8260 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_dbsad_epu8&expand=2113)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm256_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask16,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    let a = a.as_u8x32();
    let b = b.as_u8x32();
    // Unmasked SAD result; zero-masking is applied afterwards.
    let r = vdbpsadbw256(a, b, IMM8);
    // Per 16-bit lane: keep r where the bit of k is set, else zero.
    transmute(simd_select_bitmask(
        k,
        r,
        _mm256_setzero_si256().as_u16x16(),
    ))
}
8283 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dbsad_epu8&expand=2108)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_dbsad_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    // Reinterpret both operands as u8 lanes for the LLVM intrinsic.
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    // 128-bit variant of vdbpsadbw.
    let r = vdbpsadbw128(a, b, IMM8);
    transmute(r)
}
8298 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_dbsad_epu8&expand=2109)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(4)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_mask_dbsad_epu8<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    // Unmasked SAD result; masking is applied afterwards.
    let r = vdbpsadbw128(a, b, IMM8);
    // Per 16-bit lane: keep r where the bit of k is set, else copy from src.
    transmute(simd_select_bitmask(k, r, src.as_u16x8()))
}
8318 | ||
/// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the upper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_dbsad_epu8&expand=2110)
#[inline]
#[target_feature(enable = "avx512bw,avx512vl")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vdbpsadbw, IMM8 = 0))]
pub unsafe fn _mm_maskz_dbsad_epu8<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    // Reject control values outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    let a = a.as_u8x16();
    let b = b.as_u8x16();
    // Unmasked SAD result; zero-masking is applied afterwards.
    let r = vdbpsadbw128(a, b, IMM8);
    // Per 16-bit lane: keep r where the bit of k is set, else zero.
    transmute(simd_select_bitmask(k, r, _mm_setzero_si128().as_u16x8()))
}
8337 | ||
fc512014 XL |
8338 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. |
8339 | /// | |
8340 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi16_mask&expand=3873) | |
8341 | #[inline] | |
8342 | #[target_feature(enable = "avx512bw")] | |
c295e0f8 | 8343 | #[cfg_attr(test, assert_instr(vpmovw2m))] |
cdc7bbd5 XL |
8344 | pub unsafe fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 { |
8345 | let filter = _mm512_set1_epi16(1 << 15); | |
8346 | let a = _mm512_and_si512(a, filter); | |
8347 | _mm512_cmpeq_epi16_mask(a, filter) | |
8348 | } | |
8349 | ||
8350 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. | |
8351 | /// | |
8352 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi16_mask&expand=3872) | |
8353 | #[inline] | |
8354 | #[target_feature(enable = "avx512bw,avx512vl")] | |
c295e0f8 | 8355 | #[cfg_attr(test, assert_instr(vpmovw2m))] |
cdc7bbd5 XL |
8356 | pub unsafe fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 { |
8357 | let filter = _mm256_set1_epi16(1 << 15); | |
8358 | let a = _mm256_and_si256(a, filter); | |
8359 | _mm256_cmpeq_epi16_mask(a, filter) | |
8360 | } | |
8361 | ||
8362 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. | |
8363 | /// | |
8364 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi16_mask&expand=3871) | |
8365 | #[inline] | |
8366 | #[target_feature(enable = "avx512bw,avx512vl")] | |
c295e0f8 | 8367 | #[cfg_attr(test, assert_instr(vpmovw2m))] |
cdc7bbd5 XL |
8368 | pub unsafe fn _mm_movepi16_mask(a: __m128i) -> __mmask8 { |
8369 | let filter = _mm_set1_epi16(1 << 15); | |
8370 | let a = _mm_and_si128(a, filter); | |
8371 | _mm_cmpeq_epi16_mask(a, filter) | |
fc512014 XL |
8372 | } |
8373 | ||
8374 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. | |
8375 | /// | |
8376 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movepi8_mask&expand=3883) | |
8377 | #[inline] | |
8378 | #[target_feature(enable = "avx512bw")] | |
c295e0f8 | 8379 | #[cfg_attr(test, assert_instr(vpmovb2m))] |
fc512014 XL |
8380 | pub unsafe fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 { |
8381 | let filter = _mm512_set1_epi8(1 << 7); | |
8382 | let a = _mm512_and_si512(a, filter); | |
8383 | _mm512_cmpeq_epi8_mask(a, filter) | |
8384 | } | |
8385 | ||
cdc7bbd5 XL |
8386 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. |
8387 | /// | |
8388 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movepi8_mask&expand=3882) | |
8389 | #[inline] | |
8390 | #[target_feature(enable = "avx512bw,avx512vl")] | |
c295e0f8 XL |
8391 | #[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than |
8392 | // using vpmovb2m plus converting the mask register to a standard register. | |
cdc7bbd5 XL |
8393 | pub unsafe fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 { |
8394 | let filter = _mm256_set1_epi8(1 << 7); | |
8395 | let a = _mm256_and_si256(a, filter); | |
8396 | _mm256_cmpeq_epi8_mask(a, filter) | |
8397 | } | |
8398 | ||
8399 | /// Set each bit of mask register k based on the most significant bit of the corresponding packed 8-bit integer in a. | |
8400 | /// | |
8401 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi8_mask&expand=3881) | |
8402 | #[inline] | |
8403 | #[target_feature(enable = "avx512bw,avx512vl")] | |
c295e0f8 XL |
8404 | #[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than |
8405 | // using vpmovb2m plus converting the mask register to a standard register. | |
cdc7bbd5 XL |
8406 | pub unsafe fn _mm_movepi8_mask(a: __m128i) -> __mmask16 { |
8407 | let filter = _mm_set1_epi8(1 << 7); | |
8408 | let a = _mm_and_si128(a, filter); | |
8409 | _mm_cmpeq_epi8_mask(a, filter) | |
8410 | } | |
8411 | ||
fc512014 XL |
8412 | /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8413 | /// | |
8414 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movm_epi16&expand=3886) | |
8415 | #[inline] | |
8416 | #[target_feature(enable = "avx512bw")] | |
8417 | #[cfg_attr(test, assert_instr(vpmovm2w))] | |
8418 | pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i { | |
8419 | let one = _mm512_set1_epi16( | |
8420 | 1 << 15 | |
8421 | | 1 << 14 | |
8422 | | 1 << 13 | |
8423 | | 1 << 12 | |
8424 | | 1 << 11 | |
8425 | | 1 << 10 | |
8426 | | 1 << 9 | |
8427 | | 1 << 8 | |
8428 | | 1 << 7 | |
8429 | | 1 << 6 | |
8430 | | 1 << 5 | |
8431 | | 1 << 4 | |
8432 | | 1 << 3 | |
8433 | | 1 << 2 | |
8434 | | 1 << 1 | |
8435 | | 1 << 0, | |
8436 | ) | |
8437 | .as_i16x32(); | |
8438 | let zero = _mm512_setzero_si512().as_i16x32(); | |
8439 | transmute(simd_select_bitmask(k, one, zero)) | |
8440 | } | |
8441 | ||
cdc7bbd5 XL |
8442 | /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8443 | /// | |
8444 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movm_epi16&expand=3885) | |
8445 | #[inline] | |
8446 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8447 | #[cfg_attr(test, assert_instr(vpmovm2w))] | |
8448 | pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i { | |
8449 | let one = _mm256_set1_epi16( | |
8450 | 1 << 15 | |
8451 | | 1 << 14 | |
8452 | | 1 << 13 | |
8453 | | 1 << 12 | |
8454 | | 1 << 11 | |
8455 | | 1 << 10 | |
8456 | | 1 << 9 | |
8457 | | 1 << 8 | |
8458 | | 1 << 7 | |
8459 | | 1 << 6 | |
8460 | | 1 << 5 | |
8461 | | 1 << 4 | |
8462 | | 1 << 3 | |
8463 | | 1 << 2 | |
8464 | | 1 << 1 | |
8465 | | 1 << 0, | |
8466 | ) | |
8467 | .as_i16x16(); | |
8468 | let zero = _mm256_setzero_si256().as_i16x16(); | |
8469 | transmute(simd_select_bitmask(k, one, zero)) | |
8470 | } | |
8471 | ||
8472 | /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. | |
8473 | /// | |
8474 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movm_epi16&expand=3884) | |
8475 | #[inline] | |
8476 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8477 | #[cfg_attr(test, assert_instr(vpmovm2w))] | |
8478 | pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i { | |
8479 | let one = _mm_set1_epi16( | |
8480 | 1 << 15 | |
8481 | | 1 << 14 | |
8482 | | 1 << 13 | |
8483 | | 1 << 12 | |
8484 | | 1 << 11 | |
8485 | | 1 << 10 | |
8486 | | 1 << 9 | |
8487 | | 1 << 8 | |
8488 | | 1 << 7 | |
8489 | | 1 << 6 | |
8490 | | 1 << 5 | |
8491 | | 1 << 4 | |
8492 | | 1 << 3 | |
8493 | | 1 << 2 | |
8494 | | 1 << 1 | |
8495 | | 1 << 0, | |
8496 | ) | |
8497 | .as_i16x8(); | |
8498 | let zero = _mm_setzero_si128().as_i16x8(); | |
8499 | transmute(simd_select_bitmask(k, one, zero)) | |
8500 | } | |
8501 | ||
fc512014 XL |
8502 | /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8503 | /// | |
8504 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movm_epi8&expand=3895) | |
8505 | #[inline] | |
8506 | #[target_feature(enable = "avx512bw")] | |
8507 | #[cfg_attr(test, assert_instr(vpmovm2b))] | |
8508 | pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i { | |
8509 | let one = | |
8510 | _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) | |
8511 | .as_i8x64(); | |
8512 | let zero = _mm512_setzero_si512().as_i8x64(); | |
8513 | transmute(simd_select_bitmask(k, one, zero)) | |
8514 | } | |
8515 | ||
cdc7bbd5 XL |
8516 | /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. |
8517 | /// | |
8518 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movm_epi8&expand=3894) | |
8519 | #[inline] | |
8520 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8521 | #[cfg_attr(test, assert_instr(vpmovm2b))] | |
8522 | pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i { | |
8523 | let one = | |
8524 | _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) | |
8525 | .as_i8x32(); | |
8526 | let zero = _mm256_setzero_si256().as_i8x32(); | |
8527 | transmute(simd_select_bitmask(k, one, zero)) | |
8528 | } | |
8529 | ||
8530 | /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. | |
8531 | /// | |
8532 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movm_epi8&expand=3893) | |
8533 | #[inline] | |
8534 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8535 | #[cfg_attr(test, assert_instr(vpmovm2b))] | |
8536 | pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i { | |
8537 | let one = _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) | |
8538 | .as_i8x16(); | |
8539 | let zero = _mm_setzero_si128().as_i8x16(); | |
8540 | transmute(simd_select_bitmask(k, one, zero)) | |
8541 | } | |
8542 | ||
fc512014 XL |
8543 | /// Add 32-bit masks in a and b, and store the result in k. |
8544 | /// | |
8545 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask32&expand=3207) | |
8546 | #[inline] | |
8547 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8548 | pub unsafe fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8549 | transmute(a + b) | |
8550 | } | |
8551 | ||
8552 | /// Add 64-bit masks in a and b, and store the result in k. | |
8553 | /// | |
8554 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kadd_mask64&expand=3208) | |
8555 | #[inline] | |
8556 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8557 | pub unsafe fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8558 | transmute(a + b) | |
8559 | } | |
8560 | ||
8561 | /// Compute the bitwise AND of 32-bit masks a and b, and store the result in k. | |
8562 | /// | |
8563 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask32&expand=3213) | |
8564 | #[inline] | |
8565 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8566 | pub unsafe fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8567 | transmute(a & b) | |
8568 | } | |
8569 | ||
8570 | /// Compute the bitwise AND of 64-bit masks a and b, and store the result in k. | |
8571 | /// | |
8572 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask64&expand=3214) | |
8573 | #[inline] | |
8574 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8575 | pub unsafe fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8576 | transmute(a & b) | |
8577 | } | |
8578 | ||
8579 | /// Compute the bitwise NOT of 32-bit mask a, and store the result in k. | |
8580 | /// | |
8581 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_knot_mask32&expand=3234) | |
8582 | #[inline] | |
8583 | #[target_feature(enable = "avx512bw")] | |
8584 | pub unsafe fn _knot_mask32(a: __mmask32) -> __mmask32 { | |
8585 | transmute(a ^ 0b11111111_11111111_11111111_11111111) | |
8586 | } | |
8587 | ||
8588 | /// Compute the bitwise NOT of 64-bit mask a, and store the result in k. | |
8589 | /// | |
8590 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_knot_mask64&expand=3235) | |
8591 | #[inline] | |
8592 | #[target_feature(enable = "avx512bw")] | |
8593 | pub unsafe fn _knot_mask64(a: __mmask64) -> __mmask64 { | |
8594 | transmute(a ^ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111) | |
8595 | } | |
8596 | ||
8597 | /// Compute the bitwise NOT of 32-bit masks a and then AND with b, and store the result in k. | |
8598 | /// | |
8599 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask32&expand=3219) | |
8600 | #[inline] | |
8601 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8602 | pub unsafe fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8603 | transmute(_knot_mask32(a) & b) | |
8604 | } | |
8605 | ||
8606 | /// Compute the bitwise NOT of 64-bit masks a and then AND with b, and store the result in k. | |
8607 | /// | |
8608 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kandn_mask64&expand=3220) | |
8609 | #[inline] | |
8610 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8611 | pub unsafe fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8612 | transmute(_knot_mask64(a) & b) | |
8613 | } | |
8614 | ||
8615 | /// Compute the bitwise OR of 32-bit masks a and b, and store the result in k. | |
8616 | /// | |
8617 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask32&expand=3240) | |
8618 | #[inline] | |
8619 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8620 | pub unsafe fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8621 | transmute(a | b) | |
8622 | } | |
8623 | ||
8624 | /// Compute the bitwise OR of 64-bit masks a and b, and store the result in k. | |
8625 | /// | |
8626 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask64&expand=3241) | |
8627 | #[inline] | |
8628 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8629 | pub unsafe fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8630 | transmute(a | b) | |
8631 | } | |
8632 | ||
8633 | /// Compute the bitwise XOR of 32-bit masks a and b, and store the result in k. | |
8634 | /// | |
8635 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask32&expand=3292) | |
8636 | #[inline] | |
8637 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8638 | pub unsafe fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8639 | transmute(a ^ b) | |
8640 | } | |
8641 | ||
8642 | /// Compute the bitwise XOR of 64-bit masks a and b, and store the result in k. | |
8643 | /// | |
8644 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask64&expand=3293) | |
8645 | #[inline] | |
8646 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8647 | pub unsafe fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8648 | transmute(a ^ b) | |
8649 | } | |
8650 | ||
8651 | /// Compute the bitwise XNOR of 32-bit masks a and b, and store the result in k. | |
8652 | /// | |
8653 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask32&expand=3286) | |
8654 | #[inline] | |
8655 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8656 | pub unsafe fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 { |
8657 | transmute(_knot_mask32(a ^ b)) | |
8658 | } | |
8659 | ||
8660 | /// Compute the bitwise XNOR of 64-bit masks a and b, and store the result in k. | |
8661 | /// | |
8662 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxnor_mask64&expand=3287) | |
8663 | #[inline] | |
8664 | #[target_feature(enable = "avx512bw")] | |
fc512014 XL |
8665 | pub unsafe fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 { |
8666 | transmute(_knot_mask64(a ^ b)) | |
8667 | } | |
8668 | ||
8669 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. | |
8670 | /// | |
8671 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi8&expand=1407) | |
8672 | #[inline] | |
8673 | #[target_feature(enable = "avx512bw")] | |
8674 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8675 | pub unsafe fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i { | |
8676 | let a = a.as_i16x32(); | |
8677 | transmute::<i8x32, _>(simd_cast(a)) | |
8678 | } | |
8679 | ||
8680 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8681 | /// | |
8682 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_epi8&expand=1408) | |
8683 | #[inline] | |
8684 | #[target_feature(enable = "avx512bw")] | |
8685 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8686 | pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { | |
8687 | let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); | |
8688 | transmute(simd_select_bitmask(k, convert, src.as_i8x32())) | |
8689 | } | |
8690 | ||
8691 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8692 | /// | |
8693 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_epi8&expand=1409) | |
8694 | #[inline] | |
8695 | #[target_feature(enable = "avx512bw")] | |
8696 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8697 | pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { | |
8698 | let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); | |
8699 | transmute(simd_select_bitmask( | |
8700 | k, | |
8701 | convert, | |
8702 | _mm256_setzero_si256().as_i8x32(), | |
8703 | )) | |
8704 | } | |
8705 | ||
cdc7bbd5 XL |
8706 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. |
8707 | /// | |
8708 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi8&expand=1404) | |
8709 | #[inline] | |
8710 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8711 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8712 | pub unsafe fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i { | |
8713 | let a = a.as_i16x16(); | |
8714 | transmute::<i8x16, _>(simd_cast(a)) | |
8715 | } | |
8716 | ||
8717 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8718 | /// | |
8719 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_epi8&expand=1405) | |
8720 | #[inline] | |
8721 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8722 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8723 | pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { | |
8724 | let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); | |
8725 | transmute(simd_select_bitmask(k, convert, src.as_i8x16())) | |
8726 | } | |
8727 | ||
8728 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8729 | /// | |
8730 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi16_epi8&expand=1406) | |
8731 | #[inline] | |
8732 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8733 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8734 | pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { | |
8735 | let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); | |
8736 | transmute(simd_select_bitmask( | |
8737 | k, | |
8738 | convert, | |
8739 | _mm_setzero_si128().as_i8x16(), | |
8740 | )) | |
8741 | } | |
8742 | ||
8743 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. | |
8744 | /// | |
8745 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi8&expand=1401) | |
8746 | #[inline] | |
8747 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8748 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8749 | pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { | |
8750 | let a = a.as_i16x8(); | |
8751 | let zero = _mm_setzero_si128().as_i16x8(); | |
17df50a5 | 8752 | let v256: i16x16 = simd_shuffle16!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]); |
cdc7bbd5 XL |
8753 | transmute::<i8x16, _>(simd_cast(v256)) |
8754 | } | |
8755 | ||
8756 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8757 | /// | |
8758 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_epi8&expand=1402) | |
8759 | #[inline] | |
8760 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8761 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8762 | pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
8763 | let convert = _mm_cvtepi16_epi8(a).as_i8x16(); | |
8764 | let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; | |
8765 | transmute(simd_select_bitmask(k, convert, src.as_i8x16())) | |
8766 | } | |
8767 | ||
8768 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8769 | /// | |
8770 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi16_epi8&expand=1403) | |
8771 | #[inline] | |
8772 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8773 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
8774 | pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { | |
8775 | let convert = _mm_cvtepi16_epi8(a).as_i8x16(); | |
8776 | let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; | |
8777 | let zero = _mm_setzero_si128().as_i8x16(); | |
8778 | transmute(simd_select_bitmask(k, convert, zero)) | |
8779 | } | |
8780 | ||
fc512014 XL |
8781 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. |
8782 | /// | |
8783 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi16_epi8&expand=1807) | |
8784 | #[inline] | |
8785 | #[target_feature(enable = "avx512bw")] | |
8786 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8787 | pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { | |
8788 | transmute(vpmovswb( | |
8789 | a.as_i16x32(), | |
8790 | _mm256_setzero_si256().as_i8x32(), | |
8791 | 0b11111111_11111111_11111111_11111111, | |
8792 | )) | |
8793 | } | |
8794 | ||
8795 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8796 | /// | |
8797 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_epi8&expand=1808) | |
8798 | #[inline] | |
8799 | #[target_feature(enable = "avx512bw")] | |
8800 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8801 | pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { | |
8802 | transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) | |
8803 | } | |
8804 | ||
8805 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8806 | /// | |
8807 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi16_epi8&expand=1809) | |
8808 | #[inline] | |
8809 | #[target_feature(enable = "avx512bw")] | |
8810 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8811 | pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { | |
8812 | transmute(vpmovswb( | |
8813 | a.as_i16x32(), | |
8814 | _mm256_setzero_si256().as_i8x32(), | |
8815 | k, | |
8816 | )) | |
8817 | } | |
8818 | ||
cdc7bbd5 XL |
8819 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. |
8820 | /// | |
8821 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi16_epi8&expand=1804) | |
8822 | #[inline] | |
8823 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8824 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8825 | pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { | |
8826 | transmute(vpmovswb256( | |
8827 | a.as_i16x16(), | |
8828 | _mm_setzero_si128().as_i8x16(), | |
8829 | 0b11111111_11111111, | |
8830 | )) | |
8831 | } | |
8832 | ||
8833 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8834 | /// | |
8835 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_epi8&expand=1805) | |
8836 | #[inline] | |
8837 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8838 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8839 | pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { | |
8840 | transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) | |
8841 | } | |
8842 | ||
8843 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8844 | /// | |
8845 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi16_epi8&expand=1806) | |
8846 | #[inline] | |
8847 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8848 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8849 | pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { | |
8850 | transmute(vpmovswb256( | |
8851 | a.as_i16x16(), | |
8852 | _mm_setzero_si128().as_i8x16(), | |
8853 | k, | |
8854 | )) | |
8855 | } | |
8856 | ||
8857 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. | |
8858 | /// | |
8859 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi16_epi8&expand=1801) | |
8860 | #[inline] | |
8861 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8862 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8863 | pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { | |
8864 | transmute(vpmovswb128( | |
8865 | a.as_i16x8(), | |
8866 | _mm_setzero_si128().as_i8x16(), | |
8867 | 0b11111111, | |
8868 | )) | |
8869 | } | |
8870 | ||
8871 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8872 | /// | |
8873 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_epi8&expand=1802) | |
8874 | #[inline] | |
8875 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8876 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8877 | pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
8878 | transmute(vpmovswb128(a.as_i16x8(), src.as_i8x16(), k)) | |
8879 | } | |
8880 | ||
8881 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8882 | /// | |
8883 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi16_epi8&expand=1803) | |
8884 | #[inline] | |
8885 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8886 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
8887 | pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { | |
8888 | transmute(vpmovswb128(a.as_i16x8(), _mm_setzero_si128().as_i8x16(), k)) | |
8889 | } | |
8890 | ||
8891 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. | |
8892 | /// | |
8893 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi16_epi8&expand=2042) | |
8894 | #[inline] | |
8895 | #[target_feature(enable = "avx512bw")] | |
8896 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
8897 | pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { | |
8898 | transmute(vpmovuswb( | |
8899 | a.as_u16x32(), | |
8900 | _mm256_setzero_si256().as_u8x32(), | |
8901 | 0b11111111_11111111_11111111_11111111, | |
8902 | )) | |
8903 | } | |
8904 | ||
8905 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8906 | /// | |
8907 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_epi8&expand=2043) | |
8908 | #[inline] | |
8909 | #[target_feature(enable = "avx512bw")] | |
8910 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
8911 | pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i { | |
8912 | transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) | |
8913 | } | |
8914 | ||
8915 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8916 | /// | |
8917 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi16_epi8&expand=2044) | |
8918 | #[inline] | |
8919 | #[target_feature(enable = "avx512bw")] | |
8920 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
8921 | pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { | |
8922 | transmute(vpmovuswb( | |
8923 | a.as_u16x32(), | |
8924 | _mm256_setzero_si256().as_u8x32(), | |
8925 | k, | |
8926 | )) | |
8927 | } | |
8928 | ||
8929 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. | |
8930 | /// | |
8931 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi16_epi8&expand=2039) | |
8932 | #[inline] | |
8933 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8934 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
8935 | pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { | |
8936 | transmute(vpmovuswb256( | |
8937 | a.as_u16x16(), | |
8938 | _mm_setzero_si128().as_u8x16(), | |
8939 | 0b11111111_11111111, | |
8940 | )) | |
8941 | } | |
8942 | ||
8943 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8944 | /// | |
8945 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_epi8&expand=2040) | |
8946 | #[inline] | |
8947 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8948 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
8949 | pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i { | |
8950 | transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) | |
8951 | } | |
8952 | ||
8953 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8954 | /// | |
8955 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi16_epi8&expand=2041) | |
8956 | #[inline] | |
8957 | #[target_feature(enable = "avx512bw,avx512vl")] | |
8958 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
8959 | pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { | |
8960 | transmute(vpmovuswb256( | |
8961 | a.as_u16x16(), | |
8962 | _mm_setzero_si128().as_u8x16(), | |
8963 | k, | |
8964 | )) | |
8965 | } | |
8966 | ||
fc512014 XL |
8967 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. |
8968 | /// | |
cdc7bbd5 | 8969 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi16_epi8&expand=2036) |
fc512014 | 8970 | #[inline] |
cdc7bbd5 | 8971 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 8972 | #[cfg_attr(test, assert_instr(vpmovuswb))] |
cdc7bbd5 XL |
8973 | pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { |
8974 | transmute(vpmovuswb128( | |
8975 | a.as_u16x8(), | |
8976 | _mm_setzero_si128().as_u8x16(), | |
8977 | 0b11111111, | |
fc512014 XL |
8978 | )) |
8979 | } | |
8980 | ||
8981 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
8982 | /// | |
cdc7bbd5 | 8983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_epi8&expand=2037) |
fc512014 | 8984 | #[inline] |
cdc7bbd5 | 8985 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 8986 | #[cfg_attr(test, assert_instr(vpmovuswb))] |
cdc7bbd5 XL |
8987 | pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
8988 | transmute(vpmovuswb128(a.as_u16x8(), src.as_u8x16(), k)) | |
fc512014 XL |
8989 | } |
8990 | ||
8991 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
8992 | /// | |
cdc7bbd5 | 8993 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi16_epi8&expand=2038) |
fc512014 | 8994 | #[inline] |
cdc7bbd5 | 8995 | #[target_feature(enable = "avx512bw,avx512vl")] |
fc512014 | 8996 | #[cfg_attr(test, assert_instr(vpmovuswb))] |
cdc7bbd5 XL |
8997 | pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { |
8998 | transmute(vpmovuswb128( | |
8999 | a.as_u16x8(), | |
9000 | _mm_setzero_si128().as_u8x16(), | |
fc512014 XL |
9001 | k, |
9002 | )) | |
9003 | } | |
9004 | ||
9005 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst. | |
9006 | /// | |
9007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi8_epi16&expand=1526) | |
9008 | #[inline] | |
9009 | #[target_feature(enable = "avx512bw")] | |
9010 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9011 | pub unsafe fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i { | |
9012 | let a = a.as_i8x32(); | |
9013 | transmute::<i16x32, _>(simd_cast(a)) | |
9014 | } | |
9015 | ||
9016 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9017 | /// | |
9018 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi8_epi16&expand=1527) | |
9019 | #[inline] | |
9020 | #[target_feature(enable = "avx512bw")] | |
9021 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9022 | pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { | |
9023 | let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); | |
9024 | transmute(simd_select_bitmask(k, convert, src.as_i16x32())) | |
9025 | } | |
9026 | ||
9027 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9028 | /// | |
9029 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi8_epi16&expand=1528) | |
9030 | #[inline] | |
9031 | #[target_feature(enable = "avx512bw")] | |
9032 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9033 | pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { | |
9034 | let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); | |
9035 | transmute(simd_select_bitmask( | |
9036 | k, | |
9037 | convert, | |
9038 | _mm512_setzero_si512().as_i16x32(), | |
9039 | )) | |
9040 | } | |
9041 | ||
cdc7bbd5 XL |
9042 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9043 | /// | |
9044 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi8_epi16&expand=1524) | |
9045 | #[inline] | |
9046 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9047 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9048 | pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { | |
9049 | let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); | |
9050 | transmute(simd_select_bitmask(k, convert, src.as_i16x16())) | |
9051 | } | |
9052 | ||
9053 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9054 | /// | |
9055 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi8_epi16&expand=1525) | |
9056 | #[inline] | |
9057 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9058 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9059 | pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { | |
9060 | let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); | |
9061 | transmute(simd_select_bitmask( | |
9062 | k, | |
9063 | convert, | |
9064 | _mm256_setzero_si256().as_i16x16(), | |
9065 | )) | |
9066 | } | |
9067 | ||
9068 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9069 | /// | |
9070 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi8_epi16&expand=1521) | |
9071 | #[inline] | |
9072 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9073 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9074 | pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
9075 | let convert = _mm_cvtepi8_epi16(a).as_i16x8(); | |
9076 | transmute(simd_select_bitmask(k, convert, src.as_i16x8())) | |
9077 | } | |
9078 | ||
9079 | /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9080 | /// | |
9081 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi8_epi16&expand=1522) | |
9082 | #[inline] | |
9083 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9084 | #[cfg_attr(test, assert_instr(vpmovsxbw))] | |
9085 | pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { | |
9086 | let convert = _mm_cvtepi8_epi16(a).as_i16x8(); | |
9087 | transmute(simd_select_bitmask( | |
9088 | k, | |
9089 | convert, | |
9090 | _mm_setzero_si128().as_i16x8(), | |
9091 | )) | |
9092 | } | |
9093 | ||
fc512014 XL |
9094 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst. |
9095 | /// | |
9096 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi16&expand=1612) | |
9097 | #[inline] | |
9098 | #[target_feature(enable = "avx512bw")] | |
9099 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9100 | pub unsafe fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i { | |
9101 | let a = a.as_u8x32(); | |
9102 | transmute::<i16x32, _>(simd_cast(a)) | |
9103 | } | |
9104 | ||
9105 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9106 | /// | |
9107 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu8_epi16&expand=1613) | |
9108 | #[inline] | |
9109 | #[target_feature(enable = "avx512bw")] | |
9110 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9111 | pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i { | |
9112 | let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); | |
9113 | transmute(simd_select_bitmask(k, convert, src.as_i16x32())) | |
9114 | } | |
9115 | ||
9116 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9117 | /// | |
9118 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu8_epi16&expand=1614) | |
9119 | #[inline] | |
9120 | #[target_feature(enable = "avx512bw")] | |
9121 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9122 | pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { | |
9123 | let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); | |
9124 | transmute(simd_select_bitmask( | |
9125 | k, | |
9126 | convert, | |
9127 | _mm512_setzero_si512().as_i16x32(), | |
9128 | )) | |
9129 | } | |
9130 | ||
cdc7bbd5 XL |
9131 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
9132 | /// | |
9133 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi16&expand=1610) | |
9134 | #[inline] | |
9135 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9136 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9137 | pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i { | |
9138 | let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); | |
9139 | transmute(simd_select_bitmask(k, convert, src.as_i16x16())) | |
9140 | } | |
9141 | ||
9142 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9143 | /// | |
9144 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi16&expand=1611) | |
9145 | #[inline] | |
9146 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9147 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9148 | pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { | |
9149 | let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); | |
9150 | transmute(simd_select_bitmask( | |
9151 | k, | |
9152 | convert, | |
9153 | _mm256_setzero_si256().as_i16x16(), | |
9154 | )) | |
9155 | } | |
9156 | ||
9157 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9158 | /// | |
9159 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi16&expand=1607) | |
9160 | #[inline] | |
9161 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9162 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9163 | pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { | |
9164 | let convert = _mm_cvtepu8_epi16(a).as_i16x8(); | |
9165 | transmute(simd_select_bitmask(k, convert, src.as_i16x8())) | |
9166 | } | |
9167 | ||
9168 | /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9169 | /// | |
9170 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi16&expand=1608) | |
9171 | #[inline] | |
9172 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9173 | #[cfg_attr(test, assert_instr(vpmovzxbw))] | |
9174 | pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { | |
9175 | let convert = _mm_cvtepu8_epi16(a).as_i16x8(); | |
9176 | transmute(simd_select_bitmask( | |
9177 | k, | |
9178 | convert, | |
9179 | _mm_setzero_si128().as_i16x8(), | |
9180 | )) | |
9181 | } | |
9182 | ||
fc512014 XL |
/// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bslli_epi128&expand=591)
#[inline]
#[target_feature(enable = "avx512bw")]
#[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_imm8!(IMM8);
    // Computes the shuffle index for output byte `i` (0..64). In the
    // `simd_shuffle64!(zero, a, ...)` below, indices 0..64 address `zero` and
    // indices 64..128 address `a`. Byte `i` of a 128-bit lane receives byte
    // `i - shift` of the same lane of `a` when it exists; bytes shifted in
    // from below the lane boundary (`i % 16 < shift`) become zero, and a
    // shift greater than 15 zeroes the whole lane (Intel's vpslldq rule).
    const fn mask(shift: i32, i: u32) -> u32 {
        // Only the low 8 bits of the immediate are significant.
        let shift = shift as u32 & 0xff;
        if shift > 15 || i % 16 < shift {
            0
        } else {
            64 + (i - shift)
        }
    }
    let a = a.as_i8x64();
    let zero = _mm512_setzero_si512().as_i8x64();
    // The shuffle indices must be compile-time constants, hence the
    // `<const IMM8: i32>` form of `simd_shuffle64!` and the `const fn` above.
    let r: i8x64 = simd_shuffle64!(
        zero,
        a,
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
            mask(IMM8, 16),
            mask(IMM8, 17),
            mask(IMM8, 18),
            mask(IMM8, 19),
            mask(IMM8, 20),
            mask(IMM8, 21),
            mask(IMM8, 22),
            mask(IMM8, 23),
            mask(IMM8, 24),
            mask(IMM8, 25),
            mask(IMM8, 26),
            mask(IMM8, 27),
            mask(IMM8, 28),
            mask(IMM8, 29),
            mask(IMM8, 30),
            mask(IMM8, 31),
            mask(IMM8, 32),
            mask(IMM8, 33),
            mask(IMM8, 34),
            mask(IMM8, 35),
            mask(IMM8, 36),
            mask(IMM8, 37),
            mask(IMM8, 38),
            mask(IMM8, 39),
            mask(IMM8, 40),
            mask(IMM8, 41),
            mask(IMM8, 42),
            mask(IMM8, 43),
            mask(IMM8, 44),
            mask(IMM8, 45),
            mask(IMM8, 46),
            mask(IMM8, 47),
            mask(IMM8, 48),
            mask(IMM8, 49),
            mask(IMM8, 50),
            mask(IMM8, 51),
            mask(IMM8, 52),
            mask(IMM8, 53),
            mask(IMM8, 54),
            mask(IMM8, 55),
            mask(IMM8, 56),
            mask(IMM8, 57),
            mask(IMM8, 58),
            mask(IMM8, 59),
            mask(IMM8, 60),
            mask(IMM8, 61),
            mask(IMM8, 62),
            mask(IMM8, 63),
        ],
    );
    transmute(r)
}
9274 | ||
9275 | /// Shift 128-bit lanes in a right by imm8 bytes while shifting in zeros, and store the results in dst. | |
9276 | /// | |
9277 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bsrli_epi128&expand=594) | |
9278 | #[inline] | |
9279 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
9280 | #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))] |
9281 | #[rustc_legacy_const_generics(1)] | |
9282 | pub unsafe fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i { | |
9283 | static_assert_imm8!(IMM8); | |
fc512014 XL |
9284 | let a = a.as_i8x64(); |
9285 | let zero = _mm512_setzero_si512().as_i8x64(); | |
17df50a5 XL |
9286 | let r: i8x64 = match IMM8 % 16 { |
9287 | 0 => simd_shuffle64!( | |
9288 | a, | |
9289 | zero, | |
9290 | [ | |
9291 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, | |
9292 | 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, | |
9293 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, | |
9294 | ], | |
9295 | ), | |
9296 | 1 => simd_shuffle64!( | |
9297 | a, | |
9298 | zero, | |
9299 | [ | |
9300 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, | |
9301 | 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | |
9302 | 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, | |
9303 | ], | |
9304 | ), | |
9305 | 2 => simd_shuffle64!( | |
9306 | a, | |
9307 | zero, | |
9308 | [ | |
9309 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, | |
9310 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
9311 | 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, | |
9312 | ], | |
9313 | ), | |
9314 | 3 => simd_shuffle64!( | |
9315 | a, | |
9316 | zero, | |
9317 | [ | |
9318 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, | |
9319 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
9320 | 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, | |
9321 | 114, | |
9322 | ], | |
9323 | ), | |
9324 | 4 => simd_shuffle64!( | |
9325 | a, | |
9326 | zero, | |
9327 | [ | |
9328 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, | |
9329 | 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, | |
9330 | 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, | |
9331 | 115, | |
9332 | ], | |
9333 | ), | |
9334 | 5 => simd_shuffle64!( | |
9335 | a, | |
9336 | zero, | |
9337 | [ | |
9338 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26, | |
9339 | 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
9340 | 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, | |
9341 | 115, 116, | |
9342 | ], | |
9343 | ), | |
9344 | 6 => simd_shuffle64!( | |
9345 | a, | |
9346 | zero, | |
9347 | [ | |
9348 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, | |
9349 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, | |
9350 | 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, | |
9351 | 116, 117, | |
9352 | ], | |
9353 | ), | |
9354 | 7 => simd_shuffle64!( | |
9355 | a, | |
9356 | zero, | |
9357 | [ | |
9358 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, | |
9359 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, | |
9360 | 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, | |
9361 | 116, 117, 118, | |
9362 | ], | |
9363 | ), | |
9364 | 8 => simd_shuffle64!( | |
9365 | a, | |
9366 | zero, | |
9367 | [ | |
9368 | 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, | |
9369 | 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, | |
9370 | 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, | |
9371 | 116, 117, 118, 119, | |
9372 | ], | |
9373 | ), | |
9374 | 9 => simd_shuffle64!( | |
9375 | a, | |
9376 | zero, | |
9377 | [ | |
9378 | 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, | |
9379 | 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, | |
9380 | 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, | |
9381 | 117, 118, 119, 120, | |
9382 | ], | |
9383 | ), | |
9384 | 10 => simd_shuffle64!( | |
9385 | a, | |
9386 | zero, | |
9387 | [ | |
9388 | 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, | |
9389 | 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, | |
9390 | 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, | |
9391 | 118, 119, 120, 121, | |
9392 | ], | |
9393 | ), | |
9394 | 11 => simd_shuffle64!( | |
9395 | a, | |
9396 | zero, | |
9397 | [ | |
9398 | 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, | |
9399 | 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, | |
9400 | 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, | |
9401 | 117, 118, 119, 120, 121, 122, | |
9402 | ], | |
9403 | ), | |
9404 | 12 => simd_shuffle64!( | |
9405 | a, | |
9406 | zero, | |
9407 | [ | |
9408 | 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, | |
9409 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, | |
9410 | 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, | |
9411 | 118, 119, 120, 121, 122, 123, | |
9412 | ], | |
9413 | ), | |
9414 | 13 => simd_shuffle64!( | |
9415 | a, | |
9416 | zero, | |
9417 | [ | |
9418 | 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, | |
9419 | 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, | |
9420 | 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, | |
9421 | 119, 120, 121, 122, 123, 124, | |
9422 | ], | |
9423 | ), | |
9424 | 14 => simd_shuffle64!( | |
9425 | a, | |
9426 | zero, | |
9427 | [ | |
9428 | 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, | |
9429 | 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, | |
9430 | 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, | |
9431 | 120, 121, 122, 123, 124, 125, | |
9432 | ], | |
9433 | ), | |
9434 | 15 => simd_shuffle64!( | |
9435 | a, | |
9436 | zero, | |
9437 | [ | |
9438 | 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, | |
9439 | 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, | |
9440 | 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, | |
9441 | 121, 122, 123, 124, 125, 126, | |
9442 | ], | |
9443 | ), | |
9444 | _ => zero, | |
fc512014 XL |
9445 | }; |
9446 | transmute(r) | |
9447 | } | |
9448 | ||
9449 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst. | |
9450 | /// | |
9451 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi8&expand=263) | |
9452 | #[inline] | |
9453 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
9454 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] |
9455 | #[rustc_legacy_const_generics(2)] | |
9456 | pub unsafe fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i { | |
fc512014 XL |
9457 | // If palignr is shifting the pair of vectors more than the size of two |
9458 | // lanes, emit zero. | |
17df50a5 | 9459 | if IMM8 > 32 { |
fc512014 XL |
9460 | return _mm512_set1_epi8(0); |
9461 | } | |
9462 | // If palignr is shifting the pair of input vectors more than one lane, | |
9463 | // but less than two lanes, convert to shifting in zeroes. | |
17df50a5 XL |
9464 | let (a, b) = if IMM8 > 16 { |
9465 | (_mm512_set1_epi8(0), a) | |
fc512014 | 9466 | } else { |
17df50a5 | 9467 | (a, b) |
fc512014 XL |
9468 | }; |
9469 | let a = a.as_i8x64(); | |
9470 | let b = b.as_i8x64(); | |
17df50a5 XL |
9471 | |
9472 | let r: i8x64 = match IMM8 % 16 { | |
9473 | 0 => simd_shuffle64!( | |
9474 | b, | |
9475 | a, | |
9476 | [ | |
9477 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, | |
9478 | 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, | |
9479 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, | |
9480 | ], | |
9481 | ), | |
9482 | 1 => simd_shuffle64!( | |
9483 | b, | |
9484 | a, | |
9485 | [ | |
9486 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, 22, 23, | |
9487 | 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, | |
9488 | 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, | |
9489 | ], | |
9490 | ), | |
9491 | 2 => simd_shuffle64!( | |
9492 | b, | |
9493 | a, | |
9494 | [ | |
9495 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, 23, 24, | |
9496 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
9497 | 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, | |
9498 | ], | |
9499 | ), | |
9500 | 3 => simd_shuffle64!( | |
9501 | b, | |
9502 | a, | |
9503 | [ | |
9504 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, 23, 24, | |
9505 | 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
9506 | 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, | |
9507 | 114, | |
9508 | ], | |
9509 | ), | |
9510 | 4 => simd_shuffle64!( | |
9511 | b, | |
9512 | a, | |
9513 | [ | |
9514 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, 24, 25, | |
9515 | 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, | |
9516 | 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, | |
9517 | 115, | |
9518 | ], | |
9519 | ), | |
9520 | 5 => simd_shuffle64!( | |
9521 | b, | |
9522 | a, | |
9523 | [ | |
9524 | 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, 25, 26, | |
9525 | 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
9526 | 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, | |
9527 | 115, 116, | |
9528 | ], | |
9529 | ), | |
9530 | 6 => simd_shuffle64!( | |
9531 | b, | |
9532 | a, | |
9533 | [ | |
9534 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, 26, 27, | |
9535 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, | |
9536 | 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, | |
9537 | 116, 117, | |
9538 | ], | |
9539 | ), | |
9540 | 7 => simd_shuffle64!( | |
9541 | b, | |
9542 | a, | |
9543 | [ | |
9544 | 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, 26, 27, | |
9545 | 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, 45, 46, 47, 96, | |
9546 | 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, | |
9547 | 116, 117, 118, | |
9548 | ], | |
9549 | ), | |
9550 | 8 => simd_shuffle64!( | |
9551 | b, | |
9552 | a, | |
9553 | [ | |
9554 | 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, 27, 28, | |
9555 | 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, 46, 47, 96, 97, | |
9556 | 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, | |
9557 | 116, 117, 118, 119, | |
9558 | ], | |
9559 | ), | |
9560 | 9 => simd_shuffle64!( | |
9561 | b, | |
9562 | a, | |
9563 | [ | |
9564 | 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, 28, 29, | |
9565 | 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, 47, 96, 97, 98, | |
9566 | 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, | |
9567 | 117, 118, 119, 120, | |
9568 | ], | |
9569 | ), | |
9570 | 10 => simd_shuffle64!( | |
9571 | b, | |
9572 | a, | |
9573 | [ | |
9574 | 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, 29, 30, | |
9575 | 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, 96, 97, 98, 99, | |
9576 | 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, | |
9577 | 118, 119, 120, 121, | |
9578 | ], | |
9579 | ), | |
9580 | 11 => simd_shuffle64!( | |
9581 | b, | |
9582 | a, | |
9583 | [ | |
9584 | 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, 30, 31, | |
9585 | 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, 97, 98, 99, | |
9586 | 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, 113, 114, 115, 116, | |
9587 | 117, 118, 119, 120, 121, 122, | |
9588 | ], | |
9589 | ), | |
9590 | 12 => simd_shuffle64!( | |
9591 | b, | |
9592 | a, | |
9593 | [ | |
9594 | 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, 31, 80, | |
9595 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, 98, 99, 100, | |
9596 | 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, 114, 115, 116, 117, | |
9597 | 118, 119, 120, 121, 122, 123, | |
9598 | ], | |
9599 | ), | |
9600 | 13 => simd_shuffle64!( | |
9601 | b, | |
9602 | a, | |
9603 | [ | |
9604 | 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, 80, 81, | |
9605 | 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, 99, 100, 101, | |
9606 | 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, 115, 116, 117, 118, | |
9607 | 119, 120, 121, 122, 123, 124, | |
9608 | ], | |
9609 | ), | |
9610 | 14 => simd_shuffle64!( | |
9611 | b, | |
9612 | a, | |
9613 | [ | |
9614 | 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, 81, 82, | |
9615 | 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, 100, 101, 102, | |
9616 | 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, 115, 116, 117, 118, 119, | |
9617 | 120, 121, 122, 123, 124, 125, | |
9618 | ], | |
9619 | ), | |
9620 | 15 => simd_shuffle64!( | |
9621 | b, | |
9622 | a, | |
9623 | [ | |
9624 | 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, 82, 83, | |
9625 | 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, 100, 101, 102, 103, | |
9626 | 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, 115, 116, 117, 118, 119, 120, | |
9627 | 121, 122, 123, 124, 125, 126, | |
9628 | ], | |
9629 | ), | |
9630 | _ => b, | |
fc512014 XL |
9631 | }; |
9632 | transmute(r) | |
9633 | } | |
9634 | ||
9635 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9636 | /// | |
9637 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi8&expand=264) | |
9638 | #[inline] | |
9639 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
9640 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] |
9641 | #[rustc_legacy_const_generics(4)] | |
9642 | pub unsafe fn _mm512_mask_alignr_epi8<const IMM8: i32>( | |
fc512014 XL |
9643 | src: __m512i, |
9644 | k: __mmask64, | |
9645 | a: __m512i, | |
9646 | b: __m512i, | |
fc512014 | 9647 | ) -> __m512i { |
17df50a5 XL |
9648 | static_assert_imm8!(IMM8); |
9649 | let r = _mm512_alignr_epi8::<IMM8>(a, b); | |
cdc7bbd5 XL |
9650 | transmute(simd_select_bitmask(k, r.as_i8x64(), src.as_i8x64())) |
9651 | } | |
9652 | ||
9653 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9654 | /// | |
9655 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi8&expand=265) | |
9656 | #[inline] | |
9657 | #[target_feature(enable = "avx512bw")] | |
17df50a5 XL |
9658 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] |
9659 | #[rustc_legacy_const_generics(3)] | |
9660 | pub unsafe fn _mm512_maskz_alignr_epi8<const IMM8: i32>( | |
9661 | k: __mmask64, | |
9662 | a: __m512i, | |
9663 | b: __m512i, | |
9664 | ) -> __m512i { | |
9665 | static_assert_imm8!(IMM8); | |
9666 | let r = _mm512_alignr_epi8::<IMM8>(a, b); | |
cdc7bbd5 XL |
9667 | let zero = _mm512_setzero_si512().as_i8x64(); |
9668 | transmute(simd_select_bitmask(k, r.as_i8x64(), zero)) | |
9669 | } | |
9670 | ||
9671 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9672 | /// | |
9673 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_alignr_epi8&expand=261) | |
9674 | #[inline] | |
9675 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
9676 | #[rustc_legacy_const_generics(4)] |
9677 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] | |
9678 | pub unsafe fn _mm256_mask_alignr_epi8<const IMM8: i32>( | |
cdc7bbd5 XL |
9679 | src: __m256i, |
9680 | k: __mmask32, | |
9681 | a: __m256i, | |
9682 | b: __m256i, | |
cdc7bbd5 | 9683 | ) -> __m256i { |
17df50a5 XL |
9684 | static_assert_imm8!(IMM8); |
9685 | let r = _mm256_alignr_epi8::<IMM8>(a, b); | |
cdc7bbd5 XL |
9686 | transmute(simd_select_bitmask(k, r.as_i8x32(), src.as_i8x32())) |
9687 | } | |
9688 | ||
9689 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9690 | /// | |
9691 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_alignr_epi8&expand=262) | |
9692 | #[inline] | |
9693 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
9694 | #[rustc_legacy_const_generics(3)] |
9695 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] | |
9696 | pub unsafe fn _mm256_maskz_alignr_epi8<const IMM8: i32>( | |
9697 | k: __mmask32, | |
9698 | a: __m256i, | |
9699 | b: __m256i, | |
9700 | ) -> __m256i { | |
9701 | static_assert_imm8!(IMM8); | |
9702 | let r = _mm256_alignr_epi8::<IMM8>(a, b); | |
cdc7bbd5 XL |
9703 | transmute(simd_select_bitmask( |
9704 | k, | |
9705 | r.as_i8x32(), | |
9706 | _mm256_setzero_si256().as_i8x32(), | |
9707 | )) | |
9708 | } | |
9709 | ||
9710 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). | |
9711 | /// | |
9712 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_alignr_epi8&expand=258) | |
9713 | #[inline] | |
9714 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
9715 | #[rustc_legacy_const_generics(4)] |
9716 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] | |
9717 | pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>( | |
cdc7bbd5 XL |
9718 | src: __m128i, |
9719 | k: __mmask16, | |
9720 | a: __m128i, | |
9721 | b: __m128i, | |
cdc7bbd5 | 9722 | ) -> __m128i { |
17df50a5 XL |
9723 | static_assert_imm8!(IMM8); |
9724 | let r = _mm_alignr_epi8::<IMM8>(a, b); | |
cdc7bbd5 XL |
9725 | transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16())) |
9726 | } | |
9727 | ||
9728 | /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). | |
9729 | /// | |
9730 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_alignr_epi8&expand=259) | |
9731 | #[inline] | |
9732 | #[target_feature(enable = "avx512bw,avx512vl")] | |
17df50a5 XL |
9733 | #[rustc_legacy_const_generics(3)] |
9734 | #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))] | |
9735 | pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>( | |
9736 | k: __mmask16, | |
9737 | a: __m128i, | |
9738 | b: __m128i, | |
9739 | ) -> __m128i { | |
9740 | static_assert_imm8!(IMM8); | |
9741 | let r = _mm_alignr_epi8::<IMM8>(a, b); | |
cdc7bbd5 XL |
9742 | let zero = _mm_setzero_si128().as_i8x16(); |
9743 | transmute(simd_select_bitmask(k, r.as_i8x16(), zero)) | |
9744 | } | |
9745 | ||
9746 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9747 | /// | |
9748 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi16_storeu_epi8&expand=1812) | |
9749 | #[inline] | |
9750 | #[target_feature(enable = "avx512bw")] | |
9751 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
9752 | pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { | |
9753 | vpmovswbmem(mem_addr as *mut i8, a.as_i16x32(), k); | |
9754 | } | |
9755 | ||
9756 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9757 | /// | |
9758 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi16_storeu_epi8&expand=1811) | |
9759 | #[inline] | |
9760 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9761 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
9762 | pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { | |
9763 | vpmovswbmem256(mem_addr as *mut i8, a.as_i16x16(), k); | |
9764 | } | |
9765 | ||
9766 | /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9767 | /// | |
9768 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi16_storeu_epi8&expand=1810) | |
9769 | #[inline] | |
9770 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9771 | #[cfg_attr(test, assert_instr(vpmovswb))] | |
9772 | pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { | |
9773 | vpmovswbmem128(mem_addr as *mut i8, a.as_i16x8(), k); | |
9774 | } | |
9775 | ||
9776 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9777 | /// | |
9778 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_storeu_epi8&expand=1412) | |
9779 | #[inline] | |
9780 | #[target_feature(enable = "avx512bw")] | |
9781 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
9782 | pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { | |
9783 | vpmovwbmem(mem_addr as *mut i8, a.as_i16x32(), k); | |
fc512014 XL |
9784 | } |
9785 | ||
cdc7bbd5 | 9786 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. |
fc512014 | 9787 | /// |
cdc7bbd5 XL |
9788 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi16_storeu_epi8&expand=1411) |
9789 | #[inline] | |
9790 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9791 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
9792 | pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { | |
9793 | vpmovwbmem256(mem_addr as *mut i8, a.as_i16x16(), k); | |
9794 | } | |
9795 | ||
9796 | /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9797 | /// | |
9798 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi16_storeu_epi8&expand=1410) | |
9799 | #[inline] | |
9800 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9801 | #[cfg_attr(test, assert_instr(vpmovwb))] | |
9802 | pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { | |
9803 | vpmovwbmem128(mem_addr as *mut i8, a.as_i16x8(), k); | |
9804 | } | |
9805 | ||
9806 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9807 | /// | |
9808 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi16_storeu_epi8&expand=2047) | |
fc512014 XL |
9809 | #[inline] |
9810 | #[target_feature(enable = "avx512bw")] | |
cdc7bbd5 XL |
9811 | #[cfg_attr(test, assert_instr(vpmovuswb))] |
9812 | pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) { | |
9813 | vpmovuswbmem(mem_addr as *mut i8, a.as_i16x32(), k); | |
9814 | } | |
9815 | ||
9816 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9817 | /// | |
9818 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi16_storeu_epi8&expand=2046) | |
9819 | #[inline] | |
9820 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9821 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
9822 | pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) { | |
9823 | vpmovuswbmem256(mem_addr as *mut i8, a.as_i16x16(), k); | |
9824 | } | |
9825 | ||
9826 | /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. | |
9827 | /// | |
9828 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi16_storeu_epi8&expand=2045) | |
9829 | #[inline] | |
9830 | #[target_feature(enable = "avx512bw,avx512vl")] | |
9831 | #[cfg_attr(test, assert_instr(vpmovuswb))] | |
9832 | pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { | |
9833 | vpmovuswbmem128(mem_addr as *mut i8, a.as_i16x8(), k); | |
fc512014 XL |
9834 | } |
9835 | ||
// Raw LLVM intrinsic bindings backing the AVX-512BW functions in this file.
// The `link_name` strings are LLVM intrinsic names and must match exactly;
// `mask.*` variants take a pass-through `src` vector plus a write-mask bitmap.
#[allow(improper_ctypes)]
extern "C" {
    // Saturating unsigned/signed add (masked forms).
    #[link_name = "llvm.x86.avx512.mask.paddus.w.512"]
    fn vpaddusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.paddus.w.256"]
    fn vpaddusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.paddus.w.128"]
    fn vpaddusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.paddus.b.512"]
    fn vpaddusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.paddus.b.256"]
    fn vpaddusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.paddus.b.128"]
    fn vpaddusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.padds.w.512"]
    fn vpaddsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.padds.w.256"]
    fn vpaddsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.padds.w.128"]
    fn vpaddsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.padds.b.512"]
    fn vpaddsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.padds.b.256"]
    fn vpaddsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.padds.b.128"]
    fn vpaddsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;

    // Saturating unsigned/signed subtract (masked forms).
    #[link_name = "llvm.x86.avx512.mask.psubus.w.512"]
    fn vpsubusw(a: u16x32, b: u16x32, src: u16x32, mask: u32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.psubus.w.256"]
    fn vpsubusw256(a: u16x16, b: u16x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.psubus.w.128"]
    fn vpsubusw128(a: u16x8, b: u16x8, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.psubus.b.512"]
    fn vpsubusb(a: u8x64, b: u8x64, src: u8x64, mask: u64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.psubus.b.256"]
    fn vpsubusb256(a: u8x32, b: u8x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.psubus.b.128"]
    fn vpsubusb128(a: u8x16, b: u8x16, src: u8x16, mask: u16) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.psubs.w.512"]
    fn vpsubsw(a: i16x32, b: i16x32, src: i16x32, mask: u32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.psubs.w.256"]
    fn vpsubsw256(a: i16x16, b: i16x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.psubs.w.128"]
    fn vpsubsw128(a: i16x8, b: i16x8, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.psubs.b.512"]
    fn vpsubsb(a: i8x64, b: i8x64, src: i8x64, mask: u64) -> i8x64;
    #[link_name = "llvm.x86.avx512.mask.psubs.b.256"]
    fn vpsubsb256(a: i8x32, b: i8x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.psubs.b.128"]
    fn vpsubsb128(a: i8x16, b: i8x16, src: i8x16, mask: u16) -> i8x16;

    // High-half multiplies.
    #[link_name = "llvm.x86.avx512.pmulhu.w.512"]
    fn vpmulhuw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.pmulh.w.512"]
    fn vpmulhw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
    fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;

    // Unsigned/signed compares; `op` selects the predicate, result is a bitmask.
    #[link_name = "llvm.x86.avx512.mask.ucmp.w.512"]
    fn vpcmpuw(a: u16x32, b: u16x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.ucmp.w.256"]
    fn vpcmpuw256(a: u16x16, b: u16x16, op: i32, mask: u16) -> u16;
    #[link_name = "llvm.x86.avx512.mask.ucmp.w.128"]
    fn vpcmpuw128(a: u16x8, b: u16x8, op: i32, mask: u8) -> u8;

    #[link_name = "llvm.x86.avx512.mask.ucmp.b.512"]
    fn vpcmpub(a: u8x64, b: u8x64, op: i32, mask: u64) -> u64;
    #[link_name = "llvm.x86.avx512.mask.ucmp.b.256"]
    fn vpcmpub256(a: u8x32, b: u8x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.ucmp.b.128"]
    fn vpcmpub128(a: u8x16, b: u8x16, op: i32, mask: u16) -> u16;

    #[link_name = "llvm.x86.avx512.mask.cmp.w.512"]
    fn vpcmpw(a: i16x32, b: i16x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.cmp.w.256"]
    fn vpcmpw256(a: i16x16, b: i16x16, op: i32, mask: u16) -> u16;
    #[link_name = "llvm.x86.avx512.mask.cmp.w.128"]
    fn vpcmpw128(a: i16x8, b: i16x8, op: i32, mask: u8) -> u8;

    #[link_name = "llvm.x86.avx512.mask.cmp.b.512"]
    fn vpcmpb(a: i8x64, b: i8x64, op: i32, mask: u64) -> u64;
    #[link_name = "llvm.x86.avx512.mask.cmp.b.256"]
    fn vpcmpb256(a: i8x32, b: i8x32, op: i32, mask: u32) -> u32;
    #[link_name = "llvm.x86.avx512.mask.cmp.b.128"]
    fn vpcmpb128(a: i8x16, b: i8x16, op: i32, mask: u16) -> u16;

    // Element-wise min/max.
    #[link_name = "llvm.x86.avx512.mask.pmaxu.w.512"]
    fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.pmaxu.b.512"]
    fn vpmaxub(a: u8x64, b: u8x64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.pmaxs.w.512"]
    fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.pmaxs.b.512"]
    fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.mask.pminu.w.512"]
    fn vpminuw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.mask.pminu.b.512"]
    fn vpminub(a: u8x64, b: u8x64) -> u8x64;
    #[link_name = "llvm.x86.avx512.mask.pmins.w.512"]
    fn vpminsw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.mask.pmins.b.512"]
    fn vpminsb(a: i8x64, b: i8x64) -> i8x64;

    // Multiply-and-horizontally-add.
    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;

    // Saturating packs to narrower element widths.
    #[link_name = "llvm.x86.avx512.packssdw.512"]
    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
    #[link_name = "llvm.x86.avx512.packsswb.512"]
    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
    #[link_name = "llvm.x86.avx512.packusdw.512"]
    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
    #[link_name = "llvm.x86.avx512.packuswb.512"]
    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;

    // Rounding averages.
    #[link_name = "llvm.x86.avx512.pavg.w.512"]
    fn vpavgw(a: u16x32, b: u16x32) -> u16x32;
    #[link_name = "llvm.x86.avx512.pavg.b.512"]
    fn vpavgb(a: u8x64, b: u8x64) -> u8x64;

    // Shifts: by-vector count (`count`), by-immediate (`imm8`), and per-lane
    // variable (`psllv`/`psrlv`/`psrav`).
    #[link_name = "llvm.x86.avx512.psll.w.512"]
    fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
    #[link_name = "llvm.x86.avx512.pslli.w.512"]
    fn vpslliw(a: i16x32, imm8: u32) -> i16x32;

    #[link_name = "llvm.x86.avx2.pslli.w"]
    fn pslliw256(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.sse2.pslli.w"]
    fn pslliw128(a: i16x8, imm8: i32) -> i16x8;

    #[link_name = "llvm.x86.avx512.psllv.w.512"]
    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psllv.w.256"]
    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psllv.w.128"]
    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrl.w.512"]
    fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrli.w.512"]
    fn vpsrliw(a: i16x32, imm8: u32) -> i16x32;

    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.psra.w.512"]
    fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrai.w.512"]
    fn vpsraiw(a: i16x32, imm8: u32) -> i16x32;

    #[link_name = "llvm.x86.avx2.psrai.w"]
    fn psraiw256(a: i16x16, imm8: i32) -> i16x16;
    #[link_name = "llvm.x86.sse2.psrai.w"]
    fn psraiw128(a: i16x8, imm8: i32) -> i16x8;

    #[link_name = "llvm.x86.avx512.psrav.w.512"]
    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.psrav.w.256"]
    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.psrav.w.128"]
    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;

    // Permutes: two-source (`vpermi2w`) and single-source (`vpermw`).
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
    fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
    fn vpermi2w256(a: i16x16, idx: i16x16, b: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.hi.128"]
    fn vpermi2w128(a: i16x8, idx: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.avx512.permvar.hi.512"]
    fn vpermw(a: i16x32, idx: i16x32) -> i16x32;
    #[link_name = "llvm.x86.avx512.permvar.hi.256"]
    fn vpermw256(a: i16x16, idx: i16x16) -> i16x16;
    #[link_name = "llvm.x86.avx512.permvar.hi.128"]
    fn vpermw128(a: i16x8, idx: i16x8) -> i16x8;

    // Byte shuffle and sum-of-absolute-differences.
    #[link_name = "llvm.x86.avx512.pshuf.b.512"]
    fn vpshufb(a: i8x64, b: i8x64) -> i8x64;

    #[link_name = "llvm.x86.avx512.psad.bw.512"]
    fn vpsadbw(a: u8x64, b: u8x64) -> u64x8;

    #[link_name = "llvm.x86.avx512.dbpsadbw.512"]
    fn vdbpsadbw(a: u8x64, b: u8x64, imm8: i32) -> u16x32;
    #[link_name = "llvm.x86.avx512.dbpsadbw.256"]
    fn vdbpsadbw256(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
    #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
    fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;

    // Word-to-byte down-conversions (signed / unsigned saturation), register forms.
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
    fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
    fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;

    // Word-to-byte down-conversions storing directly to memory (masked).
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
    fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
    fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
}
10074 | ||
10075 | #[cfg(test)] | |
10076 | mod tests { | |
10077 | ||
10078 | use stdarch_test::simd_test; | |
10079 | ||
10080 | use crate::core_arch::x86::*; | |
10081 | use crate::hint::black_box; | |
10082 | use crate::mem::{self}; | |
10083 | ||
10084 | #[simd_test(enable = "avx512bw")] | |
10085 | unsafe fn test_mm512_abs_epi16() { | |
10086 | let a = _mm512_set1_epi16(-1); | |
10087 | let r = _mm512_abs_epi16(a); | |
10088 | let e = _mm512_set1_epi16(1); | |
10089 | assert_eq_m512i(r, e); | |
10090 | } | |
10091 | ||
    // Writemask: all-zero mask keeps `src` (here `a`); set bits receive |a|.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_mask_abs_epi16(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi16(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                 -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
10103 | ||
    // Zeromask: all-zero mask yields zero vector; clear bits zero the lane.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi16() {
        let a = _mm512_set1_epi16(-1);
        let r = _mm512_maskz_abs_epi16(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi16(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
10115 | ||
    // 256-bit writemask variant of abs_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_mask_abs_epi16(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi16(a, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
10125 | ||
    // 256-bit zeromask variant of abs_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi16() {
        let a = _mm256_set1_epi16(-1);
        let r = _mm256_maskz_abs_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
10135 | ||
    // 128-bit writemask variant of abs_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_mask_abs_epi16(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi16(a, 0b00001111, a);
        let e = _mm_set_epi16(-1, -1, -1, -1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
10145 | ||
    // 128-bit zeromask variant of abs_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi16() {
        let a = _mm_set1_epi16(-1);
        let r = _mm_maskz_abs_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
10155 | ||
10156 | #[simd_test(enable = "avx512bw")] | |
10157 | unsafe fn test_mm512_abs_epi8() { | |
10158 | let a = _mm512_set1_epi8(-1); | |
10159 | let r = _mm512_abs_epi8(a); | |
10160 | let e = _mm512_set1_epi8(1); | |
10161 | assert_eq_m512i(r, e); | |
10162 | } | |
10163 | ||
    // Writemask over 64 byte lanes: clear bits keep `src`, set bits get |a|.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_mask_abs_epi8(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
10181 | ||
    // Zeromask over 64 byte lanes: clear bits produce 0, set bits get |a|.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_abs_epi8() {
        let a = _mm512_set1_epi8(-1);
        let r = _mm512_maskz_abs_epi8(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }
10198 | ||
    // 256-bit writemask variant of abs_epi8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_mask_abs_epi8(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi8(a, 0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1,
                                -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
10210 | ||
    // 256-bit zeromask variant of abs_epi8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_abs_epi8() {
        let a = _mm256_set1_epi8(-1);
        let r = _mm256_maskz_abs_epi8(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_abs_epi8(0b00000000_11111111_00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
                                0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }
10222 | ||
    // 128-bit writemask variant of abs_epi8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_mask_abs_epi8(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_abs_epi8(a, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
10232 | ||
    // 128-bit zeromask variant of abs_epi8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_abs_epi8() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_maskz_abs_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_abs_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(r, e);
    }
10243 | ||
10244 | #[simd_test(enable = "avx512bw")] | |
10245 | unsafe fn test_mm512_add_epi16() { | |
10246 | let a = _mm512_set1_epi16(1); | |
10247 | let b = _mm512_set1_epi16(2); | |
10248 | let r = _mm512_add_epi16(a, b); | |
10249 | let e = _mm512_set1_epi16(3); | |
10250 | assert_eq_m512i(r, e); | |
10251 | } | |
10252 | ||
    // Writemask add: clear bits keep `src` (a == 1), set bits get a + b == 3.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_mask_add_epi16(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi16(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
10265 | ||
    // Zeromask add: clear bits produce 0, set bits get a + b == 3.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi16() {
        let a = _mm512_set1_epi16(1);
        let b = _mm512_set1_epi16(2);
        let r = _mm512_maskz_add_epi16(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi16(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
10278 | ||
    // 256-bit writemask variant of add_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_mask_add_epi16(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi16(a, 0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
10289 | ||
    // 256-bit zeromask variant of add_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi16() {
        let a = _mm256_set1_epi16(1);
        let b = _mm256_set1_epi16(2);
        let r = _mm256_maskz_add_epi16(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi16(0b00000000_11111111, a, b);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
10300 | ||
    // 128-bit writemask variant of add_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_mask_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_mask_add_epi16(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_add_epi16(a, 0b00001111, a, b);
        let e = _mm_set_epi16(1, 1, 1, 1, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
10311 | ||
    // 128-bit zeromask variant of add_epi16.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm_maskz_add_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(2);
        let r = _mm_maskz_add_epi16(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_add_epi16(0b00001111, a, b);
        let e = _mm_set_epi16(0, 0, 0, 0, 3, 3, 3, 3);
        assert_eq_m128i(r, e);
    }
10322 | ||
10323 | #[simd_test(enable = "avx512bw")] | |
10324 | unsafe fn test_mm512_add_epi8() { | |
10325 | let a = _mm512_set1_epi8(1); | |
10326 | let b = _mm512_set1_epi8(2); | |
10327 | let r = _mm512_add_epi8(a, b); | |
10328 | let e = _mm512_set1_epi8(3); | |
10329 | assert_eq_m512i(r, e); | |
10330 | } | |
10331 | ||
    // Writemask byte add over 64 lanes: clear bits keep `src`, set bits get 3.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_mask_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_mask_add_epi8(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_add_epi8(
            a,
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
10351 | ||
    // Zeromask byte add over 64 lanes: clear bits produce 0, set bits get 3.
    #[simd_test(enable = "avx512bw")]
    unsafe fn test_mm512_maskz_add_epi8() {
        let a = _mm512_set1_epi8(1);
        let b = _mm512_set1_epi8(2);
        let r = _mm512_maskz_add_epi8(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_add_epi8(
            0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111,
            a,
            b,
        );
        #[rustfmt::skip]
        let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m512i(r, e);
    }
10370 | ||
    // 256-bit writemask variant of add_epi8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_mask_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_mask_add_epi8(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_add_epi8(a, 0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3,
                                1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
10383 | ||
    // 256-bit zeromask variant of add_epi8.
    #[simd_test(enable = "avx512bw,avx512vl")]
    unsafe fn test_mm256_maskz_add_epi8() {
        let a = _mm256_set1_epi8(1);
        let b = _mm256_set1_epi8(2);
        let r = _mm256_maskz_add_epi8(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_add_epi8(0b00000000_11111111_00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3,
                                0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3);
        assert_eq_m256i(r, e);
    }
10396 | ||
10397 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10398 | unsafe fn test_mm_mask_add_epi8() { | |
10399 | let a = _mm_set1_epi8(1); | |
10400 | let b = _mm_set1_epi8(2); | |
10401 | let r = _mm_mask_add_epi8(a, 0, a, b); | |
10402 | assert_eq_m128i(r, a); | |
10403 | let r = _mm_mask_add_epi8(a, 0b00000000_11111111, a, b); | |
10404 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3); | |
10405 | assert_eq_m128i(r, e); | |
10406 | } | |
10407 | ||
10408 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10409 | unsafe fn test_mm_maskz_add_epi8() { | |
10410 | let a = _mm_set1_epi8(1); | |
10411 | let b = _mm_set1_epi8(2); | |
10412 | let r = _mm_maskz_add_epi8(0, a, b); | |
10413 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10414 | let r = _mm_maskz_add_epi8(0b00000000_11111111, a, b); | |
10415 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3); | |
10416 | assert_eq_m128i(r, e); | |
10417 | } | |
10418 | ||
10419 | #[simd_test(enable = "avx512bw")] | |
10420 | unsafe fn test_mm512_adds_epu16() { | |
10421 | let a = _mm512_set1_epi16(1); | |
10422 | let b = _mm512_set1_epi16(u16::MAX as i16); | |
10423 | let r = _mm512_adds_epu16(a, b); | |
10424 | let e = _mm512_set1_epi16(u16::MAX as i16); | |
10425 | assert_eq_m512i(r, e); | |
10426 | } | |
10427 | ||
10428 | #[simd_test(enable = "avx512bw")] | |
10429 | unsafe fn test_mm512_mask_adds_epu16() { | |
10430 | let a = _mm512_set1_epi16(1); | |
10431 | let b = _mm512_set1_epi16(u16::MAX as i16); | |
10432 | let r = _mm512_mask_adds_epu16(a, 0, a, b); | |
10433 | assert_eq_m512i(r, a); | |
10434 | let r = _mm512_mask_adds_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); | |
10435 | #[rustfmt::skip] | |
10436 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10437 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); | |
10438 | assert_eq_m512i(r, e); | |
10439 | } | |
10440 | ||
10441 | #[simd_test(enable = "avx512bw")] | |
10442 | unsafe fn test_mm512_maskz_adds_epu16() { | |
10443 | let a = _mm512_set1_epi16(1); | |
10444 | let b = _mm512_set1_epi16(u16::MAX as i16); | |
10445 | let r = _mm512_maskz_adds_epu16(0, a, b); | |
10446 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10447 | let r = _mm512_maskz_adds_epu16(0b00000000_00000000_00000000_00001111, a, b); | |
10448 | #[rustfmt::skip] | |
10449 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10450 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); | |
10451 | assert_eq_m512i(r, e); | |
10452 | } | |
10453 | ||
10454 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10455 | unsafe fn test_mm256_mask_adds_epu16() { | |
10456 | let a = _mm256_set1_epi16(1); | |
10457 | let b = _mm256_set1_epi16(u16::MAX as i16); | |
10458 | let r = _mm256_mask_adds_epu16(a, 0, a, b); | |
10459 | assert_eq_m256i(r, a); | |
10460 | let r = _mm256_mask_adds_epu16(a, 0b00000000_00001111, a, b); | |
10461 | #[rustfmt::skip] | |
10462 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); | |
10463 | assert_eq_m256i(r, e); | |
10464 | } | |
10465 | ||
10466 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10467 | unsafe fn test_mm256_maskz_adds_epu16() { | |
10468 | let a = _mm256_set1_epi16(1); | |
10469 | let b = _mm256_set1_epi16(u16::MAX as i16); | |
10470 | let r = _mm256_maskz_adds_epu16(0, a, b); | |
10471 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
10472 | let r = _mm256_maskz_adds_epu16(0b00000000_00001111, a, b); | |
10473 | #[rustfmt::skip] | |
10474 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); | |
10475 | assert_eq_m256i(r, e); | |
10476 | } | |
10477 | ||
10478 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10479 | unsafe fn test_mm_mask_adds_epu16() { | |
10480 | let a = _mm_set1_epi16(1); | |
10481 | let b = _mm_set1_epi16(u16::MAX as i16); | |
10482 | let r = _mm_mask_adds_epu16(a, 0, a, b); | |
10483 | assert_eq_m128i(r, a); | |
10484 | let r = _mm_mask_adds_epu16(a, 0b00001111, a, b); | |
10485 | #[rustfmt::skip] | |
10486 | let e = _mm_set_epi16(1, 1, 1, 1, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); | |
10487 | assert_eq_m128i(r, e); | |
10488 | } | |
10489 | ||
10490 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10491 | unsafe fn test_mm_maskz_adds_epu16() { | |
10492 | let a = _mm_set1_epi16(1); | |
10493 | let b = _mm_set1_epi16(u16::MAX as i16); | |
10494 | let r = _mm_maskz_adds_epu16(0, a, b); | |
10495 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10496 | let r = _mm_maskz_adds_epu16(0b00001111, a, b); | |
10497 | #[rustfmt::skip] | |
10498 | let e = _mm_set_epi16(0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16); | |
10499 | assert_eq_m128i(r, e); | |
10500 | } | |
10501 | ||
10502 | #[simd_test(enable = "avx512bw")] | |
10503 | unsafe fn test_mm512_adds_epu8() { | |
10504 | let a = _mm512_set1_epi8(1); | |
10505 | let b = _mm512_set1_epi8(u8::MAX as i8); | |
10506 | let r = _mm512_adds_epu8(a, b); | |
10507 | let e = _mm512_set1_epi8(u8::MAX as i8); | |
10508 | assert_eq_m512i(r, e); | |
10509 | } | |
10510 | ||
10511 | #[simd_test(enable = "avx512bw")] | |
10512 | unsafe fn test_mm512_mask_adds_epu8() { | |
10513 | let a = _mm512_set1_epi8(1); | |
10514 | let b = _mm512_set1_epi8(u8::MAX as i8); | |
10515 | let r = _mm512_mask_adds_epu8(a, 0, a, b); | |
10516 | assert_eq_m512i(r, a); | |
10517 | let r = _mm512_mask_adds_epu8( | |
10518 | a, | |
10519 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
10520 | a, | |
10521 | b, | |
10522 | ); | |
10523 | #[rustfmt::skip] | |
10524 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10525 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10526 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10527 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); | |
10528 | assert_eq_m512i(r, e); | |
10529 | } | |
10530 | ||
10531 | #[simd_test(enable = "avx512bw")] | |
10532 | unsafe fn test_mm512_maskz_adds_epu8() { | |
10533 | let a = _mm512_set1_epi8(1); | |
10534 | let b = _mm512_set1_epi8(u8::MAX as i8); | |
10535 | let r = _mm512_maskz_adds_epu8(0, a, b); | |
10536 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10537 | let r = _mm512_maskz_adds_epu8( | |
10538 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
10539 | a, | |
10540 | b, | |
10541 | ); | |
10542 | #[rustfmt::skip] | |
10543 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10544 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10545 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10546 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); | |
10547 | assert_eq_m512i(r, e); | |
10548 | } | |
10549 | ||
10550 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10551 | unsafe fn test_mm256_mask_adds_epu8() { | |
10552 | let a = _mm256_set1_epi8(1); | |
10553 | let b = _mm256_set1_epi8(u8::MAX as i8); | |
10554 | let r = _mm256_mask_adds_epu8(a, 0, a, b); | |
10555 | assert_eq_m256i(r, a); | |
10556 | let r = _mm256_mask_adds_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); | |
10557 | #[rustfmt::skip] | |
10558 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10559 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); | |
10560 | assert_eq_m256i(r, e); | |
10561 | } | |
10562 | ||
10563 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10564 | unsafe fn test_mm256_maskz_adds_epu8() { | |
10565 | let a = _mm256_set1_epi8(1); | |
10566 | let b = _mm256_set1_epi8(u8::MAX as i8); | |
10567 | let r = _mm256_maskz_adds_epu8(0, a, b); | |
10568 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
10569 | let r = _mm256_maskz_adds_epu8(0b00000000_00000000_00000000_00001111, a, b); | |
10570 | #[rustfmt::skip] | |
10571 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10572 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); | |
10573 | assert_eq_m256i(r, e); | |
10574 | } | |
10575 | ||
10576 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10577 | unsafe fn test_mm_mask_adds_epu8() { | |
10578 | let a = _mm_set1_epi8(1); | |
10579 | let b = _mm_set1_epi8(u8::MAX as i8); | |
10580 | let r = _mm_mask_adds_epu8(a, 0, a, b); | |
10581 | assert_eq_m128i(r, a); | |
10582 | let r = _mm_mask_adds_epu8(a, 0b00000000_00001111, a, b); | |
10583 | #[rustfmt::skip] | |
10584 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); | |
10585 | assert_eq_m128i(r, e); | |
10586 | } | |
10587 | ||
10588 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10589 | unsafe fn test_mm_maskz_adds_epu8() { | |
10590 | let a = _mm_set1_epi8(1); | |
10591 | let b = _mm_set1_epi8(u8::MAX as i8); | |
10592 | let r = _mm_maskz_adds_epu8(0, a, b); | |
10593 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10594 | let r = _mm_maskz_adds_epu8(0b00000000_00001111, a, b); | |
10595 | #[rustfmt::skip] | |
10596 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8); | |
10597 | assert_eq_m128i(r, e); | |
10598 | } | |
10599 | ||
10600 | #[simd_test(enable = "avx512bw")] | |
10601 | unsafe fn test_mm512_adds_epi16() { | |
10602 | let a = _mm512_set1_epi16(1); | |
10603 | let b = _mm512_set1_epi16(i16::MAX); | |
10604 | let r = _mm512_adds_epi16(a, b); | |
10605 | let e = _mm512_set1_epi16(i16::MAX); | |
10606 | assert_eq_m512i(r, e); | |
10607 | } | |
10608 | ||
10609 | #[simd_test(enable = "avx512bw")] | |
10610 | unsafe fn test_mm512_mask_adds_epi16() { | |
10611 | let a = _mm512_set1_epi16(1); | |
10612 | let b = _mm512_set1_epi16(i16::MAX); | |
10613 | let r = _mm512_mask_adds_epi16(a, 0, a, b); | |
10614 | assert_eq_m512i(r, a); | |
10615 | let r = _mm512_mask_adds_epi16(a, 0b00000000_00000000_00000000_00001111, a, b); | |
10616 | #[rustfmt::skip] | |
10617 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10618 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
10619 | assert_eq_m512i(r, e); | |
10620 | } | |
10621 | ||
10622 | #[simd_test(enable = "avx512bw")] | |
10623 | unsafe fn test_mm512_maskz_adds_epi16() { | |
10624 | let a = _mm512_set1_epi16(1); | |
10625 | let b = _mm512_set1_epi16(i16::MAX); | |
10626 | let r = _mm512_maskz_adds_epi16(0, a, b); | |
10627 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10628 | let r = _mm512_maskz_adds_epi16(0b00000000_00000000_00000000_00001111, a, b); | |
10629 | #[rustfmt::skip] | |
10630 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10631 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
10632 | assert_eq_m512i(r, e); | |
10633 | } | |
10634 | ||
10635 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10636 | unsafe fn test_mm256_mask_adds_epi16() { | |
10637 | let a = _mm256_set1_epi16(1); | |
10638 | let b = _mm256_set1_epi16(i16::MAX); | |
10639 | let r = _mm256_mask_adds_epi16(a, 0, a, b); | |
10640 | assert_eq_m256i(r, a); | |
10641 | let r = _mm256_mask_adds_epi16(a, 0b00000000_00001111, a, b); | |
10642 | #[rustfmt::skip] | |
10643 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
10644 | assert_eq_m256i(r, e); | |
10645 | } | |
10646 | ||
10647 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10648 | unsafe fn test_mm256_maskz_adds_epi16() { | |
10649 | let a = _mm256_set1_epi16(1); | |
10650 | let b = _mm256_set1_epi16(i16::MAX); | |
10651 | let r = _mm256_maskz_adds_epi16(0, a, b); | |
10652 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
10653 | let r = _mm256_maskz_adds_epi16(0b00000000_00001111, a, b); | |
10654 | #[rustfmt::skip] | |
10655 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
10656 | assert_eq_m256i(r, e); | |
10657 | } | |
10658 | ||
10659 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10660 | unsafe fn test_mm_mask_adds_epi16() { | |
10661 | let a = _mm_set1_epi16(1); | |
10662 | let b = _mm_set1_epi16(i16::MAX); | |
10663 | let r = _mm_mask_adds_epi16(a, 0, a, b); | |
10664 | assert_eq_m128i(r, a); | |
10665 | let r = _mm_mask_adds_epi16(a, 0b00001111, a, b); | |
10666 | let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
10667 | assert_eq_m128i(r, e); | |
10668 | } | |
10669 | ||
10670 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10671 | unsafe fn test_mm_maskz_adds_epi16() { | |
10672 | let a = _mm_set1_epi16(1); | |
10673 | let b = _mm_set1_epi16(i16::MAX); | |
10674 | let r = _mm_maskz_adds_epi16(0, a, b); | |
10675 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10676 | let r = _mm_maskz_adds_epi16(0b00001111, a, b); | |
10677 | let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
10678 | assert_eq_m128i(r, e); | |
10679 | } | |
10680 | ||
10681 | #[simd_test(enable = "avx512bw")] | |
10682 | unsafe fn test_mm512_adds_epi8() { | |
10683 | let a = _mm512_set1_epi8(1); | |
10684 | let b = _mm512_set1_epi8(i8::MAX); | |
10685 | let r = _mm512_adds_epi8(a, b); | |
10686 | let e = _mm512_set1_epi8(i8::MAX); | |
10687 | assert_eq_m512i(r, e); | |
10688 | } | |
10689 | ||
10690 | #[simd_test(enable = "avx512bw")] | |
10691 | unsafe fn test_mm512_mask_adds_epi8() { | |
10692 | let a = _mm512_set1_epi8(1); | |
10693 | let b = _mm512_set1_epi8(i8::MAX); | |
10694 | let r = _mm512_mask_adds_epi8(a, 0, a, b); | |
10695 | assert_eq_m512i(r, a); | |
10696 | let r = _mm512_mask_adds_epi8( | |
10697 | a, | |
10698 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
10699 | a, | |
10700 | b, | |
10701 | ); | |
10702 | #[rustfmt::skip] | |
10703 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10704 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10705 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10706 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
10707 | assert_eq_m512i(r, e); | |
10708 | } | |
10709 | ||
10710 | #[simd_test(enable = "avx512bw")] | |
10711 | unsafe fn test_mm512_maskz_adds_epi8() { | |
10712 | let a = _mm512_set1_epi8(1); | |
10713 | let b = _mm512_set1_epi8(i8::MAX); | |
10714 | let r = _mm512_maskz_adds_epi8(0, a, b); | |
10715 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10716 | let r = _mm512_maskz_adds_epi8( | |
10717 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
10718 | a, | |
10719 | b, | |
10720 | ); | |
10721 | #[rustfmt::skip] | |
10722 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10723 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10724 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10725 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
10726 | assert_eq_m512i(r, e); | |
10727 | } | |
10728 | ||
10729 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10730 | unsafe fn test_mm256_mask_adds_epi8() { | |
10731 | let a = _mm256_set1_epi8(1); | |
10732 | let b = _mm256_set1_epi8(i8::MAX); | |
10733 | let r = _mm256_mask_adds_epi8(a, 0, a, b); | |
10734 | assert_eq_m256i(r, a); | |
10735 | let r = _mm256_mask_adds_epi8(a, 0b00000000_00000000_00000000_00001111, a, b); | |
10736 | #[rustfmt::skip] | |
10737 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10738 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
10739 | assert_eq_m256i(r, e); | |
10740 | } | |
10741 | ||
10742 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10743 | unsafe fn test_mm256_maskz_adds_epi8() { | |
10744 | let a = _mm256_set1_epi8(1); | |
10745 | let b = _mm256_set1_epi8(i8::MAX); | |
10746 | let r = _mm256_maskz_adds_epi8(0, a, b); | |
10747 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
10748 | let r = _mm256_maskz_adds_epi8(0b00000000_00000000_00000000_00001111, a, b); | |
10749 | #[rustfmt::skip] | |
10750 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10751 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
10752 | assert_eq_m256i(r, e); | |
10753 | } | |
10754 | ||
10755 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10756 | unsafe fn test_mm_mask_adds_epi8() { | |
10757 | let a = _mm_set1_epi8(1); | |
10758 | let b = _mm_set1_epi8(i8::MAX); | |
10759 | let r = _mm_mask_adds_epi8(a, 0, a, b); | |
10760 | assert_eq_m128i(r, a); | |
10761 | let r = _mm_mask_adds_epi8(a, 0b00000000_00001111, a, b); | |
10762 | #[rustfmt::skip] | |
10763 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
10764 | assert_eq_m128i(r, e); | |
10765 | } | |
10766 | ||
10767 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10768 | unsafe fn test_mm_maskz_adds_epi8() { | |
10769 | let a = _mm_set1_epi8(1); | |
10770 | let b = _mm_set1_epi8(i8::MAX); | |
10771 | let r = _mm_maskz_adds_epi8(0, a, b); | |
10772 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10773 | let r = _mm_maskz_adds_epi8(0b00000000_00001111, a, b); | |
10774 | #[rustfmt::skip] | |
10775 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
10776 | assert_eq_m128i(r, e); | |
10777 | } | |
10778 | ||
10779 | #[simd_test(enable = "avx512bw")] | |
10780 | unsafe fn test_mm512_sub_epi16() { | |
10781 | let a = _mm512_set1_epi16(1); | |
10782 | let b = _mm512_set1_epi16(2); | |
10783 | let r = _mm512_sub_epi16(a, b); | |
10784 | let e = _mm512_set1_epi16(-1); | |
10785 | assert_eq_m512i(r, e); | |
10786 | } | |
10787 | ||
10788 | #[simd_test(enable = "avx512bw")] | |
10789 | unsafe fn test_mm512_mask_sub_epi16() { | |
10790 | let a = _mm512_set1_epi16(1); | |
10791 | let b = _mm512_set1_epi16(2); | |
10792 | let r = _mm512_mask_sub_epi16(a, 0, a, b); | |
10793 | assert_eq_m512i(r, a); | |
10794 | let r = _mm512_mask_sub_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); | |
10795 | #[rustfmt::skip] | |
10796 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, | |
10797 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); | |
10798 | assert_eq_m512i(r, e); | |
10799 | } | |
10800 | ||
10801 | #[simd_test(enable = "avx512bw")] | |
10802 | unsafe fn test_mm512_maskz_sub_epi16() { | |
10803 | let a = _mm512_set1_epi16(1); | |
10804 | let b = _mm512_set1_epi16(2); | |
10805 | let r = _mm512_maskz_sub_epi16(0, a, b); | |
10806 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10807 | let r = _mm512_maskz_sub_epi16(0b00000000_11111111_00000000_11111111, a, b); | |
10808 | #[rustfmt::skip] | |
10809 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, | |
10810 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
10811 | assert_eq_m512i(r, e); | |
10812 | } | |
10813 | ||
10814 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10815 | unsafe fn test_mm256_mask_sub_epi16() { | |
10816 | let a = _mm256_set1_epi16(1); | |
10817 | let b = _mm256_set1_epi16(2); | |
10818 | let r = _mm256_mask_sub_epi16(a, 0, a, b); | |
10819 | assert_eq_m256i(r, a); | |
10820 | let r = _mm256_mask_sub_epi16(a, 0b00000000_11111111, a, b); | |
10821 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); | |
10822 | assert_eq_m256i(r, e); | |
10823 | } | |
10824 | ||
10825 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10826 | unsafe fn test_mm256_maskz_sub_epi16() { | |
10827 | let a = _mm256_set1_epi16(1); | |
10828 | let b = _mm256_set1_epi16(2); | |
10829 | let r = _mm256_maskz_sub_epi16(0, a, b); | |
10830 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
10831 | let r = _mm256_maskz_sub_epi16(0b00000000_11111111, a, b); | |
10832 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
10833 | assert_eq_m256i(r, e); | |
10834 | } | |
10835 | ||
10836 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10837 | unsafe fn test_mm_mask_sub_epi16() { | |
10838 | let a = _mm_set1_epi16(1); | |
10839 | let b = _mm_set1_epi16(2); | |
10840 | let r = _mm_mask_sub_epi16(a, 0, a, b); | |
10841 | assert_eq_m128i(r, a); | |
10842 | let r = _mm_mask_sub_epi16(a, 0b00001111, a, b); | |
10843 | let e = _mm_set_epi16(1, 1, 1, 1, -1, -1, -1, -1); | |
10844 | assert_eq_m128i(r, e); | |
10845 | } | |
10846 | ||
10847 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10848 | unsafe fn test_mm_maskz_sub_epi16() { | |
10849 | let a = _mm_set1_epi16(1); | |
10850 | let b = _mm_set1_epi16(2); | |
10851 | let r = _mm_maskz_sub_epi16(0, a, b); | |
10852 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10853 | let r = _mm_maskz_sub_epi16(0b00001111, a, b); | |
10854 | let e = _mm_set_epi16(0, 0, 0, 0, -1, -1, -1, -1); | |
10855 | assert_eq_m128i(r, e); | |
10856 | } | |
10857 | ||
10858 | #[simd_test(enable = "avx512bw")] | |
10859 | unsafe fn test_mm512_sub_epi8() { | |
10860 | let a = _mm512_set1_epi8(1); | |
10861 | let b = _mm512_set1_epi8(2); | |
10862 | let r = _mm512_sub_epi8(a, b); | |
10863 | let e = _mm512_set1_epi8(-1); | |
10864 | assert_eq_m512i(r, e); | |
10865 | } | |
10866 | ||
10867 | #[simd_test(enable = "avx512bw")] | |
10868 | unsafe fn test_mm512_mask_sub_epi8() { | |
10869 | let a = _mm512_set1_epi8(1); | |
10870 | let b = _mm512_set1_epi8(2); | |
10871 | let r = _mm512_mask_sub_epi8(a, 0, a, b); | |
10872 | assert_eq_m512i(r, a); | |
10873 | let r = _mm512_mask_sub_epi8( | |
10874 | a, | |
10875 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
10876 | a, | |
10877 | b, | |
10878 | ); | |
10879 | #[rustfmt::skip] | |
10880 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, | |
10881 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, | |
10882 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, | |
10883 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); | |
10884 | assert_eq_m512i(r, e); | |
10885 | } | |
10886 | ||
10887 | #[simd_test(enable = "avx512bw")] | |
10888 | unsafe fn test_mm512_maskz_sub_epi8() { | |
10889 | let a = _mm512_set1_epi8(1); | |
10890 | let b = _mm512_set1_epi8(2); | |
10891 | let r = _mm512_maskz_sub_epi8(0, a, b); | |
10892 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10893 | let r = _mm512_maskz_sub_epi8( | |
10894 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
10895 | a, | |
10896 | b, | |
10897 | ); | |
10898 | #[rustfmt::skip] | |
10899 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, | |
10900 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, | |
10901 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, | |
10902 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
10903 | assert_eq_m512i(r, e); | |
10904 | } | |
10905 | ||
10906 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10907 | unsafe fn test_mm256_mask_sub_epi8() { | |
10908 | let a = _mm256_set1_epi8(1); | |
10909 | let b = _mm256_set1_epi8(2); | |
10910 | let r = _mm256_mask_sub_epi8(a, 0, a, b); | |
10911 | assert_eq_m256i(r, a); | |
10912 | let r = _mm256_mask_sub_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); | |
10913 | #[rustfmt::skip] | |
10914 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, | |
10915 | 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); | |
10916 | assert_eq_m256i(r, e); | |
10917 | } | |
10918 | ||
10919 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10920 | unsafe fn test_mm256_maskz_sub_epi8() { | |
10921 | let a = _mm256_set1_epi8(1); | |
10922 | let b = _mm256_set1_epi8(2); | |
10923 | let r = _mm256_maskz_sub_epi8(0, a, b); | |
10924 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
10925 | let r = _mm256_maskz_sub_epi8(0b00000000_11111111_00000000_11111111, a, b); | |
10926 | #[rustfmt::skip] | |
10927 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, | |
10928 | 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
10929 | assert_eq_m256i(r, e); | |
10930 | } | |
10931 | ||
10932 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10933 | unsafe fn test_mm_mask_sub_epi8() { | |
10934 | let a = _mm_set1_epi8(1); | |
10935 | let b = _mm_set1_epi8(2); | |
10936 | let r = _mm_mask_sub_epi8(a, 0, a, b); | |
10937 | assert_eq_m128i(r, a); | |
10938 | let r = _mm_mask_sub_epi8(a, 0b00000000_11111111, a, b); | |
10939 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1); | |
10940 | assert_eq_m128i(r, e); | |
10941 | } | |
10942 | ||
10943 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10944 | unsafe fn test_mm_maskz_sub_epi8() { | |
10945 | let a = _mm_set1_epi8(1); | |
10946 | let b = _mm_set1_epi8(2); | |
10947 | let r = _mm_maskz_sub_epi8(0, a, b); | |
10948 | assert_eq_m128i(r, _mm_setzero_si128()); | |
10949 | let r = _mm_maskz_sub_epi8(0b00000000_11111111, a, b); | |
10950 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
10951 | assert_eq_m128i(r, e); | |
10952 | } | |
10953 | ||
10954 | #[simd_test(enable = "avx512bw")] | |
10955 | unsafe fn test_mm512_subs_epu16() { | |
10956 | let a = _mm512_set1_epi16(1); | |
10957 | let b = _mm512_set1_epi16(u16::MAX as i16); | |
10958 | let r = _mm512_subs_epu16(a, b); | |
10959 | let e = _mm512_set1_epi16(0); | |
10960 | assert_eq_m512i(r, e); | |
10961 | } | |
10962 | ||
10963 | #[simd_test(enable = "avx512bw")] | |
10964 | unsafe fn test_mm512_mask_subs_epu16() { | |
10965 | let a = _mm512_set1_epi16(1); | |
10966 | let b = _mm512_set1_epi16(u16::MAX as i16); | |
10967 | let r = _mm512_mask_subs_epu16(a, 0, a, b); | |
10968 | assert_eq_m512i(r, a); | |
10969 | let r = _mm512_mask_subs_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); | |
10970 | #[rustfmt::skip] | |
10971 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
10972 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
10973 | assert_eq_m512i(r, e); | |
10974 | } | |
10975 | ||
10976 | #[simd_test(enable = "avx512bw")] | |
10977 | unsafe fn test_mm512_maskz_subs_epu16() { | |
10978 | let a = _mm512_set1_epi16(1); | |
10979 | let b = _mm512_set1_epi16(u16::MAX as i16); | |
10980 | let r = _mm512_maskz_subs_epu16(0, a, b); | |
10981 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
10982 | let r = _mm512_maskz_subs_epu16(0b00000000_00000000_00000000_00001111, a, b); | |
10983 | #[rustfmt::skip] | |
10984 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
10985 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
10986 | assert_eq_m512i(r, e); | |
10987 | } | |
10988 | ||
10989 | #[simd_test(enable = "avx512bw,avx512vl")] | |
10990 | unsafe fn test_mm256_mask_subs_epu16() { | |
10991 | let a = _mm256_set1_epi16(1); | |
10992 | let b = _mm256_set1_epi16(u16::MAX as i16); | |
10993 | let r = _mm256_mask_subs_epu16(a, 0, a, b); | |
10994 | assert_eq_m256i(r, a); | |
10995 | let r = _mm256_mask_subs_epu16(a, 0b00000000_00001111, a, b); | |
10996 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
10997 | assert_eq_m256i(r, e); | |
10998 | } | |
10999 | ||
11000 | #[simd_test(enable = "avx512bw,avx512vl")] | |
11001 | unsafe fn test_mm256_maskz_subs_epu16() { | |
11002 | let a = _mm256_set1_epi16(1); | |
11003 | let b = _mm256_set1_epi16(u16::MAX as i16); | |
11004 | let r = _mm256_maskz_subs_epu16(0, a, b); | |
11005 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
11006 | let r = _mm256_maskz_subs_epu16(0b00000000_00001111, a, b); | |
11007 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
11008 | assert_eq_m256i(r, e); | |
11009 | } | |
11010 | ||
11011 | #[simd_test(enable = "avx512bw,avx512vl")] | |
11012 | unsafe fn test_mm_mask_subs_epu16() { | |
11013 | let a = _mm_set1_epi16(1); | |
11014 | let b = _mm_set1_epi16(u16::MAX as i16); | |
11015 | let r = _mm_mask_subs_epu16(a, 0, a, b); | |
11016 | assert_eq_m128i(r, a); | |
11017 | let r = _mm_mask_subs_epu16(a, 0b00001111, a, b); | |
11018 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); | |
11019 | assert_eq_m128i(r, e); | |
11020 | } | |
11021 | ||
11022 | #[simd_test(enable = "avx512bw,avx512vl")] | |
11023 | unsafe fn test_mm_maskz_subs_epu16() { | |
11024 | let a = _mm_set1_epi16(1); | |
11025 | let b = _mm_set1_epi16(u16::MAX as i16); | |
11026 | let r = _mm_maskz_subs_epu16(0, a, b); | |
11027 | assert_eq_m128i(r, _mm_setzero_si128()); | |
11028 | let r = _mm_maskz_subs_epu16(0b00001111, a, b); | |
11029 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); | |
11030 | assert_eq_m128i(r, e); | |
11031 | } | |
11032 | ||
11033 | #[simd_test(enable = "avx512bw")] | |
11034 | unsafe fn test_mm512_subs_epu8() { | |
11035 | let a = _mm512_set1_epi8(1); | |
11036 | let b = _mm512_set1_epi8(u8::MAX as i8); | |
11037 | let r = _mm512_subs_epu8(a, b); | |
11038 | let e = _mm512_set1_epi8(0); | |
11039 | assert_eq_m512i(r, e); | |
11040 | } | |
11041 | ||
// Tests for saturating unsigned 8-bit subtraction (vpsubusb) write-mask and
// zero-mask variants. Operands are a = 1 and b = 255, so 1 -u 255 saturates
// to 0 in every lane the mask selects.

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_subs_epu8() {
    let a = _mm512_set1_epi8(1);
    let b = _mm512_set1_epi8(u8::MAX as i8);
    // All-zero mask: every lane is copied from src.
    let r = _mm512_mask_subs_epu8(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Low four lanes selected: those saturate to 0, the rest keep src (1).
    let r = _mm512_mask_subs_epu8(
        a,
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_subs_epu8() {
    let a = _mm512_set1_epi8(1);
    let b = _mm512_set1_epi8(u8::MAX as i8);
    // All-zero mask: every lane is zeroed.
    let r = _mm512_maskz_subs_epu8(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    // Selected lanes compute the (saturated-to-0) difference; the rest zero.
    let r = _mm512_maskz_subs_epu8(
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit variant (requires AVX-512VL for the masked form).
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_subs_epu8() {
    let a = _mm256_set1_epi8(1);
    let b = _mm256_set1_epi8(u8::MAX as i8);
    let r = _mm256_mask_subs_epu8(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_subs_epu8(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_subs_epu8() {
    let a = _mm256_set1_epi8(1);
    let b = _mm256_set1_epi8(u8::MAX as i8);
    let r = _mm256_maskz_subs_epu8(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_subs_epu8(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

// 128-bit variant.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_subs_epu8() {
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(u8::MAX as i8);
    let r = _mm_mask_subs_epu8(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_subs_epu8(a, 0b00000000_00001111, a, b);
    let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_subs_epu8() {
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(u8::MAX as i8);
    let r = _mm_maskz_subs_epu8(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_subs_epu8(0b00000000_00001111, a, b);
    let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}
11128 | ||
// Tests for saturating signed 16-bit subtraction (vpsubsw).
// Operands: a = -1, b = i16::MAX, so -1 -s 32767 = -32768 = i16::MIN exactly
// (the value sits on the saturation boundary).

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_subs_epi16() {
    let a = _mm512_set1_epi16(-1);
    let b = _mm512_set1_epi16(i16::MAX);
    let r = _mm512_subs_epi16(a, b);
    let e = _mm512_set1_epi16(i16::MIN);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_subs_epi16() {
    let a = _mm512_set1_epi16(-1);
    let b = _mm512_set1_epi16(i16::MAX);
    // All-zero mask: result equals src.
    let r = _mm512_mask_subs_epi16(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Low four lanes selected: those become i16::MIN, the rest keep src (-1).
    let r = _mm512_mask_subs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_subs_epi16() {
    let a = _mm512_set1_epi16(-1);
    let b = _mm512_set1_epi16(i16::MAX);
    let r = _mm512_maskz_subs_epi16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_subs_epi16(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
    assert_eq_m512i(r, e);
}

// 256-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_subs_epi16() {
    let a = _mm256_set1_epi16(-1);
    let b = _mm256_set1_epi16(i16::MAX);
    let r = _mm256_mask_subs_epi16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_subs_epi16(a, 0b00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_subs_epi16() {
    let a = _mm256_set1_epi16(-1);
    let b = _mm256_set1_epi16(i16::MAX);
    let r = _mm256_maskz_subs_epi16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_subs_epi16(0b00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
    assert_eq_m256i(r, e);
}

// 128-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_subs_epi16() {
    let a = _mm_set1_epi16(-1);
    let b = _mm_set1_epi16(i16::MAX);
    let r = _mm_mask_subs_epi16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_subs_epi16(a, 0b00001111, a, b);
    let e = _mm_set_epi16(-1, -1, -1, -1, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_subs_epi16() {
    let a = _mm_set1_epi16(-1);
    let b = _mm_set1_epi16(i16::MAX);
    let r = _mm_maskz_subs_epi16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_subs_epi16(0b00001111, a, b);
    let e = _mm_set_epi16(0, 0, 0, 0, i16::MIN, i16::MIN, i16::MIN, i16::MIN);
    assert_eq_m128i(r, e);
}
11209 | ||
// Tests for saturating signed 8-bit subtraction (vpsubsb).
// Operands: a = -1, b = i8::MAX, so -1 -s 127 = -128 = i8::MIN exactly.

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_subs_epi8() {
    let a = _mm512_set1_epi8(-1);
    let b = _mm512_set1_epi8(i8::MAX);
    let r = _mm512_subs_epi8(a, b);
    let e = _mm512_set1_epi8(i8::MIN);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_subs_epi8() {
    let a = _mm512_set1_epi8(-1);
    let b = _mm512_set1_epi8(i8::MAX);
    // All-zero mask: result equals src.
    let r = _mm512_mask_subs_epi8(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Low four lanes selected: those become i8::MIN, the rest keep src (-1).
    let r = _mm512_mask_subs_epi8(
        a,
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_subs_epi8() {
    let a = _mm512_set1_epi8(-1);
    let b = _mm512_set1_epi8(i8::MAX);
    let r = _mm512_maskz_subs_epi8(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_subs_epi8(
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
    assert_eq_m512i(r, e);
}

// 256-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_subs_epi8() {
    let a = _mm256_set1_epi8(-1);
    let b = _mm256_set1_epi8(i8::MAX);
    let r = _mm256_mask_subs_epi8(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_subs_epi8(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_subs_epi8() {
    let a = _mm256_set1_epi8(-1);
    let b = _mm256_set1_epi8(i8::MAX);
    let r = _mm256_maskz_subs_epi8(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_subs_epi8(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
    assert_eq_m256i(r, e);
}

// 128-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_subs_epi8() {
    let a = _mm_set1_epi8(-1);
    let b = _mm_set1_epi8(i8::MAX);
    let r = _mm_mask_subs_epi8(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_subs_epi8(a, 0b00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_subs_epi8() {
    let a = _mm_set1_epi8(-1);
    let b = _mm_set1_epi8(i8::MAX);
    let r = _mm_maskz_subs_epi8(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_subs_epi8(0b00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MIN, i8::MIN, i8::MIN);
    assert_eq_m128i(r, e);
}
11307 | ||
// Tests for unsigned 16-bit high-half multiply (vpmulhuw).
// 1 * 1 = 1, whose upper 16 bits are 0, so every computed lane is 0.

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mulhi_epu16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_mulhi_epu16(a, b);
    let e = _mm512_set1_epi16(0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_mulhi_epu16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    // All-zero mask: result equals src.
    let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Low four lanes selected: those compute 0, the rest keep src (1).
    let r = _mm512_mask_mulhi_epu16(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_mulhi_epu16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_maskz_mulhi_epu16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_mulhi_epu16(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_mulhi_epu16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_mulhi_epu16(a, 0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_mulhi_epu16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_maskz_mulhi_epu16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_mulhi_epu16(0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

// 128-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_mulhi_epu16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_mask_mulhi_epu16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_mulhi_epu16(a, 0b00001111, a, b);
    let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_mulhi_epu16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_maskz_mulhi_epu16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_mulhi_epu16(0b00001111, a, b);
    let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}
11386 | ||
// Tests for signed 16-bit high-half multiply (vpmulhw).
// 1 * 1 = 1; upper 16 bits are 0, so every computed lane is 0.

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mulhi_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_mulhi_epi16(a, b);
    let e = _mm512_set1_epi16(0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_mulhi_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    // All-zero mask: result equals src.
    let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Low four lanes selected: those compute 0, the rest keep src (1).
    let r = _mm512_mask_mulhi_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_mulhi_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_maskz_mulhi_epi16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_mulhi_epi16(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_mulhi_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_mulhi_epi16(a, 0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_mulhi_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_maskz_mulhi_epi16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_mulhi_epi16(0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

// 128-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_mulhi_epi16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_mask_mulhi_epi16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_mulhi_epi16(a, 0b00001111, a, b);
    let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_mulhi_epi16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_maskz_mulhi_epi16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_mulhi_epi16(0b00001111, a, b);
    let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}
11465 | ||
// Tests for fixed-point rounding high multiply (vpmulhrsw):
// ((a*b >> 14) + 1) >> 1. With a = b = 1 the result is 0 in every lane.

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mulhrs_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_mulhrs_epi16(a, b);
    let e = _mm512_set1_epi16(0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_mulhrs_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    // All-zero mask: result equals src.
    let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Low four lanes selected: those compute 0, the rest keep src (1).
    let r = _mm512_mask_mulhrs_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_mulhrs_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_maskz_mulhrs_epi16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_mulhrs_epi16(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m512i(r, e);
}

// 256-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_mulhrs_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_mulhrs_epi16(a, 0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_mulhrs_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_maskz_mulhrs_epi16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_mulhrs_epi16(0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m256i(r, e);
}

// 128-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_mulhrs_epi16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_mulhrs_epi16(a, 0b00001111, a, b);
    let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_mulhrs_epi16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_maskz_mulhrs_epi16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_mulhrs_epi16(0b00001111, a, b);
    let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}
11544 | ||
// Tests for 16-bit low-half multiply (vpmullw). 1 * 1 = 1 in every lane, so
// computed lanes are 1 and (in the maskz forms) unselected lanes are 0.

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mullo_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_mullo_epi16(a, b);
    let e = _mm512_set1_epi16(1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_mullo_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    // All-zero mask: result equals src.
    let r = _mm512_mask_mullo_epi16(a, 0, a, b);
    assert_eq_m512i(r, a);
    // Computed lanes also yield 1, so e is all ones either way.
    let r = _mm512_mask_mullo_epi16(a, 0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_mullo_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(1);
    let r = _mm512_maskz_mullo_epi16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_mullo_epi16(0b00000000_00000000_00000000_00001111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

// 256-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_mullo_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_mask_mullo_epi16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_mullo_epi16(a, 0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_mullo_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(1);
    let r = _mm256_maskz_mullo_epi16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_mullo_epi16(0b00000000_00001111, a, b);
    let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}

// 128-bit variants.
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_mullo_epi16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_mask_mullo_epi16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_mullo_epi16(a, 0b00001111, a, b);
    let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_mullo_epi16() {
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(1);
    let r = _mm_maskz_mullo_epi16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_mullo_epi16(0b00001111, a, b);
    let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
    assert_eq_m128i(r, e);
}
11623 | ||
// Unsigned 16-bit max (vpmaxuw): with a = 0..15 and b = 15..0 per 16-lane
// half, lane i of the result is max(i, 15 - i).
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_max_epu16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                             15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    let r = _mm512_max_epu16(a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                             15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
    assert_eq_m512i(r, e);
}
11638 | ||
11639 | #[simd_test(enable = "avx512f")] | |
11640 | unsafe fn test_mm512_mask_max_epu16() { | |
11641 | #[rustfmt::skip] | |
11642 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11643 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11644 | #[rustfmt::skip] | |
11645 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11646 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11647 | let r = _mm512_mask_max_epu16(a, 0, a, b); | |
11648 | assert_eq_m512i(r, a); | |
11649 | let r = _mm512_mask_max_epu16(a, 0b00000000_11111111_00000000_11111111, a, b); | |
11650 | #[rustfmt::skip] | |
11651 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11652 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11653 | assert_eq_m512i(r, e); | |
11654 | } | |
11655 | ||
11656 | #[simd_test(enable = "avx512f")] | |
11657 | unsafe fn test_mm512_maskz_max_epu16() { | |
11658 | #[rustfmt::skip] | |
11659 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11660 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11661 | #[rustfmt::skip] | |
11662 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11663 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11664 | let r = _mm512_maskz_max_epu16(0, a, b); | |
11665 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
11666 | let r = _mm512_maskz_max_epu16(0b00000000_11111111_00000000_11111111, a, b); | |
11667 | #[rustfmt::skip] | |
11668 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
11669 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11670 | assert_eq_m512i(r, e); | |
11671 | } | |
11672 | ||
11673 | #[simd_test(enable = "avx512f,avx512vl")] | |
11674 | unsafe fn test_mm256_mask_max_epu16() { | |
11675 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11676 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11677 | let r = _mm256_mask_max_epu16(a, 0, a, b); | |
11678 | assert_eq_m256i(r, a); | |
11679 | let r = _mm256_mask_max_epu16(a, 0b00000000_11111111, a, b); | |
11680 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11681 | assert_eq_m256i(r, e); | |
11682 | } | |
11683 | ||
11684 | #[simd_test(enable = "avx512f,avx512vl")] | |
11685 | unsafe fn test_mm256_maskz_max_epu16() { | |
11686 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11687 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11688 | let r = _mm256_maskz_max_epu16(0, a, b); | |
11689 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
11690 | let r = _mm256_maskz_max_epu16(0b00000000_11111111, a, b); | |
11691 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11692 | assert_eq_m256i(r, e); | |
11693 | } | |
11694 | ||
11695 | #[simd_test(enable = "avx512f,avx512vl")] | |
11696 | unsafe fn test_mm_mask_max_epu16() { | |
11697 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
11698 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
11699 | let r = _mm_mask_max_epu16(a, 0, a, b); | |
11700 | assert_eq_m128i(r, a); | |
11701 | let r = _mm_mask_max_epu16(a, 0b00001111, a, b); | |
11702 | let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
11703 | assert_eq_m128i(r, e); | |
11704 | } | |
11705 | ||
11706 | #[simd_test(enable = "avx512f,avx512vl")] | |
11707 | unsafe fn test_mm_maskz_max_epu16() { | |
11708 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
11709 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
11710 | let r = _mm_maskz_max_epu16(0, a, b); | |
11711 | assert_eq_m128i(r, _mm_setzero_si128()); | |
11712 | let r = _mm_maskz_max_epu16(0b00001111, a, b); | |
11713 | let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); | |
11714 | assert_eq_m128i(r, e); | |
11715 | } | |
11716 | ||
// Unsigned 8-bit max (vpmaxub): with a = 0..15 and b = 15..0 per 16-lane
// group, lane i of the result is max(i, 15 - i).
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_max_epu8() {
    #[rustfmt::skip]
    let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    #[rustfmt::skip]
    let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                            15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    let r = _mm512_max_epu8(a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                            15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                            15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15,
                            15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
    assert_eq_m512i(r, e);
}
11737 | ||
11738 | #[simd_test(enable = "avx512f")] | |
11739 | unsafe fn test_mm512_mask_max_epu8() { | |
11740 | #[rustfmt::skip] | |
11741 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11742 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11743 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11744 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11745 | #[rustfmt::skip] | |
11746 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11747 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11748 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11749 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11750 | let r = _mm512_mask_max_epu8(a, 0, a, b); | |
11751 | assert_eq_m512i(r, a); | |
11752 | let r = _mm512_mask_max_epu8( | |
11753 | a, | |
11754 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
11755 | a, | |
11756 | b, | |
11757 | ); | |
11758 | #[rustfmt::skip] | |
11759 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11760 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11761 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11762 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11763 | assert_eq_m512i(r, e); | |
11764 | } | |
11765 | ||
11766 | #[simd_test(enable = "avx512f")] | |
11767 | unsafe fn test_mm512_maskz_max_epu8() { | |
11768 | #[rustfmt::skip] | |
11769 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11770 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11771 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11772 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11773 | #[rustfmt::skip] | |
11774 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11775 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11776 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11777 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11778 | let r = _mm512_maskz_max_epu8(0, a, b); | |
11779 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
11780 | let r = _mm512_maskz_max_epu8( | |
11781 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
11782 | a, | |
11783 | b, | |
11784 | ); | |
11785 | #[rustfmt::skip] | |
11786 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
11787 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
11788 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
11789 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11790 | assert_eq_m512i(r, e); | |
11791 | } | |
11792 | ||
11793 | #[simd_test(enable = "avx512f,avx512vl")] | |
11794 | unsafe fn test_mm256_mask_max_epu8() { | |
11795 | #[rustfmt::skip] | |
11796 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11797 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11798 | #[rustfmt::skip] | |
11799 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11800 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11801 | let r = _mm256_mask_max_epu8(a, 0, a, b); | |
11802 | assert_eq_m256i(r, a); | |
11803 | let r = _mm256_mask_max_epu8(a, 0b00000000_11111111_00000000_11111111, a, b); | |
11804 | #[rustfmt::skip] | |
11805 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11806 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11807 | assert_eq_m256i(r, e); | |
11808 | } | |
11809 | ||
11810 | #[simd_test(enable = "avx512f,avx512vl")] | |
11811 | unsafe fn test_mm256_maskz_max_epu8() { | |
11812 | #[rustfmt::skip] | |
11813 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11814 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11815 | #[rustfmt::skip] | |
11816 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11817 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11818 | let r = _mm256_maskz_max_epu8(0, a, b); | |
11819 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
11820 | let r = _mm256_maskz_max_epu8(0b00000000_11111111_00000000_11111111, a, b); | |
11821 | #[rustfmt::skip] | |
11822 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
11823 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11824 | assert_eq_m256i(r, e); | |
11825 | } | |
11826 | ||
11827 | #[simd_test(enable = "avx512f,avx512vl")] | |
11828 | unsafe fn test_mm_mask_max_epu8() { | |
11829 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11830 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11831 | let r = _mm_mask_max_epu8(a, 0, a, b); | |
11832 | assert_eq_m128i(r, a); | |
11833 | let r = _mm_mask_max_epu8(a, 0b00000000_11111111, a, b); | |
11834 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11835 | assert_eq_m128i(r, e); | |
11836 | } | |
11837 | ||
11838 | #[simd_test(enable = "avx512f,avx512vl")] | |
11839 | unsafe fn test_mm_maskz_max_epu8() { | |
11840 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11841 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11842 | let r = _mm_maskz_max_epu8(0, a, b); | |
11843 | assert_eq_m128i(r, _mm_setzero_si128()); | |
11844 | let r = _mm_maskz_max_epu8(0b00000000_11111111, a, b); | |
11845 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11846 | assert_eq_m128i(r, e); | |
11847 | } | |
11848 | ||
11849 | #[simd_test(enable = "avx512bw")] | |
11850 | unsafe fn test_mm512_max_epi16() { | |
11851 | #[rustfmt::skip] | |
11852 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11853 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11854 | #[rustfmt::skip] | |
11855 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11856 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11857 | let r = _mm512_max_epi16(a, b); | |
11858 | #[rustfmt::skip] | |
11859 | let e = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, | |
11860 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); | |
11861 | assert_eq_m512i(r, e); | |
11862 | } | |
11863 | ||
11864 | #[simd_test(enable = "avx512f")] | |
11865 | unsafe fn test_mm512_mask_max_epi16() { | |
11866 | #[rustfmt::skip] | |
11867 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11868 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11869 | #[rustfmt::skip] | |
11870 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11871 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11872 | let r = _mm512_mask_max_epi16(a, 0, a, b); | |
11873 | assert_eq_m512i(r, a); | |
11874 | let r = _mm512_mask_max_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); | |
11875 | #[rustfmt::skip] | |
11876 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11877 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11878 | assert_eq_m512i(r, e); | |
11879 | } | |
11880 | ||
11881 | #[simd_test(enable = "avx512f")] | |
11882 | unsafe fn test_mm512_maskz_max_epi16() { | |
11883 | #[rustfmt::skip] | |
11884 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11885 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11886 | #[rustfmt::skip] | |
11887 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11888 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11889 | let r = _mm512_maskz_max_epi16(0, a, b); | |
11890 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
11891 | let r = _mm512_maskz_max_epi16(0b00000000_11111111_00000000_11111111, a, b); | |
11892 | #[rustfmt::skip] | |
11893 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
11894 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11895 | assert_eq_m512i(r, e); | |
11896 | } | |
11897 | ||
11898 | #[simd_test(enable = "avx512f,avx512vl")] | |
11899 | unsafe fn test_mm256_mask_max_epi16() { | |
11900 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11901 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11902 | let r = _mm256_mask_max_epi16(a, 0, a, b); | |
11903 | assert_eq_m256i(r, a); | |
11904 | let r = _mm256_mask_max_epi16(a, 0b00000000_11111111, a, b); | |
11905 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11906 | assert_eq_m256i(r, e); | |
11907 | } | |
11908 | ||
11909 | #[simd_test(enable = "avx512f,avx512vl")] | |
11910 | unsafe fn test_mm256_maskz_max_epi16() { | |
11911 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11912 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11913 | let r = _mm256_maskz_max_epi16(0, a, b); | |
11914 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
11915 | let r = _mm256_maskz_max_epi16(0b00000000_11111111, a, b); | |
11916 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
11917 | assert_eq_m256i(r, e); | |
11918 | } | |
11919 | ||
11920 | #[simd_test(enable = "avx512f,avx512vl")] | |
11921 | unsafe fn test_mm_mask_max_epi16() { | |
11922 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
11923 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
11924 | let r = _mm_mask_max_epi16(a, 0, a, b); | |
11925 | assert_eq_m128i(r, a); | |
11926 | let r = _mm_mask_max_epi16(a, 0b00001111, a, b); | |
11927 | let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
11928 | assert_eq_m128i(r, e); | |
11929 | } | |
11930 | ||
11931 | #[simd_test(enable = "avx512f,avx512vl")] | |
11932 | unsafe fn test_mm_maskz_max_epi16() { | |
11933 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
11934 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
11935 | let r = _mm_maskz_max_epi16(0, a, b); | |
11936 | assert_eq_m128i(r, _mm_setzero_si128()); | |
11937 | let r = _mm_maskz_max_epi16(0b00001111, a, b); | |
11938 | let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); | |
11939 | assert_eq_m128i(r, e); | |
11940 | } | |
11941 | ||
11942 | #[simd_test(enable = "avx512bw")] | |
11943 | unsafe fn test_mm512_max_epi8() { | |
11944 | #[rustfmt::skip] | |
11945 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11946 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11947 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11948 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11949 | #[rustfmt::skip] | |
11950 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11951 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11952 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11953 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11954 | let r = _mm512_max_epi8(a, b); | |
11955 | #[rustfmt::skip] | |
11956 | let e = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, | |
11957 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, | |
11958 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15, | |
11959 | 15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); | |
11960 | assert_eq_m512i(r, e); | |
11961 | } | |
11962 | ||
11963 | #[simd_test(enable = "avx512f")] | |
11964 | unsafe fn test_mm512_mask_max_epi8() { | |
11965 | #[rustfmt::skip] | |
11966 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11967 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11968 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11969 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11970 | #[rustfmt::skip] | |
11971 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11972 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11973 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
11974 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
11975 | let r = _mm512_mask_max_epi8(a, 0, a, b); | |
11976 | assert_eq_m512i(r, a); | |
11977 | let r = _mm512_mask_max_epi8( | |
11978 | a, | |
11979 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
11980 | a, | |
11981 | b, | |
11982 | ); | |
11983 | #[rustfmt::skip] | |
11984 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11985 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11986 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11987 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11988 | assert_eq_m512i(r, e); | |
11989 | } | |
11990 | ||
11991 | #[simd_test(enable = "avx512f")] | |
11992 | unsafe fn test_mm512_maskz_max_epi8() { | |
11993 | #[rustfmt::skip] | |
11994 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11995 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11996 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
11997 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
11998 | #[rustfmt::skip] | |
11999 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12000 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12001 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12002 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12003 | let r = _mm512_maskz_max_epi8(0, a, b); | |
12004 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
12005 | let r = _mm512_maskz_max_epi8( | |
12006 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
12007 | a, | |
12008 | b, | |
12009 | ); | |
12010 | #[rustfmt::skip] | |
12011 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
12012 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
12013 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
12014 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
12015 | assert_eq_m512i(r, e); | |
12016 | } | |
12017 | ||
12018 | #[simd_test(enable = "avx512f,avx512vl")] | |
12019 | unsafe fn test_mm256_mask_max_epi8() { | |
12020 | #[rustfmt::skip] | |
12021 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12022 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12023 | #[rustfmt::skip] | |
12024 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12025 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12026 | let r = _mm256_mask_max_epi8(a, 0, a, b); | |
12027 | assert_eq_m256i(r, a); | |
12028 | let r = _mm256_mask_max_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); | |
12029 | #[rustfmt::skip] | |
12030 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12031 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12032 | assert_eq_m256i(r, e); | |
12033 | } | |
12034 | ||
12035 | #[simd_test(enable = "avx512f,avx512vl")] | |
12036 | unsafe fn test_mm256_maskz_max_epi8() { | |
12037 | #[rustfmt::skip] | |
12038 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12039 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12040 | #[rustfmt::skip] | |
12041 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12042 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12043 | let r = _mm256_maskz_max_epi8(0, a, b); | |
12044 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
12045 | let r = _mm256_maskz_max_epi8(0b00000000_11111111_00000000_11111111, a, b); | |
12046 | #[rustfmt::skip] | |
12047 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15, | |
12048 | 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
12049 | assert_eq_m256i(r, e); | |
12050 | } | |
12051 | ||
12052 | #[simd_test(enable = "avx512f,avx512vl")] | |
12053 | unsafe fn test_mm_mask_max_epi8() { | |
12054 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12055 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12056 | let r = _mm_mask_max_epi8(a, 0, a, b); | |
12057 | assert_eq_m128i(r, a); | |
12058 | let r = _mm_mask_max_epi8(a, 0b00000000_11111111, a, b); | |
12059 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12060 | assert_eq_m128i(r, e); | |
12061 | } | |
12062 | ||
12063 | #[simd_test(enable = "avx512f,avx512vl")] | |
12064 | unsafe fn test_mm_maskz_max_epi8() { | |
12065 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12066 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12067 | let r = _mm_maskz_max_epi8(0, a, b); | |
12068 | assert_eq_m128i(r, _mm_setzero_si128()); | |
12069 | let r = _mm_maskz_max_epi8(0b00000000_11111111, a, b); | |
12070 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); | |
12071 | assert_eq_m128i(r, e); | |
12072 | } | |
12073 | ||
12074 | #[simd_test(enable = "avx512bw")] | |
12075 | unsafe fn test_mm512_min_epu16() { | |
12076 | #[rustfmt::skip] | |
12077 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12078 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12079 | #[rustfmt::skip] | |
12080 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12081 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12082 | let r = _mm512_min_epu16(a, b); | |
12083 | #[rustfmt::skip] | |
12084 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12085 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12086 | assert_eq_m512i(r, e); | |
12087 | } | |
12088 | ||
12089 | #[simd_test(enable = "avx512f")] | |
12090 | unsafe fn test_mm512_mask_min_epu16() { | |
12091 | #[rustfmt::skip] | |
12092 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12093 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12094 | #[rustfmt::skip] | |
12095 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12096 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12097 | let r = _mm512_mask_min_epu16(a, 0, a, b); | |
12098 | assert_eq_m512i(r, a); | |
12099 | let r = _mm512_mask_min_epu16(a, 0b00000000_11111111_00000000_11111111, a, b); | |
12100 | #[rustfmt::skip] | |
12101 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12102 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12103 | assert_eq_m512i(r, e); | |
12104 | } | |
12105 | ||
12106 | #[simd_test(enable = "avx512f")] | |
12107 | unsafe fn test_mm512_maskz_min_epu16() { | |
12108 | #[rustfmt::skip] | |
12109 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12110 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12111 | #[rustfmt::skip] | |
12112 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12113 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12114 | let r = _mm512_maskz_min_epu16(0, a, b); | |
12115 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
12116 | let r = _mm512_maskz_min_epu16(0b00000000_11111111_00000000_11111111, a, b); | |
12117 | #[rustfmt::skip] | |
12118 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12119 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12120 | assert_eq_m512i(r, e); | |
12121 | } | |
12122 | ||
12123 | #[simd_test(enable = "avx512f,avx512vl")] | |
12124 | unsafe fn test_mm256_mask_min_epu16() { | |
12125 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12126 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12127 | let r = _mm256_mask_min_epu16(a, 0, a, b); | |
12128 | assert_eq_m256i(r, a); | |
12129 | let r = _mm256_mask_min_epu16(a, 0b00000000_11111111, a, b); | |
12130 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12131 | assert_eq_m256i(r, e); | |
12132 | } | |
12133 | ||
12134 | #[simd_test(enable = "avx512f,avx512vl")] | |
12135 | unsafe fn test_mm256_maskz_min_epu16() { | |
12136 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12137 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12138 | let r = _mm256_maskz_min_epu16(0, a, b); | |
12139 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
12140 | let r = _mm256_maskz_min_epu16(0b00000000_11111111, a, b); | |
12141 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12142 | assert_eq_m256i(r, e); | |
12143 | } | |
12144 | ||
12145 | #[simd_test(enable = "avx512f,avx512vl")] | |
12146 | unsafe fn test_mm_mask_min_epu16() { | |
12147 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
12148 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
12149 | let r = _mm_mask_min_epu16(a, 0, a, b); | |
12150 | assert_eq_m128i(r, a); | |
12151 | let r = _mm_mask_min_epu16(a, 0b00001111, a, b); | |
12152 | let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0); | |
12153 | assert_eq_m128i(r, e); | |
12154 | } | |
12155 | ||
12156 | #[simd_test(enable = "avx512f,avx512vl")] | |
12157 | unsafe fn test_mm_maskz_min_epu16() { | |
12158 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
12159 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
12160 | let r = _mm_maskz_min_epu16(0, a, b); | |
12161 | assert_eq_m128i(r, _mm_setzero_si128()); | |
12162 | let r = _mm_maskz_min_epu16(0b00001111, a, b); | |
12163 | let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0); | |
12164 | assert_eq_m128i(r, e); | |
12165 | } | |
12166 | ||
12167 | #[simd_test(enable = "avx512bw")] | |
12168 | unsafe fn test_mm512_min_epu8() { | |
12169 | #[rustfmt::skip] | |
12170 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12171 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12172 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12173 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12174 | #[rustfmt::skip] | |
12175 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12176 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12177 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12178 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12179 | let r = _mm512_min_epu8(a, b); | |
12180 | #[rustfmt::skip] | |
12181 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12182 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12183 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12184 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12185 | assert_eq_m512i(r, e); | |
12186 | } | |
12187 | ||
12188 | #[simd_test(enable = "avx512f")] | |
12189 | unsafe fn test_mm512_mask_min_epu8() { | |
12190 | #[rustfmt::skip] | |
12191 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12192 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12193 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12194 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12195 | #[rustfmt::skip] | |
12196 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12197 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12198 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12199 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12200 | let r = _mm512_mask_min_epu8(a, 0, a, b); | |
12201 | assert_eq_m512i(r, a); | |
12202 | let r = _mm512_mask_min_epu8( | |
12203 | a, | |
12204 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
12205 | a, | |
12206 | b, | |
12207 | ); | |
12208 | #[rustfmt::skip] | |
12209 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12210 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12211 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12212 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12213 | assert_eq_m512i(r, e); | |
12214 | } | |
12215 | ||
12216 | #[simd_test(enable = "avx512f")] | |
12217 | unsafe fn test_mm512_maskz_min_epu8() { | |
12218 | #[rustfmt::skip] | |
12219 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12220 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12221 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12222 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12223 | #[rustfmt::skip] | |
12224 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12225 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12226 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12227 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12228 | let r = _mm512_maskz_min_epu8(0, a, b); | |
12229 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
12230 | let r = _mm512_maskz_min_epu8( | |
12231 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
12232 | a, | |
12233 | b, | |
12234 | ); | |
12235 | #[rustfmt::skip] | |
12236 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12237 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12238 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12239 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12240 | assert_eq_m512i(r, e); | |
12241 | } | |
12242 | ||
12243 | #[simd_test(enable = "avx512f,avx512vl")] | |
12244 | unsafe fn test_mm256_mask_min_epu8() { | |
12245 | #[rustfmt::skip] | |
12246 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12247 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12248 | #[rustfmt::skip] | |
12249 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12250 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12251 | let r = _mm256_mask_min_epu8(a, 0, a, b); | |
12252 | assert_eq_m256i(r, a); | |
12253 | let r = _mm256_mask_min_epu8(a, 0b00000000_11111111_00000000_11111111, a, b); | |
12254 | #[rustfmt::skip] | |
12255 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12256 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12257 | assert_eq_m256i(r, e); | |
12258 | } | |
12259 | ||
12260 | #[simd_test(enable = "avx512f,avx512vl")] | |
12261 | unsafe fn test_mm256_maskz_min_epu8() { | |
12262 | #[rustfmt::skip] | |
12263 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12264 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12265 | #[rustfmt::skip] | |
12266 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12267 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12268 | let r = _mm256_maskz_min_epu8(0, a, b); | |
12269 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
12270 | let r = _mm256_maskz_min_epu8(0b00000000_11111111_00000000_11111111, a, b); | |
12271 | #[rustfmt::skip] | |
12272 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12273 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12274 | assert_eq_m256i(r, e); | |
12275 | } | |
12276 | ||
12277 | #[simd_test(enable = "avx512f,avx512vl")] | |
12278 | unsafe fn test_mm_mask_min_epu8() { | |
12279 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12280 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12281 | let r = _mm_mask_min_epu8(a, 0, a, b); | |
12282 | assert_eq_m128i(r, a); | |
12283 | let r = _mm_mask_min_epu8(a, 0b00000000_11111111, a, b); | |
12284 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12285 | assert_eq_m128i(r, e); | |
12286 | } | |
12287 | ||
12288 | #[simd_test(enable = "avx512f,avx512vl")] | |
12289 | unsafe fn test_mm_maskz_min_epu8() { | |
12290 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12291 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12292 | let r = _mm_maskz_min_epu8(0, a, b); | |
12293 | assert_eq_m128i(r, _mm_setzero_si128()); | |
12294 | let r = _mm_maskz_min_epu8(0b00000000_11111111, a, b); | |
12295 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12296 | assert_eq_m128i(r, e); | |
12297 | } | |
12298 | ||
12299 | #[simd_test(enable = "avx512bw")] | |
12300 | unsafe fn test_mm512_min_epi16() { | |
12301 | #[rustfmt::skip] | |
12302 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12303 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12304 | #[rustfmt::skip] | |
12305 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12306 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12307 | let r = _mm512_min_epi16(a, b); | |
12308 | #[rustfmt::skip] | |
12309 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12310 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12311 | assert_eq_m512i(r, e); | |
12312 | } | |
12313 | ||
12314 | #[simd_test(enable = "avx512f")] | |
12315 | unsafe fn test_mm512_mask_min_epi16() { | |
12316 | #[rustfmt::skip] | |
12317 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12318 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12319 | #[rustfmt::skip] | |
12320 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12321 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12322 | let r = _mm512_mask_min_epi16(a, 0, a, b); | |
12323 | assert_eq_m512i(r, a); | |
12324 | let r = _mm512_mask_min_epi16(a, 0b00000000_11111111_00000000_11111111, a, b); | |
12325 | #[rustfmt::skip] | |
12326 | let e = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12327 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12328 | assert_eq_m512i(r, e); | |
12329 | } | |
12330 | ||
12331 | #[simd_test(enable = "avx512f")] | |
12332 | unsafe fn test_mm512_maskz_min_epi16() { | |
12333 | #[rustfmt::skip] | |
12334 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12335 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12336 | #[rustfmt::skip] | |
12337 | let b = _mm512_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12338 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12339 | let r = _mm512_maskz_min_epi16(0, a, b); | |
12340 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
12341 | let r = _mm512_maskz_min_epi16(0b00000000_11111111_00000000_11111111, a, b); | |
12342 | #[rustfmt::skip] | |
12343 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12344 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12345 | assert_eq_m512i(r, e); | |
12346 | } | |
12347 | ||
12348 | #[simd_test(enable = "avx512f,avx512vl")] | |
12349 | unsafe fn test_mm256_mask_min_epi16() { | |
12350 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12351 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12352 | let r = _mm256_mask_min_epi16(a, 0, a, b); | |
12353 | assert_eq_m256i(r, a); | |
12354 | let r = _mm256_mask_min_epi16(a, 0b00000000_11111111, a, b); | |
12355 | let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12356 | assert_eq_m256i(r, e); | |
12357 | } | |
12358 | ||
12359 | #[simd_test(enable = "avx512f,avx512vl")] | |
12360 | unsafe fn test_mm256_maskz_min_epi16() { | |
12361 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12362 | let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12363 | let r = _mm256_maskz_min_epi16(0, a, b); | |
12364 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
12365 | let r = _mm256_maskz_min_epi16(0b00000000_11111111, a, b); | |
12366 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12367 | assert_eq_m256i(r, e); | |
12368 | } | |
12369 | ||
12370 | #[simd_test(enable = "avx512f,avx512vl")] | |
12371 | unsafe fn test_mm_mask_min_epi16() { | |
12372 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
12373 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
12374 | let r = _mm_mask_min_epi16(a, 0, a, b); | |
12375 | assert_eq_m128i(r, a); | |
12376 | let r = _mm_mask_min_epi16(a, 0b00001111, a, b); | |
12377 | let e = _mm_set_epi16(0, 1, 2, 3, 3, 2, 1, 0); | |
12378 | assert_eq_m128i(r, e); | |
12379 | } | |
12380 | ||
12381 | #[simd_test(enable = "avx512f,avx512vl")] | |
12382 | unsafe fn test_mm_maskz_min_epi16() { | |
12383 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
12384 | let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); | |
12385 | let r = _mm_maskz_min_epi16(0, a, b); | |
12386 | assert_eq_m128i(r, _mm_setzero_si128()); | |
12387 | let r = _mm_maskz_min_epi16(0b00001111, a, b); | |
12388 | let e = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0); | |
12389 | assert_eq_m128i(r, e); | |
12390 | } | |
12391 | ||
12392 | #[simd_test(enable = "avx512bw")] | |
12393 | unsafe fn test_mm512_min_epi8() { | |
12394 | #[rustfmt::skip] | |
12395 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12396 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12397 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12398 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12399 | #[rustfmt::skip] | |
12400 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12401 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12402 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12403 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12404 | let r = _mm512_min_epi8(a, b); | |
12405 | #[rustfmt::skip] | |
12406 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12407 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12408 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12409 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12410 | assert_eq_m512i(r, e); | |
12411 | } | |
12412 | ||
12413 | #[simd_test(enable = "avx512f")] | |
12414 | unsafe fn test_mm512_mask_min_epi8() { | |
12415 | #[rustfmt::skip] | |
12416 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12417 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12418 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12419 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12420 | #[rustfmt::skip] | |
12421 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12422 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12423 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12424 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12425 | let r = _mm512_mask_min_epi8(a, 0, a, b); | |
12426 | assert_eq_m512i(r, a); | |
12427 | let r = _mm512_mask_min_epi8( | |
12428 | a, | |
12429 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
12430 | a, | |
12431 | b, | |
12432 | ); | |
12433 | #[rustfmt::skip] | |
12434 | let e = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12435 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12436 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12437 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12438 | assert_eq_m512i(r, e); | |
12439 | } | |
12440 | ||
12441 | #[simd_test(enable = "avx512f")] | |
12442 | unsafe fn test_mm512_maskz_min_epi8() { | |
12443 | #[rustfmt::skip] | |
12444 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12445 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12446 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12447 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12448 | #[rustfmt::skip] | |
12449 | let b = _mm512_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12450 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12451 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12452 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12453 | let r = _mm512_maskz_min_epi8(0, a, b); | |
12454 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
12455 | let r = _mm512_maskz_min_epi8( | |
12456 | 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111, | |
12457 | a, | |
12458 | b, | |
12459 | ); | |
12460 | #[rustfmt::skip] | |
12461 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12462 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12463 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12464 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12465 | assert_eq_m512i(r, e); | |
12466 | } | |
12467 | ||
12468 | #[simd_test(enable = "avx512f,avx512vl")] | |
12469 | unsafe fn test_mm256_mask_min_epi8() { | |
12470 | #[rustfmt::skip] | |
12471 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12472 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12473 | #[rustfmt::skip] | |
12474 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12475 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12476 | let r = _mm256_mask_min_epi8(a, 0, a, b); | |
12477 | assert_eq_m256i(r, a); | |
12478 | let r = _mm256_mask_min_epi8(a, 0b00000000_11111111_00000000_11111111, a, b); | |
12479 | #[rustfmt::skip] | |
12480 | let e = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0, | |
12481 | 0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12482 | assert_eq_m256i(r, e); | |
12483 | } | |
12484 | ||
12485 | #[simd_test(enable = "avx512f,avx512vl")] | |
12486 | unsafe fn test_mm256_maskz_min_epi8() { | |
12487 | #[rustfmt::skip] | |
12488 | let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
12489 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12490 | #[rustfmt::skip] | |
12491 | let b = _mm256_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, | |
12492 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12493 | let r = _mm256_maskz_min_epi8(0, a, b); | |
12494 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
12495 | let r = _mm256_maskz_min_epi8(0b00000000_11111111_00000000_11111111, a, b); | |
12496 | #[rustfmt::skip] | |
12497 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0, | |
12498 | 0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12499 | assert_eq_m256i(r, e); | |
12500 | } | |
12501 | ||
12502 | #[simd_test(enable = "avx512f,avx512vl")] | |
12503 | unsafe fn test_mm_mask_min_epi8() { | |
12504 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12505 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12506 | let r = _mm_mask_min_epi8(a, 0, a, b); | |
12507 | assert_eq_m128i(r, a); | |
12508 | let r = _mm_mask_min_epi8(a, 0b00000000_11111111, a, b); | |
12509 | let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0); | |
12510 | assert_eq_m128i(r, e); | |
12511 | } | |
12512 | ||
12513 | #[simd_test(enable = "avx512f,avx512vl")] | |
12514 | unsafe fn test_mm_maskz_min_epi8() { | |
12515 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
12516 | let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
12517 | let r = _mm_maskz_min_epi8(0, a, b); | |
12518 | assert_eq_m128i(r, _mm_setzero_si128()); | |
12519 | let r = _mm_maskz_min_epi8(0b00000000_11111111, a, b); | |
12520 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 7, 6, 5, 4, 3, 2, 1, 0); | |
12521 | assert_eq_m128i(r, e); | |
12522 | } | |
12523 | ||
12524 | #[simd_test(enable = "avx512bw")] | |
12525 | unsafe fn test_mm512_cmplt_epu16_mask() { | |
12526 | let a = _mm512_set1_epi16(-2); | |
12527 | let b = _mm512_set1_epi16(-1); | |
12528 | let m = _mm512_cmplt_epu16_mask(a, b); | |
12529 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12530 | } | |
12531 | ||
12532 | #[simd_test(enable = "avx512bw")] | |
12533 | unsafe fn test_mm512_mask_cmplt_epu16_mask() { | |
12534 | let a = _mm512_set1_epi16(-2); | |
12535 | let b = _mm512_set1_epi16(-1); | |
12536 | let mask = 0b01010101_01010101_01010101_01010101; | |
12537 | let r = _mm512_mask_cmplt_epu16_mask(mask, a, b); | |
12538 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12539 | } | |
12540 | ||
cdc7bbd5 XL |
12541 | #[simd_test(enable = "avx512bw,avx512vl")] |
12542 | unsafe fn test_mm256_cmplt_epu16_mask() { | |
12543 | let a = _mm256_set1_epi16(-2); | |
12544 | let b = _mm256_set1_epi16(-1); | |
12545 | let m = _mm256_cmplt_epu16_mask(a, b); | |
12546 | assert_eq!(m, 0b11111111_11111111); | |
12547 | } | |
12548 | ||
12549 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12550 | unsafe fn test_mm256_mask_cmplt_epu16_mask() { | |
12551 | let a = _mm256_set1_epi16(-2); | |
12552 | let b = _mm256_set1_epi16(-1); | |
12553 | let mask = 0b01010101_01010101; | |
12554 | let r = _mm256_mask_cmplt_epu16_mask(mask, a, b); | |
12555 | assert_eq!(r, 0b01010101_01010101); | |
12556 | } | |
12557 | ||
12558 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12559 | unsafe fn test_mm_cmplt_epu16_mask() { | |
12560 | let a = _mm_set1_epi16(-2); | |
12561 | let b = _mm_set1_epi16(-1); | |
12562 | let m = _mm_cmplt_epu16_mask(a, b); | |
12563 | assert_eq!(m, 0b11111111); | |
12564 | } | |
12565 | ||
12566 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12567 | unsafe fn test_mm_mask_cmplt_epu16_mask() { | |
12568 | let a = _mm_set1_epi16(-2); | |
12569 | let b = _mm_set1_epi16(-1); | |
12570 | let mask = 0b01010101; | |
12571 | let r = _mm_mask_cmplt_epu16_mask(mask, a, b); | |
12572 | assert_eq!(r, 0b01010101); | |
12573 | } | |
12574 | ||
fc512014 XL |
12575 | #[simd_test(enable = "avx512bw")] |
12576 | unsafe fn test_mm512_cmplt_epu8_mask() { | |
12577 | let a = _mm512_set1_epi8(-2); | |
12578 | let b = _mm512_set1_epi8(-1); | |
12579 | let m = _mm512_cmplt_epu8_mask(a, b); | |
12580 | assert_eq!( | |
12581 | m, | |
12582 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
12583 | ); | |
12584 | } | |
12585 | ||
12586 | #[simd_test(enable = "avx512bw")] | |
12587 | unsafe fn test_mm512_mask_cmplt_epu8_mask() { | |
12588 | let a = _mm512_set1_epi8(-2); | |
12589 | let b = _mm512_set1_epi8(-1); | |
12590 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
12591 | let r = _mm512_mask_cmplt_epu8_mask(mask, a, b); | |
12592 | assert_eq!( | |
12593 | r, | |
12594 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
12595 | ); | |
12596 | } | |
12597 | ||
cdc7bbd5 XL |
12598 | #[simd_test(enable = "avx512bw,avx512vl")] |
12599 | unsafe fn test_mm256_cmplt_epu8_mask() { | |
12600 | let a = _mm256_set1_epi8(-2); | |
12601 | let b = _mm256_set1_epi8(-1); | |
12602 | let m = _mm256_cmplt_epu8_mask(a, b); | |
12603 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12604 | } | |
12605 | ||
12606 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12607 | unsafe fn test_mm256_mask_cmplt_epu8_mask() { | |
12608 | let a = _mm256_set1_epi8(-2); | |
12609 | let b = _mm256_set1_epi8(-1); | |
12610 | let mask = 0b01010101_01010101_01010101_01010101; | |
12611 | let r = _mm256_mask_cmplt_epu8_mask(mask, a, b); | |
12612 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12613 | } | |
12614 | ||
12615 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12616 | unsafe fn test_mm_cmplt_epu8_mask() { | |
12617 | let a = _mm_set1_epi8(-2); | |
12618 | let b = _mm_set1_epi8(-1); | |
12619 | let m = _mm_cmplt_epu8_mask(a, b); | |
12620 | assert_eq!(m, 0b11111111_11111111); | |
12621 | } | |
12622 | ||
12623 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12624 | unsafe fn test_mm_mask_cmplt_epu8_mask() { | |
12625 | let a = _mm_set1_epi8(-2); | |
12626 | let b = _mm_set1_epi8(-1); | |
12627 | let mask = 0b01010101_01010101; | |
12628 | let r = _mm_mask_cmplt_epu8_mask(mask, a, b); | |
12629 | assert_eq!(r, 0b01010101_01010101); | |
12630 | } | |
12631 | ||
fc512014 XL |
12632 | #[simd_test(enable = "avx512bw")] |
12633 | unsafe fn test_mm512_cmplt_epi16_mask() { | |
12634 | let a = _mm512_set1_epi16(-2); | |
12635 | let b = _mm512_set1_epi16(-1); | |
12636 | let m = _mm512_cmplt_epi16_mask(a, b); | |
12637 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12638 | } | |
12639 | ||
12640 | #[simd_test(enable = "avx512bw")] | |
12641 | unsafe fn test_mm512_mask_cmplt_epi16_mask() { | |
12642 | let a = _mm512_set1_epi16(-2); | |
12643 | let b = _mm512_set1_epi16(-1); | |
12644 | let mask = 0b01010101_01010101_01010101_01010101; | |
12645 | let r = _mm512_mask_cmplt_epi16_mask(mask, a, b); | |
12646 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12647 | } | |
12648 | ||
cdc7bbd5 XL |
12649 | #[simd_test(enable = "avx512bw,avx512vl")] |
12650 | unsafe fn test_mm256_cmplt_epi16_mask() { | |
12651 | let a = _mm256_set1_epi16(-2); | |
12652 | let b = _mm256_set1_epi16(-1); | |
12653 | let m = _mm256_cmplt_epi16_mask(a, b); | |
12654 | assert_eq!(m, 0b11111111_11111111); | |
12655 | } | |
12656 | ||
12657 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12658 | unsafe fn test_mm256_mask_cmplt_epi16_mask() { | |
12659 | let a = _mm256_set1_epi16(-2); | |
12660 | let b = _mm256_set1_epi16(-1); | |
12661 | let mask = 0b01010101_01010101; | |
12662 | let r = _mm256_mask_cmplt_epi16_mask(mask, a, b); | |
12663 | assert_eq!(r, 0b01010101_01010101); | |
12664 | } | |
12665 | ||
12666 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12667 | unsafe fn test_mm_cmplt_epi16_mask() { | |
12668 | let a = _mm_set1_epi16(-2); | |
12669 | let b = _mm_set1_epi16(-1); | |
12670 | let m = _mm_cmplt_epi16_mask(a, b); | |
12671 | assert_eq!(m, 0b11111111); | |
12672 | } | |
12673 | ||
12674 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12675 | unsafe fn test_mm_mask_cmplt_epi16_mask() { | |
12676 | let a = _mm_set1_epi16(-2); | |
12677 | let b = _mm_set1_epi16(-1); | |
12678 | let mask = 0b01010101; | |
12679 | let r = _mm_mask_cmplt_epi16_mask(mask, a, b); | |
12680 | assert_eq!(r, 0b01010101); | |
12681 | } | |
12682 | ||
fc512014 XL |
12683 | #[simd_test(enable = "avx512bw")] |
12684 | unsafe fn test_mm512_cmplt_epi8_mask() { | |
12685 | let a = _mm512_set1_epi8(-2); | |
12686 | let b = _mm512_set1_epi8(-1); | |
12687 | let m = _mm512_cmplt_epi8_mask(a, b); | |
12688 | assert_eq!( | |
12689 | m, | |
12690 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
12691 | ); | |
12692 | } | |
12693 | ||
12694 | #[simd_test(enable = "avx512bw")] | |
12695 | unsafe fn test_mm512_mask_cmplt_epi8_mask() { | |
12696 | let a = _mm512_set1_epi8(-2); | |
12697 | let b = _mm512_set1_epi8(-1); | |
12698 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
12699 | let r = _mm512_mask_cmplt_epi8_mask(mask, a, b); | |
12700 | assert_eq!( | |
12701 | r, | |
12702 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
12703 | ); | |
12704 | } | |
12705 | ||
cdc7bbd5 XL |
12706 | #[simd_test(enable = "avx512bw,avx512vl")] |
12707 | unsafe fn test_mm256_cmplt_epi8_mask() { | |
12708 | let a = _mm256_set1_epi8(-2); | |
12709 | let b = _mm256_set1_epi8(-1); | |
12710 | let m = _mm256_cmplt_epi8_mask(a, b); | |
12711 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12712 | } | |
12713 | ||
12714 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12715 | unsafe fn test_mm256_mask_cmplt_epi8_mask() { | |
12716 | let a = _mm256_set1_epi8(-2); | |
12717 | let b = _mm256_set1_epi8(-1); | |
12718 | let mask = 0b01010101_01010101_01010101_01010101; | |
12719 | let r = _mm256_mask_cmplt_epi8_mask(mask, a, b); | |
12720 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12721 | } | |
12722 | ||
12723 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12724 | unsafe fn test_mm_cmplt_epi8_mask() { | |
12725 | let a = _mm_set1_epi8(-2); | |
12726 | let b = _mm_set1_epi8(-1); | |
12727 | let m = _mm_cmplt_epi8_mask(a, b); | |
12728 | assert_eq!(m, 0b11111111_11111111); | |
12729 | } | |
12730 | ||
12731 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12732 | unsafe fn test_mm_mask_cmplt_epi8_mask() { | |
12733 | let a = _mm_set1_epi8(-2); | |
12734 | let b = _mm_set1_epi8(-1); | |
12735 | let mask = 0b01010101_01010101; | |
12736 | let r = _mm_mask_cmplt_epi8_mask(mask, a, b); | |
12737 | assert_eq!(r, 0b01010101_01010101); | |
12738 | } | |
12739 | ||
fc512014 XL |
12740 | #[simd_test(enable = "avx512bw")] |
12741 | unsafe fn test_mm512_cmpgt_epu16_mask() { | |
12742 | let a = _mm512_set1_epi16(2); | |
12743 | let b = _mm512_set1_epi16(1); | |
12744 | let m = _mm512_cmpgt_epu16_mask(a, b); | |
12745 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12746 | } | |
12747 | ||
12748 | #[simd_test(enable = "avx512bw")] | |
12749 | unsafe fn test_mm512_mask_cmpgt_epu16_mask() { | |
12750 | let a = _mm512_set1_epi16(2); | |
12751 | let b = _mm512_set1_epi16(1); | |
12752 | let mask = 0b01010101_01010101_01010101_01010101; | |
12753 | let r = _mm512_mask_cmpgt_epu16_mask(mask, a, b); | |
12754 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12755 | } | |
12756 | ||
cdc7bbd5 XL |
12757 | #[simd_test(enable = "avx512bw,avx512vl")] |
12758 | unsafe fn test_mm256_cmpgt_epu16_mask() { | |
12759 | let a = _mm256_set1_epi16(2); | |
12760 | let b = _mm256_set1_epi16(1); | |
12761 | let m = _mm256_cmpgt_epu16_mask(a, b); | |
12762 | assert_eq!(m, 0b11111111_11111111); | |
12763 | } | |
12764 | ||
12765 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12766 | unsafe fn test_mm256_mask_cmpgt_epu16_mask() { | |
12767 | let a = _mm256_set1_epi16(2); | |
12768 | let b = _mm256_set1_epi16(1); | |
12769 | let mask = 0b01010101_01010101; | |
12770 | let r = _mm256_mask_cmpgt_epu16_mask(mask, a, b); | |
12771 | assert_eq!(r, 0b01010101_01010101); | |
12772 | } | |
12773 | ||
12774 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12775 | unsafe fn test_mm_cmpgt_epu16_mask() { | |
12776 | let a = _mm_set1_epi16(2); | |
12777 | let b = _mm_set1_epi16(1); | |
12778 | let m = _mm_cmpgt_epu16_mask(a, b); | |
12779 | assert_eq!(m, 0b11111111); | |
12780 | } | |
12781 | ||
12782 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12783 | unsafe fn test_mm_mask_cmpgt_epu16_mask() { | |
12784 | let a = _mm_set1_epi16(2); | |
12785 | let b = _mm_set1_epi16(1); | |
12786 | let mask = 0b01010101; | |
12787 | let r = _mm_mask_cmpgt_epu16_mask(mask, a, b); | |
12788 | assert_eq!(r, 0b01010101); | |
12789 | } | |
12790 | ||
fc512014 XL |
12791 | #[simd_test(enable = "avx512bw")] |
12792 | unsafe fn test_mm512_cmpgt_epu8_mask() { | |
12793 | let a = _mm512_set1_epi8(2); | |
12794 | let b = _mm512_set1_epi8(1); | |
12795 | let m = _mm512_cmpgt_epu8_mask(a, b); | |
12796 | assert_eq!( | |
12797 | m, | |
12798 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
12799 | ); | |
12800 | } | |
12801 | ||
12802 | #[simd_test(enable = "avx512bw")] | |
12803 | unsafe fn test_mm512_mask_cmpgt_epu8_mask() { | |
12804 | let a = _mm512_set1_epi8(2); | |
12805 | let b = _mm512_set1_epi8(1); | |
12806 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
12807 | let r = _mm512_mask_cmpgt_epu8_mask(mask, a, b); | |
12808 | assert_eq!( | |
12809 | r, | |
12810 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
12811 | ); | |
12812 | } | |
12813 | ||
cdc7bbd5 XL |
12814 | #[simd_test(enable = "avx512bw,avx512vl")] |
12815 | unsafe fn test_mm256_cmpgt_epu8_mask() { | |
12816 | let a = _mm256_set1_epi8(2); | |
12817 | let b = _mm256_set1_epi8(1); | |
12818 | let m = _mm256_cmpgt_epu8_mask(a, b); | |
12819 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12820 | } | |
12821 | ||
12822 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12823 | unsafe fn test_mm256_mask_cmpgt_epu8_mask() { | |
12824 | let a = _mm256_set1_epi8(2); | |
12825 | let b = _mm256_set1_epi8(1); | |
12826 | let mask = 0b01010101_01010101_01010101_01010101; | |
12827 | let r = _mm256_mask_cmpgt_epu8_mask(mask, a, b); | |
12828 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12829 | } | |
12830 | ||
12831 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12832 | unsafe fn test_mm_cmpgt_epu8_mask() { | |
12833 | let a = _mm_set1_epi8(2); | |
12834 | let b = _mm_set1_epi8(1); | |
12835 | let m = _mm_cmpgt_epu8_mask(a, b); | |
12836 | assert_eq!(m, 0b11111111_11111111); | |
12837 | } | |
12838 | ||
12839 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12840 | unsafe fn test_mm_mask_cmpgt_epu8_mask() { | |
12841 | let a = _mm_set1_epi8(2); | |
12842 | let b = _mm_set1_epi8(1); | |
12843 | let mask = 0b01010101_01010101; | |
12844 | let r = _mm_mask_cmpgt_epu8_mask(mask, a, b); | |
12845 | assert_eq!(r, 0b01010101_01010101); | |
12846 | } | |
12847 | ||
fc512014 XL |
12848 | #[simd_test(enable = "avx512bw")] |
12849 | unsafe fn test_mm512_cmpgt_epi16_mask() { | |
12850 | let a = _mm512_set1_epi16(2); | |
12851 | let b = _mm512_set1_epi16(-1); | |
12852 | let m = _mm512_cmpgt_epi16_mask(a, b); | |
12853 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12854 | } | |
12855 | ||
12856 | #[simd_test(enable = "avx512bw")] | |
12857 | unsafe fn test_mm512_mask_cmpgt_epi16_mask() { | |
12858 | let a = _mm512_set1_epi16(2); | |
12859 | let b = _mm512_set1_epi16(-1); | |
12860 | let mask = 0b01010101_01010101_01010101_01010101; | |
12861 | let r = _mm512_mask_cmpgt_epi16_mask(mask, a, b); | |
12862 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12863 | } | |
12864 | ||
cdc7bbd5 XL |
12865 | #[simd_test(enable = "avx512bw,avx512vl")] |
12866 | unsafe fn test_mm256_cmpgt_epi16_mask() { | |
12867 | let a = _mm256_set1_epi16(2); | |
12868 | let b = _mm256_set1_epi16(-1); | |
12869 | let m = _mm256_cmpgt_epi16_mask(a, b); | |
12870 | assert_eq!(m, 0b11111111_11111111); | |
12871 | } | |
12872 | ||
12873 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12874 | unsafe fn test_mm256_mask_cmpgt_epi16_mask() { | |
12875 | let a = _mm256_set1_epi16(2); | |
12876 | let b = _mm256_set1_epi16(-1); | |
12877 | let mask = 0b001010101_01010101; | |
12878 | let r = _mm256_mask_cmpgt_epi16_mask(mask, a, b); | |
12879 | assert_eq!(r, 0b01010101_01010101); | |
12880 | } | |
12881 | ||
12882 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12883 | unsafe fn test_mm_cmpgt_epi16_mask() { | |
12884 | let a = _mm_set1_epi16(2); | |
12885 | let b = _mm_set1_epi16(-1); | |
12886 | let m = _mm_cmpgt_epi16_mask(a, b); | |
12887 | assert_eq!(m, 0b11111111); | |
12888 | } | |
12889 | ||
12890 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12891 | unsafe fn test_mm_mask_cmpgt_epi16_mask() { | |
12892 | let a = _mm_set1_epi16(2); | |
12893 | let b = _mm_set1_epi16(-1); | |
12894 | let mask = 0b01010101; | |
12895 | let r = _mm_mask_cmpgt_epi16_mask(mask, a, b); | |
12896 | assert_eq!(r, 0b01010101); | |
12897 | } | |
12898 | ||
fc512014 XL |
12899 | #[simd_test(enable = "avx512bw")] |
12900 | unsafe fn test_mm512_cmpgt_epi8_mask() { | |
12901 | let a = _mm512_set1_epi8(2); | |
12902 | let b = _mm512_set1_epi8(-1); | |
12903 | let m = _mm512_cmpgt_epi8_mask(a, b); | |
12904 | assert_eq!( | |
12905 | m, | |
12906 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
12907 | ); | |
12908 | } | |
12909 | ||
12910 | #[simd_test(enable = "avx512bw")] | |
12911 | unsafe fn test_mm512_mask_cmpgt_epi8_mask() { | |
12912 | let a = _mm512_set1_epi8(2); | |
12913 | let b = _mm512_set1_epi8(-1); | |
12914 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
12915 | let r = _mm512_mask_cmpgt_epi8_mask(mask, a, b); | |
12916 | assert_eq!( | |
12917 | r, | |
12918 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
12919 | ); | |
12920 | } | |
12921 | ||
cdc7bbd5 XL |
12922 | #[simd_test(enable = "avx512bw,avx512vl")] |
12923 | unsafe fn test_mm256_cmpgt_epi8_mask() { | |
12924 | let a = _mm256_set1_epi8(2); | |
12925 | let b = _mm256_set1_epi8(-1); | |
12926 | let m = _mm256_cmpgt_epi8_mask(a, b); | |
12927 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12928 | } | |
12929 | ||
12930 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12931 | unsafe fn test_mm256_mask_cmpgt_epi8_mask() { | |
12932 | let a = _mm256_set1_epi8(2); | |
12933 | let b = _mm256_set1_epi8(-1); | |
12934 | let mask = 0b01010101_01010101_01010101_01010101; | |
12935 | let r = _mm256_mask_cmpgt_epi8_mask(mask, a, b); | |
12936 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12937 | } | |
12938 | ||
12939 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12940 | unsafe fn test_mm_cmpgt_epi8_mask() { | |
12941 | let a = _mm_set1_epi8(2); | |
12942 | let b = _mm_set1_epi8(-1); | |
12943 | let m = _mm_cmpgt_epi8_mask(a, b); | |
12944 | assert_eq!(m, 0b11111111_11111111); | |
12945 | } | |
12946 | ||
12947 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12948 | unsafe fn test_mm_mask_cmpgt_epi8_mask() { | |
12949 | let a = _mm_set1_epi8(2); | |
12950 | let b = _mm_set1_epi8(-1); | |
12951 | let mask = 0b01010101_01010101; | |
12952 | let r = _mm_mask_cmpgt_epi8_mask(mask, a, b); | |
12953 | assert_eq!(r, 0b01010101_01010101); | |
12954 | } | |
12955 | ||
fc512014 XL |
12956 | #[simd_test(enable = "avx512bw")] |
12957 | unsafe fn test_mm512_cmple_epu16_mask() { | |
12958 | let a = _mm512_set1_epi16(-1); | |
12959 | let b = _mm512_set1_epi16(-1); | |
12960 | let m = _mm512_cmple_epu16_mask(a, b); | |
12961 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
12962 | } | |
12963 | ||
12964 | #[simd_test(enable = "avx512bw")] | |
12965 | unsafe fn test_mm512_mask_cmple_epu16_mask() { | |
12966 | let a = _mm512_set1_epi16(-1); | |
12967 | let b = _mm512_set1_epi16(-1); | |
12968 | let mask = 0b01010101_01010101_01010101_01010101; | |
12969 | let r = _mm512_mask_cmple_epu16_mask(mask, a, b); | |
12970 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
12971 | } | |
12972 | ||
cdc7bbd5 XL |
12973 | #[simd_test(enable = "avx512bw,avx512vl")] |
12974 | unsafe fn test_mm256_cmple_epu16_mask() { | |
12975 | let a = _mm256_set1_epi16(-1); | |
12976 | let b = _mm256_set1_epi16(-1); | |
12977 | let m = _mm256_cmple_epu16_mask(a, b); | |
12978 | assert_eq!(m, 0b11111111_11111111); | |
12979 | } | |
12980 | ||
12981 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12982 | unsafe fn test_mm256_mask_cmple_epu16_mask() { | |
12983 | let a = _mm256_set1_epi16(-1); | |
12984 | let b = _mm256_set1_epi16(-1); | |
12985 | let mask = 0b01010101_01010101; | |
12986 | let r = _mm256_mask_cmple_epu16_mask(mask, a, b); | |
12987 | assert_eq!(r, 0b01010101_01010101); | |
12988 | } | |
12989 | ||
12990 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12991 | unsafe fn test_mm_cmple_epu16_mask() { | |
12992 | let a = _mm_set1_epi16(-1); | |
12993 | let b = _mm_set1_epi16(-1); | |
12994 | let m = _mm_cmple_epu16_mask(a, b); | |
12995 | assert_eq!(m, 0b11111111); | |
12996 | } | |
12997 | ||
12998 | #[simd_test(enable = "avx512bw,avx512vl")] | |
12999 | unsafe fn test_mm_mask_cmple_epu16_mask() { | |
13000 | let a = _mm_set1_epi16(-1); | |
13001 | let b = _mm_set1_epi16(-1); | |
13002 | let mask = 0b01010101; | |
13003 | let r = _mm_mask_cmple_epu16_mask(mask, a, b); | |
13004 | assert_eq!(r, 0b01010101); | |
13005 | } | |
13006 | ||
fc512014 XL |
13007 | #[simd_test(enable = "avx512bw")] |
13008 | unsafe fn test_mm512_cmple_epu8_mask() { | |
13009 | let a = _mm512_set1_epi8(-1); | |
13010 | let b = _mm512_set1_epi8(-1); | |
13011 | let m = _mm512_cmple_epu8_mask(a, b); | |
13012 | assert_eq!( | |
13013 | m, | |
13014 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13015 | ); | |
13016 | } | |
13017 | ||
cdc7bbd5 XL |
13018 | #[simd_test(enable = "avx512bw")] |
13019 | unsafe fn test_mm512_mask_cmple_epu8_mask() { | |
13020 | let a = _mm512_set1_epi8(-1); | |
13021 | let b = _mm512_set1_epi8(-1); | |
13022 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13023 | let r = _mm512_mask_cmple_epu8_mask(mask, a, b); | |
13024 | assert_eq!( | |
13025 | r, | |
13026 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13027 | ); | |
13028 | } | |
13029 | ||
13030 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13031 | unsafe fn test_mm256_cmple_epu8_mask() { | |
13032 | let a = _mm256_set1_epi8(-1); | |
13033 | let b = _mm256_set1_epi8(-1); | |
13034 | let m = _mm256_cmple_epu8_mask(a, b); | |
13035 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13036 | } | |
13037 | ||
13038 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13039 | unsafe fn test_mm256_mask_cmple_epu8_mask() { | |
13040 | let a = _mm256_set1_epi8(-1); | |
13041 | let b = _mm256_set1_epi8(-1); | |
13042 | let mask = 0b01010101_01010101_01010101_01010101; | |
13043 | let r = _mm256_mask_cmple_epu8_mask(mask, a, b); | |
13044 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13045 | } | |
13046 | ||
13047 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13048 | unsafe fn test_mm_cmple_epu8_mask() { | |
13049 | let a = _mm_set1_epi8(-1); | |
13050 | let b = _mm_set1_epi8(-1); | |
13051 | let m = _mm_cmple_epu8_mask(a, b); | |
13052 | assert_eq!(m, 0b11111111_11111111); | |
13053 | } | |
13054 | ||
13055 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13056 | unsafe fn test_mm_mask_cmple_epu8_mask() { | |
13057 | let a = _mm_set1_epi8(-1); | |
13058 | let b = _mm_set1_epi8(-1); | |
13059 | let mask = 0b01010101_01010101; | |
13060 | let r = _mm_mask_cmple_epu8_mask(mask, a, b); | |
13061 | assert_eq!(r, 0b01010101_01010101); | |
fc512014 XL |
13062 | } |
13063 | ||
13064 | #[simd_test(enable = "avx512bw")] | |
13065 | unsafe fn test_mm512_cmple_epi16_mask() { | |
13066 | let a = _mm512_set1_epi16(-1); | |
13067 | let b = _mm512_set1_epi16(-1); | |
13068 | let m = _mm512_cmple_epi16_mask(a, b); | |
13069 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13070 | } | |
13071 | ||
13072 | #[simd_test(enable = "avx512bw")] | |
13073 | unsafe fn test_mm512_mask_cmple_epi16_mask() { | |
13074 | let a = _mm512_set1_epi16(-1); | |
13075 | let b = _mm512_set1_epi16(-1); | |
13076 | let mask = 0b01010101_01010101_01010101_01010101; | |
13077 | let r = _mm512_mask_cmple_epi16_mask(mask, a, b); | |
13078 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13079 | } | |
13080 | ||
cdc7bbd5 XL |
13081 | #[simd_test(enable = "avx512bw,avx512vl")] |
13082 | unsafe fn test_mm256_cmple_epi16_mask() { | |
13083 | let a = _mm256_set1_epi16(-1); | |
13084 | let b = _mm256_set1_epi16(-1); | |
13085 | let m = _mm256_cmple_epi16_mask(a, b); | |
13086 | assert_eq!(m, 0b11111111_11111111); | |
13087 | } | |
13088 | ||
13089 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13090 | unsafe fn test_mm256_mask_cmple_epi16_mask() { | |
13091 | let a = _mm256_set1_epi16(-1); | |
13092 | let b = _mm256_set1_epi16(-1); | |
13093 | let mask = 0b01010101_01010101; | |
13094 | let r = _mm256_mask_cmple_epi16_mask(mask, a, b); | |
13095 | assert_eq!(r, 0b01010101_01010101); | |
13096 | } | |
13097 | ||
13098 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13099 | unsafe fn test_mm_cmple_epi16_mask() { | |
13100 | let a = _mm_set1_epi16(-1); | |
13101 | let b = _mm_set1_epi16(-1); | |
13102 | let m = _mm_cmple_epi16_mask(a, b); | |
13103 | assert_eq!(m, 0b11111111); | |
13104 | } | |
13105 | ||
13106 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13107 | unsafe fn test_mm_mask_cmple_epi16_mask() { | |
13108 | let a = _mm_set1_epi16(-1); | |
13109 | let b = _mm_set1_epi16(-1); | |
13110 | let mask = 0b01010101; | |
13111 | let r = _mm_mask_cmple_epi16_mask(mask, a, b); | |
13112 | assert_eq!(r, 0b01010101); | |
13113 | } | |
13114 | ||
fc512014 XL |
13115 | #[simd_test(enable = "avx512bw")] |
13116 | unsafe fn test_mm512_cmple_epi8_mask() { | |
13117 | let a = _mm512_set1_epi8(-1); | |
13118 | let b = _mm512_set1_epi8(-1); | |
13119 | let m = _mm512_cmple_epi8_mask(a, b); | |
13120 | assert_eq!( | |
13121 | m, | |
13122 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13123 | ); | |
13124 | } | |
13125 | ||
13126 | #[simd_test(enable = "avx512bw")] | |
13127 | unsafe fn test_mm512_mask_cmple_epi8_mask() { | |
13128 | let a = _mm512_set1_epi8(-1); | |
13129 | let b = _mm512_set1_epi8(-1); | |
13130 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13131 | let r = _mm512_mask_cmple_epi8_mask(mask, a, b); | |
13132 | assert_eq!( | |
13133 | r, | |
13134 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13135 | ); | |
13136 | } | |
13137 | ||
cdc7bbd5 XL |
13138 | #[simd_test(enable = "avx512bw,avx512vl")] |
13139 | unsafe fn test_mm256_cmple_epi8_mask() { | |
13140 | let a = _mm256_set1_epi8(-1); | |
13141 | let b = _mm256_set1_epi8(-1); | |
13142 | let m = _mm256_cmple_epi8_mask(a, b); | |
13143 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13144 | } | |
13145 | ||
13146 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13147 | unsafe fn test_mm256_mask_cmple_epi8_mask() { | |
13148 | let a = _mm256_set1_epi8(-1); | |
13149 | let b = _mm256_set1_epi8(-1); | |
13150 | let mask = 0b01010101_01010101_01010101_01010101; | |
13151 | let r = _mm256_mask_cmple_epi8_mask(mask, a, b); | |
13152 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13153 | } | |
13154 | ||
13155 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13156 | unsafe fn test_mm_cmple_epi8_mask() { | |
13157 | let a = _mm_set1_epi8(-1); | |
13158 | let b = _mm_set1_epi8(-1); | |
13159 | let m = _mm_cmple_epi8_mask(a, b); | |
13160 | assert_eq!(m, 0b11111111_11111111); | |
13161 | } | |
13162 | ||
13163 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13164 | unsafe fn test_mm_mask_cmple_epi8_mask() { | |
13165 | let a = _mm_set1_epi8(-1); | |
13166 | let b = _mm_set1_epi8(-1); | |
13167 | let mask = 0b01010101_01010101; | |
13168 | let r = _mm_mask_cmple_epi8_mask(mask, a, b); | |
13169 | assert_eq!(r, 0b01010101_01010101); | |
13170 | } | |
13171 | ||
fc512014 XL |
13172 | #[simd_test(enable = "avx512bw")] |
13173 | unsafe fn test_mm512_cmpge_epu16_mask() { | |
13174 | let a = _mm512_set1_epi16(1); | |
13175 | let b = _mm512_set1_epi16(1); | |
13176 | let m = _mm512_cmpge_epu16_mask(a, b); | |
13177 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13178 | } | |
13179 | ||
13180 | #[simd_test(enable = "avx512bw")] | |
13181 | unsafe fn test_mm512_mask_cmpge_epu16_mask() { | |
13182 | let a = _mm512_set1_epi16(1); | |
13183 | let b = _mm512_set1_epi16(1); | |
13184 | let mask = 0b01010101_01010101_01010101_01010101; | |
13185 | let r = _mm512_mask_cmpge_epu16_mask(mask, a, b); | |
13186 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13187 | } | |
13188 | ||
cdc7bbd5 XL |
13189 | #[simd_test(enable = "avx512bw,avx512vl")] |
13190 | unsafe fn test_mm256_cmpge_epu16_mask() { | |
13191 | let a = _mm256_set1_epi16(1); | |
13192 | let b = _mm256_set1_epi16(1); | |
13193 | let m = _mm256_cmpge_epu16_mask(a, b); | |
13194 | assert_eq!(m, 0b11111111_11111111); | |
13195 | } | |
13196 | ||
13197 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13198 | unsafe fn test_mm256_mask_cmpge_epu16_mask() { | |
13199 | let a = _mm256_set1_epi16(1); | |
13200 | let b = _mm256_set1_epi16(1); | |
13201 | let mask = 0b01010101_01010101; | |
13202 | let r = _mm256_mask_cmpge_epu16_mask(mask, a, b); | |
13203 | assert_eq!(r, 0b01010101_01010101); | |
13204 | } | |
13205 | ||
13206 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13207 | unsafe fn test_mm_cmpge_epu16_mask() { | |
13208 | let a = _mm_set1_epi16(1); | |
13209 | let b = _mm_set1_epi16(1); | |
13210 | let m = _mm_cmpge_epu16_mask(a, b); | |
13211 | assert_eq!(m, 0b11111111); | |
13212 | } | |
13213 | ||
13214 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13215 | unsafe fn test_mm_mask_cmpge_epu16_mask() { | |
13216 | let a = _mm_set1_epi16(1); | |
13217 | let b = _mm_set1_epi16(1); | |
13218 | let mask = 0b01010101; | |
13219 | let r = _mm_mask_cmpge_epu16_mask(mask, a, b); | |
13220 | assert_eq!(r, 0b01010101); | |
13221 | } | |
13222 | ||
fc512014 XL |
13223 | #[simd_test(enable = "avx512bw")] |
13224 | unsafe fn test_mm512_cmpge_epu8_mask() { | |
13225 | let a = _mm512_set1_epi8(1); | |
13226 | let b = _mm512_set1_epi8(1); | |
13227 | let m = _mm512_cmpge_epu8_mask(a, b); | |
13228 | assert_eq!( | |
13229 | m, | |
13230 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13231 | ); | |
13232 | } | |
13233 | ||
13234 | #[simd_test(enable = "avx512bw")] | |
13235 | unsafe fn test_mm512_mask_cmpge_epu8_mask() { | |
13236 | let a = _mm512_set1_epi8(1); | |
13237 | let b = _mm512_set1_epi8(1); | |
13238 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13239 | let r = _mm512_mask_cmpge_epu8_mask(mask, a, b); | |
13240 | assert_eq!( | |
13241 | r, | |
13242 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13243 | ); | |
13244 | } | |
13245 | ||
cdc7bbd5 XL |
13246 | #[simd_test(enable = "avx512bw,avx512vl")] |
13247 | unsafe fn test_mm256_cmpge_epu8_mask() { | |
13248 | let a = _mm256_set1_epi8(1); | |
13249 | let b = _mm256_set1_epi8(1); | |
13250 | let m = _mm256_cmpge_epu8_mask(a, b); | |
13251 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13252 | } | |
13253 | ||
13254 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13255 | unsafe fn test_mm256_mask_cmpge_epu8_mask() { | |
13256 | let a = _mm256_set1_epi8(1); | |
13257 | let b = _mm256_set1_epi8(1); | |
13258 | let mask = 0b01010101_01010101_01010101_01010101; | |
13259 | let r = _mm256_mask_cmpge_epu8_mask(mask, a, b); | |
13260 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13261 | } | |
13262 | ||
13263 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13264 | unsafe fn test_mm_cmpge_epu8_mask() { | |
13265 | let a = _mm_set1_epi8(1); | |
13266 | let b = _mm_set1_epi8(1); | |
13267 | let m = _mm_cmpge_epu8_mask(a, b); | |
13268 | assert_eq!(m, 0b11111111_11111111); | |
13269 | } | |
13270 | ||
13271 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13272 | unsafe fn test_mm_mask_cmpge_epu8_mask() { | |
13273 | let a = _mm_set1_epi8(1); | |
13274 | let b = _mm_set1_epi8(1); | |
13275 | let mask = 0b01010101_01010101; | |
13276 | let r = _mm_mask_cmpge_epu8_mask(mask, a, b); | |
13277 | assert_eq!(r, 0b01010101_01010101); | |
13278 | } | |
13279 | ||
fc512014 XL |
13280 | #[simd_test(enable = "avx512bw")] |
13281 | unsafe fn test_mm512_cmpge_epi16_mask() { | |
13282 | let a = _mm512_set1_epi16(-1); | |
13283 | let b = _mm512_set1_epi16(-1); | |
13284 | let m = _mm512_cmpge_epi16_mask(a, b); | |
13285 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13286 | } | |
13287 | ||
13288 | #[simd_test(enable = "avx512bw")] | |
13289 | unsafe fn test_mm512_mask_cmpge_epi16_mask() { | |
13290 | let a = _mm512_set1_epi16(-1); | |
13291 | let b = _mm512_set1_epi16(-1); | |
13292 | let mask = 0b01010101_01010101_01010101_01010101; | |
13293 | let r = _mm512_mask_cmpge_epi16_mask(mask, a, b); | |
13294 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13295 | } | |
13296 | ||
cdc7bbd5 XL |
13297 | #[simd_test(enable = "avx512bw,avx512vl")] |
13298 | unsafe fn test_mm256_cmpge_epi16_mask() { | |
13299 | let a = _mm256_set1_epi16(-1); | |
13300 | let b = _mm256_set1_epi16(-1); | |
13301 | let m = _mm256_cmpge_epi16_mask(a, b); | |
13302 | assert_eq!(m, 0b11111111_11111111); | |
13303 | } | |
13304 | ||
13305 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13306 | unsafe fn test_mm256_mask_cmpge_epi16_mask() { | |
13307 | let a = _mm256_set1_epi16(-1); | |
13308 | let b = _mm256_set1_epi16(-1); | |
13309 | let mask = 0b01010101_01010101; | |
13310 | let r = _mm256_mask_cmpge_epi16_mask(mask, a, b); | |
13311 | assert_eq!(r, 0b01010101_01010101); | |
13312 | } | |
13313 | ||
13314 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13315 | unsafe fn test_mm_cmpge_epi16_mask() { | |
13316 | let a = _mm_set1_epi16(-1); | |
13317 | let b = _mm_set1_epi16(-1); | |
13318 | let m = _mm_cmpge_epi16_mask(a, b); | |
13319 | assert_eq!(m, 0b11111111); | |
13320 | } | |
13321 | ||
13322 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13323 | unsafe fn test_mm_mask_cmpge_epi16_mask() { | |
13324 | let a = _mm_set1_epi16(-1); | |
13325 | let b = _mm_set1_epi16(-1); | |
13326 | let mask = 0b01010101; | |
13327 | let r = _mm_mask_cmpge_epi16_mask(mask, a, b); | |
13328 | assert_eq!(r, 0b01010101); | |
13329 | } | |
13330 | ||
fc512014 XL |
13331 | #[simd_test(enable = "avx512bw")] |
13332 | unsafe fn test_mm512_cmpge_epi8_mask() { | |
13333 | let a = _mm512_set1_epi8(-1); | |
13334 | let b = _mm512_set1_epi8(-1); | |
13335 | let m = _mm512_cmpge_epi8_mask(a, b); | |
13336 | assert_eq!( | |
13337 | m, | |
13338 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13339 | ); | |
13340 | } | |
13341 | ||
13342 | #[simd_test(enable = "avx512bw")] | |
13343 | unsafe fn test_mm512_mask_cmpge_epi8_mask() { | |
13344 | let a = _mm512_set1_epi8(-1); | |
13345 | let b = _mm512_set1_epi8(-1); | |
13346 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13347 | let r = _mm512_mask_cmpge_epi8_mask(mask, a, b); | |
13348 | assert_eq!( | |
13349 | r, | |
13350 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13351 | ); | |
13352 | } | |
13353 | ||
cdc7bbd5 XL |
13354 | #[simd_test(enable = "avx512bw,avx512vl")] |
13355 | unsafe fn test_mm256_cmpge_epi8_mask() { | |
13356 | let a = _mm256_set1_epi8(-1); | |
13357 | let b = _mm256_set1_epi8(-1); | |
13358 | let m = _mm256_cmpge_epi8_mask(a, b); | |
13359 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13360 | } | |
13361 | ||
13362 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13363 | unsafe fn test_mm256_mask_cmpge_epi8_mask() { | |
13364 | let a = _mm256_set1_epi8(-1); | |
13365 | let b = _mm256_set1_epi8(-1); | |
13366 | let mask = 0b01010101_01010101_01010101_01010101; | |
13367 | let r = _mm256_mask_cmpge_epi8_mask(mask, a, b); | |
13368 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13369 | } | |
13370 | ||
13371 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13372 | unsafe fn test_mm_cmpge_epi8_mask() { | |
13373 | let a = _mm_set1_epi8(-1); | |
13374 | let b = _mm_set1_epi8(-1); | |
13375 | let m = _mm_cmpge_epi8_mask(a, b); | |
13376 | assert_eq!(m, 0b11111111_11111111); | |
13377 | } | |
13378 | ||
13379 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13380 | unsafe fn test_mm_mask_cmpge_epi8_mask() { | |
13381 | let a = _mm_set1_epi8(-1); | |
13382 | let b = _mm_set1_epi8(-1); | |
13383 | let mask = 0b01010101_01010101; | |
13384 | let r = _mm_mask_cmpge_epi8_mask(mask, a, b); | |
13385 | assert_eq!(r, 0b01010101_01010101); | |
13386 | } | |
13387 | ||
fc512014 XL |
13388 | #[simd_test(enable = "avx512bw")] |
13389 | unsafe fn test_mm512_cmpeq_epu16_mask() { | |
13390 | let a = _mm512_set1_epi16(1); | |
13391 | let b = _mm512_set1_epi16(1); | |
13392 | let m = _mm512_cmpeq_epu16_mask(a, b); | |
13393 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13394 | } | |
13395 | ||
13396 | #[simd_test(enable = "avx512bw")] | |
13397 | unsafe fn test_mm512_mask_cmpeq_epu16_mask() { | |
13398 | let a = _mm512_set1_epi16(1); | |
13399 | let b = _mm512_set1_epi16(1); | |
13400 | let mask = 0b01010101_01010101_01010101_01010101; | |
13401 | let r = _mm512_mask_cmpeq_epu16_mask(mask, a, b); | |
13402 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13403 | } | |
13404 | ||
cdc7bbd5 XL |
13405 | #[simd_test(enable = "avx512bw,avx512vl")] |
13406 | unsafe fn test_mm256_cmpeq_epu16_mask() { | |
13407 | let a = _mm256_set1_epi16(1); | |
13408 | let b = _mm256_set1_epi16(1); | |
13409 | let m = _mm256_cmpeq_epu16_mask(a, b); | |
13410 | assert_eq!(m, 0b11111111_11111111); | |
13411 | } | |
13412 | ||
13413 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13414 | unsafe fn test_mm256_mask_cmpeq_epu16_mask() { | |
13415 | let a = _mm256_set1_epi16(1); | |
13416 | let b = _mm256_set1_epi16(1); | |
13417 | let mask = 0b01010101_01010101; | |
13418 | let r = _mm256_mask_cmpeq_epu16_mask(mask, a, b); | |
13419 | assert_eq!(r, 0b01010101_01010101); | |
13420 | } | |
13421 | ||
13422 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13423 | unsafe fn test_mm_cmpeq_epu16_mask() { | |
13424 | let a = _mm_set1_epi16(1); | |
13425 | let b = _mm_set1_epi16(1); | |
13426 | let m = _mm_cmpeq_epu16_mask(a, b); | |
13427 | assert_eq!(m, 0b11111111); | |
13428 | } | |
13429 | ||
13430 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13431 | unsafe fn test_mm_mask_cmpeq_epu16_mask() { | |
13432 | let a = _mm_set1_epi16(1); | |
13433 | let b = _mm_set1_epi16(1); | |
13434 | let mask = 0b01010101; | |
13435 | let r = _mm_mask_cmpeq_epu16_mask(mask, a, b); | |
13436 | assert_eq!(r, 0b01010101); | |
13437 | } | |
13438 | ||
fc512014 XL |
13439 | #[simd_test(enable = "avx512bw")] |
13440 | unsafe fn test_mm512_cmpeq_epu8_mask() { | |
13441 | let a = _mm512_set1_epi8(1); | |
13442 | let b = _mm512_set1_epi8(1); | |
13443 | let m = _mm512_cmpeq_epu8_mask(a, b); | |
13444 | assert_eq!( | |
13445 | m, | |
13446 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13447 | ); | |
13448 | } | |
13449 | ||
13450 | #[simd_test(enable = "avx512bw")] | |
13451 | unsafe fn test_mm512_mask_cmpeq_epu8_mask() { | |
13452 | let a = _mm512_set1_epi8(1); | |
13453 | let b = _mm512_set1_epi8(1); | |
13454 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13455 | let r = _mm512_mask_cmpeq_epu8_mask(mask, a, b); | |
13456 | assert_eq!( | |
13457 | r, | |
13458 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13459 | ); | |
13460 | } | |
13461 | ||
cdc7bbd5 XL |
13462 | #[simd_test(enable = "avx512bw,avx512vl")] |
13463 | unsafe fn test_mm256_cmpeq_epu8_mask() { | |
13464 | let a = _mm256_set1_epi8(1); | |
13465 | let b = _mm256_set1_epi8(1); | |
13466 | let m = _mm256_cmpeq_epu8_mask(a, b); | |
13467 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13468 | } | |
13469 | ||
13470 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13471 | unsafe fn test_mm256_mask_cmpeq_epu8_mask() { | |
13472 | let a = _mm256_set1_epi8(1); | |
13473 | let b = _mm256_set1_epi8(1); | |
13474 | let mask = 0b01010101_01010101_01010101_01010101; | |
13475 | let r = _mm256_mask_cmpeq_epu8_mask(mask, a, b); | |
13476 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13477 | } | |
13478 | ||
13479 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13480 | unsafe fn test_mm_cmpeq_epu8_mask() { | |
13481 | let a = _mm_set1_epi8(1); | |
13482 | let b = _mm_set1_epi8(1); | |
13483 | let m = _mm_cmpeq_epu8_mask(a, b); | |
13484 | assert_eq!(m, 0b11111111_11111111); | |
13485 | } | |
13486 | ||
13487 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13488 | unsafe fn test_mm_mask_cmpeq_epu8_mask() { | |
13489 | let a = _mm_set1_epi8(1); | |
13490 | let b = _mm_set1_epi8(1); | |
13491 | let mask = 0b01010101_01010101; | |
13492 | let r = _mm_mask_cmpeq_epu8_mask(mask, a, b); | |
13493 | assert_eq!(r, 0b01010101_01010101); | |
13494 | } | |
13495 | ||
fc512014 XL |
13496 | #[simd_test(enable = "avx512bw")] |
13497 | unsafe fn test_mm512_cmpeq_epi16_mask() { | |
13498 | let a = _mm512_set1_epi16(-1); | |
13499 | let b = _mm512_set1_epi16(-1); | |
13500 | let m = _mm512_cmpeq_epi16_mask(a, b); | |
13501 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13502 | } | |
13503 | ||
13504 | #[simd_test(enable = "avx512bw")] | |
13505 | unsafe fn test_mm512_mask_cmpeq_epi16_mask() { | |
13506 | let a = _mm512_set1_epi16(-1); | |
13507 | let b = _mm512_set1_epi16(-1); | |
13508 | let mask = 0b01010101_01010101_01010101_01010101; | |
13509 | let r = _mm512_mask_cmpeq_epi16_mask(mask, a, b); | |
13510 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13511 | } | |
13512 | ||
cdc7bbd5 XL |
13513 | #[simd_test(enable = "avx512bw,avx512vl")] |
13514 | unsafe fn test_mm256_cmpeq_epi16_mask() { | |
13515 | let a = _mm256_set1_epi16(-1); | |
13516 | let b = _mm256_set1_epi16(-1); | |
13517 | let m = _mm256_cmpeq_epi16_mask(a, b); | |
13518 | assert_eq!(m, 0b11111111_11111111); | |
13519 | } | |
13520 | ||
13521 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13522 | unsafe fn test_mm256_mask_cmpeq_epi16_mask() { | |
13523 | let a = _mm256_set1_epi16(-1); | |
13524 | let b = _mm256_set1_epi16(-1); | |
13525 | let mask = 0b01010101_01010101; | |
13526 | let r = _mm256_mask_cmpeq_epi16_mask(mask, a, b); | |
13527 | assert_eq!(r, 0b01010101_01010101); | |
13528 | } | |
13529 | ||
13530 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13531 | unsafe fn test_mm_cmpeq_epi16_mask() { | |
13532 | let a = _mm_set1_epi16(-1); | |
13533 | let b = _mm_set1_epi16(-1); | |
13534 | let m = _mm_cmpeq_epi16_mask(a, b); | |
13535 | assert_eq!(m, 0b11111111); | |
13536 | } | |
13537 | ||
13538 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13539 | unsafe fn test_mm_mask_cmpeq_epi16_mask() { | |
13540 | let a = _mm_set1_epi16(-1); | |
13541 | let b = _mm_set1_epi16(-1); | |
13542 | let mask = 0b01010101; | |
13543 | let r = _mm_mask_cmpeq_epi16_mask(mask, a, b); | |
13544 | assert_eq!(r, 0b01010101); | |
13545 | } | |
13546 | ||
fc512014 XL |
13547 | #[simd_test(enable = "avx512bw")] |
13548 | unsafe fn test_mm512_cmpeq_epi8_mask() { | |
13549 | let a = _mm512_set1_epi8(-1); | |
13550 | let b = _mm512_set1_epi8(-1); | |
13551 | let m = _mm512_cmpeq_epi8_mask(a, b); | |
13552 | assert_eq!( | |
13553 | m, | |
13554 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13555 | ); | |
13556 | } | |
13557 | ||
13558 | #[simd_test(enable = "avx512bw")] | |
13559 | unsafe fn test_mm512_mask_cmpeq_epi8_mask() { | |
13560 | let a = _mm512_set1_epi8(-1); | |
13561 | let b = _mm512_set1_epi8(-1); | |
13562 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13563 | let r = _mm512_mask_cmpeq_epi8_mask(mask, a, b); | |
13564 | assert_eq!( | |
13565 | r, | |
13566 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13567 | ); | |
13568 | } | |
13569 | ||
cdc7bbd5 XL |
13570 | #[simd_test(enable = "avx512bw,avx512vl")] |
13571 | unsafe fn test_mm256_cmpeq_epi8_mask() { | |
13572 | let a = _mm256_set1_epi8(-1); | |
13573 | let b = _mm256_set1_epi8(-1); | |
13574 | let m = _mm256_cmpeq_epi8_mask(a, b); | |
13575 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13576 | } | |
13577 | ||
13578 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13579 | unsafe fn test_mm256_mask_cmpeq_epi8_mask() { | |
13580 | let a = _mm256_set1_epi8(-1); | |
13581 | let b = _mm256_set1_epi8(-1); | |
13582 | let mask = 0b01010101_01010101_01010101_01010101; | |
13583 | let r = _mm256_mask_cmpeq_epi8_mask(mask, a, b); | |
13584 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13585 | } | |
13586 | ||
13587 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13588 | unsafe fn test_mm_cmpeq_epi8_mask() { | |
13589 | let a = _mm_set1_epi8(-1); | |
13590 | let b = _mm_set1_epi8(-1); | |
13591 | let m = _mm_cmpeq_epi8_mask(a, b); | |
13592 | assert_eq!(m, 0b11111111_11111111); | |
13593 | } | |
13594 | ||
13595 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13596 | unsafe fn test_mm_mask_cmpeq_epi8_mask() { | |
13597 | let a = _mm_set1_epi8(-1); | |
13598 | let b = _mm_set1_epi8(-1); | |
13599 | let mask = 0b01010101_01010101; | |
13600 | let r = _mm_mask_cmpeq_epi8_mask(mask, a, b); | |
13601 | assert_eq!(r, 0b01010101_01010101); | |
13602 | } | |
13603 | ||
fc512014 XL |
13604 | #[simd_test(enable = "avx512bw")] |
13605 | unsafe fn test_mm512_cmpneq_epu16_mask() { | |
13606 | let a = _mm512_set1_epi16(2); | |
13607 | let b = _mm512_set1_epi16(1); | |
13608 | let m = _mm512_cmpneq_epu16_mask(a, b); | |
13609 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13610 | } | |
13611 | ||
13612 | #[simd_test(enable = "avx512bw")] | |
13613 | unsafe fn test_mm512_mask_cmpneq_epu16_mask() { | |
13614 | let a = _mm512_set1_epi16(2); | |
13615 | let b = _mm512_set1_epi16(1); | |
13616 | let mask = 0b01010101_01010101_01010101_01010101; | |
13617 | let r = _mm512_mask_cmpneq_epu16_mask(mask, a, b); | |
13618 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13619 | } | |
13620 | ||
cdc7bbd5 XL |
13621 | #[simd_test(enable = "avx512bw,avx512vl")] |
13622 | unsafe fn test_mm256_cmpneq_epu16_mask() { | |
13623 | let a = _mm256_set1_epi16(2); | |
13624 | let b = _mm256_set1_epi16(1); | |
13625 | let m = _mm256_cmpneq_epu16_mask(a, b); | |
13626 | assert_eq!(m, 0b11111111_11111111); | |
13627 | } | |
13628 | ||
13629 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13630 | unsafe fn test_mm256_mask_cmpneq_epu16_mask() { | |
13631 | let a = _mm256_set1_epi16(2); | |
13632 | let b = _mm256_set1_epi16(1); | |
13633 | let mask = 0b01010101_01010101; | |
13634 | let r = _mm256_mask_cmpneq_epu16_mask(mask, a, b); | |
13635 | assert_eq!(r, 0b01010101_01010101); | |
13636 | } | |
13637 | ||
13638 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13639 | unsafe fn test_mm_cmpneq_epu16_mask() { | |
13640 | let a = _mm_set1_epi16(2); | |
13641 | let b = _mm_set1_epi16(1); | |
13642 | let m = _mm_cmpneq_epu16_mask(a, b); | |
13643 | assert_eq!(m, 0b11111111); | |
13644 | } | |
13645 | ||
13646 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13647 | unsafe fn test_mm_mask_cmpneq_epu16_mask() { | |
13648 | let a = _mm_set1_epi16(2); | |
13649 | let b = _mm_set1_epi16(1); | |
13650 | let mask = 0b01010101; | |
13651 | let r = _mm_mask_cmpneq_epu16_mask(mask, a, b); | |
13652 | assert_eq!(r, 0b01010101); | |
13653 | } | |
13654 | ||
fc512014 XL |
13655 | #[simd_test(enable = "avx512bw")] |
13656 | unsafe fn test_mm512_cmpneq_epu8_mask() { | |
13657 | let a = _mm512_set1_epi8(2); | |
13658 | let b = _mm512_set1_epi8(1); | |
13659 | let m = _mm512_cmpneq_epu8_mask(a, b); | |
13660 | assert_eq!( | |
13661 | m, | |
13662 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13663 | ); | |
13664 | } | |
13665 | ||
cdc7bbd5 XL |
13666 | #[simd_test(enable = "avx512bw")] |
13667 | unsafe fn test_mm512_mask_cmpneq_epu8_mask() { | |
13668 | let a = _mm512_set1_epi8(2); | |
13669 | let b = _mm512_set1_epi8(1); | |
13670 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13671 | let r = _mm512_mask_cmpneq_epu8_mask(mask, a, b); | |
13672 | assert_eq!( | |
13673 | r, | |
13674 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13675 | ); | |
13676 | } | |
13677 | ||
13678 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13679 | unsafe fn test_mm256_cmpneq_epu8_mask() { | |
13680 | let a = _mm256_set1_epi8(2); | |
13681 | let b = _mm256_set1_epi8(1); | |
13682 | let m = _mm256_cmpneq_epu8_mask(a, b); | |
13683 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13684 | } | |
13685 | ||
13686 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13687 | unsafe fn test_mm256_mask_cmpneq_epu8_mask() { | |
13688 | let a = _mm256_set1_epi8(2); | |
13689 | let b = _mm256_set1_epi8(1); | |
13690 | let mask = 0b01010101_01010101_01010101_01010101; | |
13691 | let r = _mm256_mask_cmpneq_epu8_mask(mask, a, b); | |
13692 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13693 | } | |
13694 | ||
13695 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13696 | unsafe fn test_mm_cmpneq_epu8_mask() { | |
13697 | let a = _mm_set1_epi8(2); | |
13698 | let b = _mm_set1_epi8(1); | |
13699 | let m = _mm_cmpneq_epu8_mask(a, b); | |
13700 | assert_eq!(m, 0b11111111_11111111); | |
13701 | } | |
13702 | ||
13703 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13704 | unsafe fn test_mm_mask_cmpneq_epu8_mask() { | |
13705 | let a = _mm_set1_epi8(2); | |
13706 | let b = _mm_set1_epi8(1); | |
13707 | let mask = 0b01010101_01010101; | |
13708 | let r = _mm_mask_cmpneq_epu8_mask(mask, a, b); | |
13709 | assert_eq!(r, 0b01010101_01010101); | |
fc512014 XL |
13710 | } |
13711 | ||
13712 | #[simd_test(enable = "avx512bw")] | |
13713 | unsafe fn test_mm512_cmpneq_epi16_mask() { | |
13714 | let a = _mm512_set1_epi16(1); | |
13715 | let b = _mm512_set1_epi16(-1); | |
13716 | let m = _mm512_cmpneq_epi16_mask(a, b); | |
13717 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13718 | } | |
13719 | ||
13720 | #[simd_test(enable = "avx512bw")] | |
13721 | unsafe fn test_mm512_mask_cmpneq_epi16_mask() { | |
13722 | let a = _mm512_set1_epi16(1); | |
13723 | let b = _mm512_set1_epi16(-1); | |
13724 | let mask = 0b01010101_01010101_01010101_01010101; | |
13725 | let r = _mm512_mask_cmpneq_epi16_mask(mask, a, b); | |
13726 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13727 | } | |
13728 | ||
cdc7bbd5 XL |
13729 | #[simd_test(enable = "avx512bw,avx512vl")] |
13730 | unsafe fn test_mm256_cmpneq_epi16_mask() { | |
13731 | let a = _mm256_set1_epi16(1); | |
13732 | let b = _mm256_set1_epi16(-1); | |
13733 | let m = _mm256_cmpneq_epi16_mask(a, b); | |
13734 | assert_eq!(m, 0b11111111_11111111); | |
13735 | } | |
13736 | ||
13737 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13738 | unsafe fn test_mm256_mask_cmpneq_epi16_mask() { | |
13739 | let a = _mm256_set1_epi16(1); | |
13740 | let b = _mm256_set1_epi16(-1); | |
13741 | let mask = 0b01010101_01010101; | |
13742 | let r = _mm256_mask_cmpneq_epi16_mask(mask, a, b); | |
13743 | assert_eq!(r, 0b01010101_01010101); | |
13744 | } | |
13745 | ||
13746 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13747 | unsafe fn test_mm_cmpneq_epi16_mask() { | |
13748 | let a = _mm_set1_epi16(1); | |
13749 | let b = _mm_set1_epi16(-1); | |
13750 | let m = _mm_cmpneq_epi16_mask(a, b); | |
13751 | assert_eq!(m, 0b11111111); | |
13752 | } | |
13753 | ||
13754 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13755 | unsafe fn test_mm_mask_cmpneq_epi16_mask() { | |
13756 | let a = _mm_set1_epi16(1); | |
13757 | let b = _mm_set1_epi16(-1); | |
13758 | let mask = 0b01010101; | |
13759 | let r = _mm_mask_cmpneq_epi16_mask(mask, a, b); | |
13760 | assert_eq!(r, 0b01010101); | |
13761 | } | |
13762 | ||
fc512014 XL |
13763 | #[simd_test(enable = "avx512bw")] |
13764 | unsafe fn test_mm512_cmpneq_epi8_mask() { | |
13765 | let a = _mm512_set1_epi8(1); | |
13766 | let b = _mm512_set1_epi8(-1); | |
13767 | let m = _mm512_cmpneq_epi8_mask(a, b); | |
13768 | assert_eq!( | |
13769 | m, | |
13770 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13771 | ); | |
13772 | } | |
13773 | ||
13774 | #[simd_test(enable = "avx512bw")] | |
13775 | unsafe fn test_mm512_mask_cmpneq_epi8_mask() { | |
13776 | let a = _mm512_set1_epi8(1); | |
13777 | let b = _mm512_set1_epi8(-1); | |
13778 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
13779 | let r = _mm512_mask_cmpneq_epi8_mask(mask, a, b); | |
13780 | assert_eq!( | |
13781 | r, | |
13782 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13783 | ); | |
13784 | } | |
13785 | ||
cdc7bbd5 XL |
13786 | #[simd_test(enable = "avx512bw,avx512vl")] |
13787 | unsafe fn test_mm256_cmpneq_epi8_mask() { | |
13788 | let a = _mm256_set1_epi8(1); | |
13789 | let b = _mm256_set1_epi8(-1); | |
13790 | let m = _mm256_cmpneq_epi8_mask(a, b); | |
13791 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); | |
13792 | } | |
13793 | ||
13794 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13795 | unsafe fn test_mm256_mask_cmpneq_epi8_mask() { | |
13796 | let a = _mm256_set1_epi8(1); | |
13797 | let b = _mm256_set1_epi8(-1); | |
13798 | let mask = 0b01010101_01010101_01010101_01010101; | |
13799 | let r = _mm256_mask_cmpneq_epi8_mask(mask, a, b); | |
13800 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); | |
13801 | } | |
13802 | ||
13803 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13804 | unsafe fn test_mm_cmpneq_epi8_mask() { | |
13805 | let a = _mm_set1_epi8(1); | |
13806 | let b = _mm_set1_epi8(-1); | |
13807 | let m = _mm_cmpneq_epi8_mask(a, b); | |
13808 | assert_eq!(m, 0b11111111_11111111); | |
13809 | } | |
13810 | ||
13811 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13812 | unsafe fn test_mm_mask_cmpneq_epi8_mask() { | |
13813 | let a = _mm_set1_epi8(1); | |
13814 | let b = _mm_set1_epi8(-1); | |
13815 | let mask = 0b01010101_01010101; | |
13816 | let r = _mm_mask_cmpneq_epi8_mask(mask, a, b); | |
13817 | assert_eq!(r, 0b01010101_01010101); | |
13818 | } | |
13819 | ||
fc512014 XL |
13820 | #[simd_test(enable = "avx512bw")] |
13821 | unsafe fn test_mm512_cmp_epu16_mask() { | |
13822 | let a = _mm512_set1_epi16(0); | |
13823 | let b = _mm512_set1_epi16(1); | |
17df50a5 | 13824 | let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); |
fc512014 XL |
13825 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13826 | } | |
13827 | ||
13828 | #[simd_test(enable = "avx512bw")] | |
13829 | unsafe fn test_mm512_mask_cmp_epu16_mask() { | |
13830 | let a = _mm512_set1_epi16(0); | |
13831 | let b = _mm512_set1_epi16(1); | |
13832 | let mask = 0b01010101_01010101_01010101_01010101; | |
17df50a5 | 13833 | let r = _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); |
fc512014 XL |
13834 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13835 | } | |
13836 | ||
cdc7bbd5 XL |
13837 | #[simd_test(enable = "avx512bw,avx512vl")] |
13838 | unsafe fn test_mm256_cmp_epu16_mask() { | |
13839 | let a = _mm256_set1_epi16(0); | |
13840 | let b = _mm256_set1_epi16(1); | |
17df50a5 | 13841 | let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
13842 | assert_eq!(m, 0b11111111_11111111); |
13843 | } | |
13844 | ||
13845 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13846 | unsafe fn test_mm256_mask_cmp_epu16_mask() { | |
13847 | let a = _mm256_set1_epi16(0); | |
13848 | let b = _mm256_set1_epi16(1); | |
13849 | let mask = 0b01010101_01010101; | |
17df50a5 | 13850 | let r = _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
13851 | assert_eq!(r, 0b01010101_01010101); |
13852 | } | |
13853 | ||
13854 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13855 | unsafe fn test_mm_cmp_epu16_mask() { | |
13856 | let a = _mm_set1_epi16(0); | |
13857 | let b = _mm_set1_epi16(1); | |
17df50a5 | 13858 | let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
13859 | assert_eq!(m, 0b11111111); |
13860 | } | |
13861 | ||
13862 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13863 | unsafe fn test_mm_mask_cmp_epu16_mask() { | |
13864 | let a = _mm_set1_epi16(0); | |
13865 | let b = _mm_set1_epi16(1); | |
13866 | let mask = 0b01010101; | |
17df50a5 | 13867 | let r = _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
13868 | assert_eq!(r, 0b01010101); |
13869 | } | |
13870 | ||
fc512014 XL |
13871 | #[simd_test(enable = "avx512bw")] |
13872 | unsafe fn test_mm512_cmp_epu8_mask() { | |
13873 | let a = _mm512_set1_epi8(0); | |
13874 | let b = _mm512_set1_epi8(1); | |
17df50a5 | 13875 | let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); |
fc512014 XL |
13876 | assert_eq!( |
13877 | m, | |
13878 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13879 | ); | |
13880 | } | |
13881 | ||
13882 | #[simd_test(enable = "avx512bw")] | |
13883 | unsafe fn test_mm512_mask_cmp_epu8_mask() { | |
13884 | let a = _mm512_set1_epi8(0); | |
13885 | let b = _mm512_set1_epi8(1); | |
13886 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
17df50a5 | 13887 | let r = _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); |
fc512014 XL |
13888 | assert_eq!( |
13889 | r, | |
13890 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13891 | ); | |
13892 | } | |
13893 | ||
cdc7bbd5 XL |
13894 | #[simd_test(enable = "avx512bw,avx512vl")] |
13895 | unsafe fn test_mm256_cmp_epu8_mask() { | |
13896 | let a = _mm256_set1_epi8(0); | |
13897 | let b = _mm256_set1_epi8(1); | |
17df50a5 | 13898 | let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
13899 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13900 | } | |
13901 | ||
13902 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13903 | unsafe fn test_mm256_mask_cmp_epu8_mask() { | |
13904 | let a = _mm256_set1_epi8(0); | |
13905 | let b = _mm256_set1_epi8(1); | |
13906 | let mask = 0b01010101_01010101_01010101_01010101; | |
17df50a5 | 13907 | let r = _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
13908 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13909 | } | |
13910 | ||
13911 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13912 | unsafe fn test_mm_cmp_epu8_mask() { | |
13913 | let a = _mm_set1_epi8(0); | |
13914 | let b = _mm_set1_epi8(1); | |
17df50a5 | 13915 | let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
13916 | assert_eq!(m, 0b11111111_11111111); |
13917 | } | |
13918 | ||
13919 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13920 | unsafe fn test_mm_mask_cmp_epu8_mask() { | |
13921 | let a = _mm_set1_epi8(0); | |
13922 | let b = _mm_set1_epi8(1); | |
13923 | let mask = 0b01010101_01010101; | |
17df50a5 | 13924 | let r = _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
13925 | assert_eq!(r, 0b01010101_01010101); |
13926 | } | |
13927 | ||
fc512014 XL |
13928 | #[simd_test(enable = "avx512bw")] |
13929 | unsafe fn test_mm512_cmp_epi16_mask() { | |
13930 | let a = _mm512_set1_epi16(0); | |
13931 | let b = _mm512_set1_epi16(1); | |
17df50a5 | 13932 | let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); |
fc512014 XL |
13933 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
13934 | } | |
13935 | ||
13936 | #[simd_test(enable = "avx512bw")] | |
13937 | unsafe fn test_mm512_mask_cmp_epi16_mask() { | |
13938 | let a = _mm512_set1_epi16(0); | |
13939 | let b = _mm512_set1_epi16(1); | |
13940 | let mask = 0b01010101_01010101_01010101_01010101; | |
17df50a5 | 13941 | let r = _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); |
fc512014 XL |
13942 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
13943 | } | |
13944 | ||
cdc7bbd5 XL |
13945 | #[simd_test(enable = "avx512bw,avx512vl")] |
13946 | unsafe fn test_mm256_cmp_epi16_mask() { | |
13947 | let a = _mm256_set1_epi16(0); | |
13948 | let b = _mm256_set1_epi16(1); | |
17df50a5 | 13949 | let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
13950 | assert_eq!(m, 0b11111111_11111111); |
13951 | } | |
13952 | ||
13953 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13954 | unsafe fn test_mm256_mask_cmp_epi16_mask() { | |
13955 | let a = _mm256_set1_epi16(0); | |
13956 | let b = _mm256_set1_epi16(1); | |
13957 | let mask = 0b01010101_01010101; | |
17df50a5 | 13958 | let r = _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
13959 | assert_eq!(r, 0b01010101_01010101); |
13960 | } | |
13961 | ||
13962 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13963 | unsafe fn test_mm_cmp_epi16_mask() { | |
13964 | let a = _mm_set1_epi16(0); | |
13965 | let b = _mm_set1_epi16(1); | |
17df50a5 | 13966 | let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
13967 | assert_eq!(m, 0b11111111); |
13968 | } | |
13969 | ||
13970 | #[simd_test(enable = "avx512bw,avx512vl")] | |
13971 | unsafe fn test_mm_mask_cmp_epi16_mask() { | |
13972 | let a = _mm_set1_epi16(0); | |
13973 | let b = _mm_set1_epi16(1); | |
13974 | let mask = 0b01010101; | |
17df50a5 | 13975 | let r = _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
13976 | assert_eq!(r, 0b01010101); |
13977 | } | |
13978 | ||
fc512014 XL |
13979 | #[simd_test(enable = "avx512bw")] |
13980 | unsafe fn test_mm512_cmp_epi8_mask() { | |
13981 | let a = _mm512_set1_epi8(0); | |
13982 | let b = _mm512_set1_epi8(1); | |
17df50a5 | 13983 | let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); |
fc512014 XL |
13984 | assert_eq!( |
13985 | m, | |
13986 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111 | |
13987 | ); | |
13988 | } | |
13989 | ||
13990 | #[simd_test(enable = "avx512bw")] | |
13991 | unsafe fn test_mm512_mask_cmp_epi8_mask() { | |
13992 | let a = _mm512_set1_epi8(0); | |
13993 | let b = _mm512_set1_epi8(1); | |
13994 | let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; | |
17df50a5 | 13995 | let r = _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); |
fc512014 XL |
13996 | assert_eq!( |
13997 | r, | |
13998 | 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101 | |
13999 | ); | |
14000 | } | |
14001 | ||
cdc7bbd5 XL |
14002 | #[simd_test(enable = "avx512bw,avx512vl")] |
14003 | unsafe fn test_mm256_cmp_epi8_mask() { | |
14004 | let a = _mm256_set1_epi8(0); | |
14005 | let b = _mm256_set1_epi8(1); | |
17df50a5 | 14006 | let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
14007 | assert_eq!(m, 0b11111111_11111111_11111111_11111111); |
14008 | } | |
14009 | ||
14010 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14011 | unsafe fn test_mm256_mask_cmp_epi8_mask() { | |
14012 | let a = _mm256_set1_epi8(0); | |
14013 | let b = _mm256_set1_epi8(1); | |
14014 | let mask = 0b01010101_01010101_01010101_01010101; | |
17df50a5 | 14015 | let r = _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
14016 | assert_eq!(r, 0b01010101_01010101_01010101_01010101); |
14017 | } | |
14018 | ||
14019 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14020 | unsafe fn test_mm_cmp_epi8_mask() { | |
14021 | let a = _mm_set1_epi8(0); | |
14022 | let b = _mm_set1_epi8(1); | |
17df50a5 | 14023 | let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b); |
cdc7bbd5 XL |
14024 | assert_eq!(m, 0b11111111_11111111); |
14025 | } | |
14026 | ||
14027 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14028 | unsafe fn test_mm_mask_cmp_epi8_mask() { | |
14029 | let a = _mm_set1_epi8(0); | |
14030 | let b = _mm_set1_epi8(1); | |
14031 | let mask = 0b01010101_01010101; | |
17df50a5 | 14032 | let r = _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(mask, a, b); |
cdc7bbd5 XL |
14033 | assert_eq!(r, 0b01010101_01010101); |
14034 | } | |
14035 | ||
fc512014 XL |
14036 | #[simd_test(enable = "avx512bw")] |
14037 | unsafe fn test_mm512_loadu_epi16() { | |
14038 | #[rustfmt::skip] | |
14039 | let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; | |
14040 | let r = _mm512_loadu_epi16(&a[0]); | |
14041 | #[rustfmt::skip] | |
14042 | let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); | |
14043 | assert_eq_m512i(r, e); | |
14044 | } | |
14045 | ||
cdc7bbd5 XL |
14046 | #[simd_test(enable = "avx512bw,avx512vl")] |
14047 | unsafe fn test_mm256_loadu_epi16() { | |
14048 | let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14049 | let r = _mm256_loadu_epi16(&a[0]); | |
14050 | let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); | |
14051 | assert_eq_m256i(r, e); | |
14052 | } | |
14053 | ||
14054 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14055 | unsafe fn test_mm_loadu_epi16() { | |
14056 | let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; | |
14057 | let r = _mm_loadu_epi16(&a[0]); | |
14058 | let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); | |
14059 | assert_eq_m128i(r, e); | |
14060 | } | |
14061 | ||
fc512014 XL |
14062 | #[simd_test(enable = "avx512bw")] |
14063 | unsafe fn test_mm512_loadu_epi8() { | |
14064 | #[rustfmt::skip] | |
14065 | let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14066 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; | |
14067 | let r = _mm512_loadu_epi8(&a[0]); | |
14068 | #[rustfmt::skip] | |
14069 | let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, | |
14070 | 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); | |
14071 | assert_eq_m512i(r, e); | |
14072 | } | |
14073 | ||
cdc7bbd5 XL |
14074 | #[simd_test(enable = "avx512bw,avx512vl")] |
14075 | unsafe fn test_mm256_loadu_epi8() { | |
14076 | #[rustfmt::skip] | |
14077 | let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; | |
14078 | let r = _mm256_loadu_epi8(&a[0]); | |
14079 | #[rustfmt::skip] | |
14080 | let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); | |
14081 | assert_eq_m256i(r, e); | |
14082 | } | |
14083 | ||
14084 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14085 | unsafe fn test_mm_loadu_epi8() { | |
14086 | let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14087 | let r = _mm_loadu_epi8(&a[0]); | |
14088 | let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); | |
14089 | assert_eq_m128i(r, e); | |
14090 | } | |
14091 | ||
fc512014 XL |
14092 | #[simd_test(enable = "avx512bw")] |
14093 | unsafe fn test_mm512_storeu_epi16() { | |
14094 | let a = _mm512_set1_epi16(9); | |
14095 | let mut r = _mm512_undefined_epi32(); | |
14096 | _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a); | |
14097 | assert_eq_m512i(r, a); | |
14098 | } | |
14099 | ||
cdc7bbd5 XL |
14100 | #[simd_test(enable = "avx512bw,avx512vl")] |
14101 | unsafe fn test_mm256_storeu_epi16() { | |
14102 | let a = _mm256_set1_epi16(9); | |
14103 | let mut r = _mm256_set1_epi32(0); | |
14104 | _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a); | |
14105 | assert_eq_m256i(r, a); | |
14106 | } | |
14107 | ||
14108 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14109 | unsafe fn test_mm_storeu_epi16() { | |
14110 | let a = _mm_set1_epi16(9); | |
14111 | let mut r = _mm_set1_epi32(0); | |
14112 | _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a); | |
14113 | assert_eq_m128i(r, a); | |
14114 | } | |
14115 | ||
fc512014 XL |
14116 | #[simd_test(enable = "avx512bw")] |
14117 | unsafe fn test_mm512_storeu_epi8() { | |
14118 | let a = _mm512_set1_epi8(9); | |
14119 | let mut r = _mm512_undefined_epi32(); | |
14120 | _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a); | |
14121 | assert_eq_m512i(r, a); | |
14122 | } | |
14123 | ||
cdc7bbd5 XL |
14124 | #[simd_test(enable = "avx512bw,avx512vl")] |
14125 | unsafe fn test_mm256_storeu_epi8() { | |
14126 | let a = _mm256_set1_epi8(9); | |
14127 | let mut r = _mm256_set1_epi32(0); | |
14128 | _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a); | |
14129 | assert_eq_m256i(r, a); | |
14130 | } | |
14131 | ||
14132 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14133 | unsafe fn test_mm_storeu_epi8() { | |
14134 | let a = _mm_set1_epi8(9); | |
14135 | let mut r = _mm_set1_epi32(0); | |
14136 | _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a); | |
14137 | assert_eq_m128i(r, a); | |
14138 | } | |
14139 | ||
a2a8927a XL |
14140 | #[simd_test(enable = "avx512f,avx512bw")] |
14141 | unsafe fn test_mm512_mask_loadu_epi16() { | |
14142 | let src = _mm512_set1_epi16(42); | |
14143 | let a = &[ | |
14144 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14145 | 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14146 | ]; | |
14147 | let p = a.as_ptr(); | |
14148 | let m = 0b10101010_11001100_11101000_11001010; | |
14149 | let r = _mm512_mask_loadu_epi16(src, m, black_box(p)); | |
14150 | let e = &[ | |
14151 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, | |
14152 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, | |
14153 | ]; | |
14154 | let e = _mm512_loadu_epi16(e.as_ptr()); | |
14155 | assert_eq_m512i(r, e); | |
14156 | } | |
14157 | ||
14158 | #[simd_test(enable = "avx512f,avx512bw")] | |
14159 | unsafe fn test_mm512_maskz_loadu_epi16() { | |
14160 | let a = &[ | |
14161 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14162 | 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14163 | ]; | |
14164 | let p = a.as_ptr(); | |
14165 | let m = 0b10101010_11001100_11101000_11001010; | |
14166 | let r = _mm512_maskz_loadu_epi16(m, black_box(p)); | |
14167 | let e = &[ | |
14168 | 0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, | |
14169 | 26, 0, 28, 0, 30, 0, 32, | |
14170 | ]; | |
14171 | let e = _mm512_loadu_epi16(e.as_ptr()); | |
14172 | assert_eq_m512i(r, e); | |
14173 | } | |
14174 | ||
14175 | #[simd_test(enable = "avx512f,avx512bw")] | |
14176 | unsafe fn test_mm512_mask_storeu_epi16() { | |
14177 | let mut r = [42_i16; 32]; | |
14178 | let a = &[ | |
14179 | 1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14180 | 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14181 | ]; | |
14182 | let a = _mm512_loadu_epi16(a.as_ptr()); | |
14183 | let m = 0b10101010_11001100_11101000_11001010; | |
14184 | _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a); | |
14185 | let e = &[ | |
14186 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, | |
14187 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, | |
14188 | ]; | |
14189 | let e = _mm512_loadu_epi16(e.as_ptr()); | |
14190 | assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e); | |
14191 | } | |
14192 | ||
14193 | #[simd_test(enable = "avx512f,avx512bw")] | |
14194 | unsafe fn test_mm512_mask_loadu_epi8() { | |
14195 | let src = _mm512_set1_epi8(42); | |
14196 | let a = &[ | |
14197 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14198 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
14199 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, | |
14200 | ]; | |
14201 | let p = a.as_ptr(); | |
14202 | let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; | |
14203 | let r = _mm512_mask_loadu_epi8(src, m, black_box(p)); | |
14204 | let e = &[ | |
14205 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, | |
14206 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44, | |
14207 | 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42, | |
14208 | ]; | |
14209 | let e = _mm512_loadu_epi8(e.as_ptr()); | |
14210 | assert_eq_m512i(r, e); | |
14211 | } | |
14212 | ||
14213 | #[simd_test(enable = "avx512f,avx512bw")] | |
14214 | unsafe fn test_mm512_maskz_loadu_epi8() { | |
14215 | let a = &[ | |
14216 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14217 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
14218 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, | |
14219 | ]; | |
14220 | let p = a.as_ptr(); | |
14221 | let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; | |
14222 | let r = _mm512_maskz_loadu_epi8(m, black_box(p)); | |
14223 | let e = &[ | |
14224 | 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, | |
14225 | 26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49, | |
14226 | 50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0, | |
14227 | ]; | |
14228 | let e = _mm512_loadu_epi8(e.as_ptr()); | |
14229 | assert_eq_m512i(r, e); | |
14230 | } | |
14231 | ||
14232 | #[simd_test(enable = "avx512f,avx512bw")] | |
14233 | unsafe fn test_mm512_mask_storeu_epi8() { | |
14234 | let mut r = [42_i8; 64]; | |
14235 | let a = &[ | |
14236 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14237 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
14238 | 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, | |
14239 | ]; | |
14240 | let a = _mm512_loadu_epi8(a.as_ptr()); | |
14241 | let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010; | |
14242 | _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a); | |
14243 | let e = &[ | |
14244 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, | |
14245 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44, | |
14246 | 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42, | |
14247 | ]; | |
14248 | let e = _mm512_loadu_epi8(e.as_ptr()); | |
14249 | assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e); | |
14250 | } | |
14251 | ||
14252 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14253 | unsafe fn test_mm256_mask_loadu_epi16() { | |
14254 | let src = _mm256_set1_epi16(42); | |
14255 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14256 | let p = a.as_ptr(); | |
14257 | let m = 0b11101000_11001010; | |
14258 | let r = _mm256_mask_loadu_epi16(src, m, black_box(p)); | |
14259 | let e = &[ | |
14260 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, | |
14261 | ]; | |
14262 | let e = _mm256_loadu_epi16(e.as_ptr()); | |
14263 | assert_eq_m256i(r, e); | |
14264 | } | |
14265 | ||
14266 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14267 | unsafe fn test_mm256_maskz_loadu_epi16() { | |
14268 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14269 | let p = a.as_ptr(); | |
14270 | let m = 0b11101000_11001010; | |
14271 | let r = _mm256_maskz_loadu_epi16(m, black_box(p)); | |
14272 | let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16]; | |
14273 | let e = _mm256_loadu_epi16(e.as_ptr()); | |
14274 | assert_eq_m256i(r, e); | |
14275 | } | |
14276 | ||
14277 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14278 | unsafe fn test_mm256_mask_storeu_epi16() { | |
14279 | let mut r = [42_i16; 16]; | |
14280 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14281 | let a = _mm256_loadu_epi16(a.as_ptr()); | |
14282 | let m = 0b11101000_11001010; | |
14283 | _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a); | |
14284 | let e = &[ | |
14285 | 42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, | |
14286 | ]; | |
14287 | let e = _mm256_loadu_epi16(e.as_ptr()); | |
14288 | assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e); | |
14289 | } | |
14290 | ||
14291 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14292 | unsafe fn test_mm256_mask_loadu_epi8() { | |
14293 | let src = _mm256_set1_epi8(42); | |
14294 | let a = &[ | |
14295 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14296 | 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14297 | ]; | |
14298 | let p = a.as_ptr(); | |
14299 | let m = 0b10101010_11001100_11101000_11001010; | |
14300 | let r = _mm256_mask_loadu_epi8(src, m, black_box(p)); | |
14301 | let e = &[ | |
14302 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, | |
14303 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, | |
14304 | ]; | |
14305 | let e = _mm256_loadu_epi8(e.as_ptr()); | |
14306 | assert_eq_m256i(r, e); | |
14307 | } | |
14308 | ||
14309 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14310 | unsafe fn test_mm256_maskz_loadu_epi8() { | |
14311 | let a = &[ | |
14312 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14313 | 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14314 | ]; | |
14315 | let p = a.as_ptr(); | |
14316 | let m = 0b10101010_11001100_11101000_11001010; | |
14317 | let r = _mm256_maskz_loadu_epi8(m, black_box(p)); | |
14318 | let e = &[ | |
14319 | 0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0, | |
14320 | 26, 0, 28, 0, 30, 0, 32, | |
14321 | ]; | |
14322 | let e = _mm256_loadu_epi8(e.as_ptr()); | |
14323 | assert_eq_m256i(r, e); | |
14324 | } | |
14325 | ||
14326 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14327 | unsafe fn test_mm256_mask_storeu_epi8() { | |
14328 | let mut r = [42_i8; 32]; | |
14329 | let a = &[ | |
14330 | 1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, | |
14331 | 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
14332 | ]; | |
14333 | let a = _mm256_loadu_epi8(a.as_ptr()); | |
14334 | let m = 0b10101010_11001100_11101000_11001010; | |
14335 | _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a); | |
14336 | let e = &[ | |
14337 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42, | |
14338 | 23, 24, 42, 26, 42, 28, 42, 30, 42, 32, | |
14339 | ]; | |
14340 | let e = _mm256_loadu_epi8(e.as_ptr()); | |
14341 | assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e); | |
14342 | } | |
14343 | ||
14344 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14345 | unsafe fn test_mm_mask_loadu_epi16() { | |
14346 | let src = _mm_set1_epi16(42); | |
14347 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; | |
14348 | let p = a.as_ptr(); | |
14349 | let m = 0b11001010; | |
14350 | let r = _mm_mask_loadu_epi16(src, m, black_box(p)); | |
14351 | let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8]; | |
14352 | let e = _mm_loadu_epi16(e.as_ptr()); | |
14353 | assert_eq_m128i(r, e); | |
14354 | } | |
14355 | ||
14356 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14357 | unsafe fn test_mm_maskz_loadu_epi16() { | |
14358 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; | |
14359 | let p = a.as_ptr(); | |
14360 | let m = 0b11001010; | |
14361 | let r = _mm_maskz_loadu_epi16(m, black_box(p)); | |
14362 | let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8]; | |
14363 | let e = _mm_loadu_epi16(e.as_ptr()); | |
14364 | assert_eq_m128i(r, e); | |
14365 | } | |
14366 | ||
14367 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14368 | unsafe fn test_mm_mask_storeu_epi16() { | |
14369 | let mut r = [42_i16; 8]; | |
14370 | let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8]; | |
14371 | let a = _mm_loadu_epi16(a.as_ptr()); | |
14372 | let m = 0b11001010; | |
14373 | _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a); | |
14374 | let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8]; | |
14375 | let e = _mm_loadu_epi16(e.as_ptr()); | |
14376 | assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e); | |
14377 | } | |
14378 | ||
14379 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14380 | unsafe fn test_mm_mask_loadu_epi8() { | |
14381 | let src = _mm_set1_epi8(42); | |
14382 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14383 | let p = a.as_ptr(); | |
14384 | let m = 0b11101000_11001010; | |
14385 | let r = _mm_mask_loadu_epi8(src, m, black_box(p)); | |
14386 | let e = &[ | |
14387 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, | |
14388 | ]; | |
14389 | let e = _mm_loadu_epi8(e.as_ptr()); | |
14390 | assert_eq_m128i(r, e); | |
14391 | } | |
14392 | ||
14393 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14394 | unsafe fn test_mm_maskz_loadu_epi8() { | |
14395 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14396 | let p = a.as_ptr(); | |
14397 | let m = 0b11101000_11001010; | |
14398 | let r = _mm_maskz_loadu_epi8(m, black_box(p)); | |
14399 | let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16]; | |
14400 | let e = _mm_loadu_epi8(e.as_ptr()); | |
14401 | assert_eq_m128i(r, e); | |
14402 | } | |
14403 | ||
14404 | #[simd_test(enable = "avx512f,avx512bw,avx512vl")] | |
14405 | unsafe fn test_mm_mask_storeu_epi8() { | |
14406 | let mut r = [42_i8; 16]; | |
14407 | let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; | |
14408 | let a = _mm_loadu_epi8(a.as_ptr()); | |
14409 | let m = 0b11101000_11001010; | |
14410 | _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a); | |
14411 | let e = &[ | |
14412 | 42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, | |
14413 | ]; | |
14414 | let e = _mm_loadu_epi8(e.as_ptr()); | |
14415 | assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e); | |
14416 | } | |
14417 | ||
fc512014 XL |
14418 | #[simd_test(enable = "avx512bw")] |
14419 | unsafe fn test_mm512_madd_epi16() { | |
14420 | let a = _mm512_set1_epi16(1); | |
14421 | let b = _mm512_set1_epi16(1); | |
14422 | let r = _mm512_madd_epi16(a, b); | |
14423 | let e = _mm512_set1_epi32(2); | |
14424 | assert_eq_m512i(r, e); | |
14425 | } | |
14426 | ||
14427 | #[simd_test(enable = "avx512bw")] | |
14428 | unsafe fn test_mm512_mask_madd_epi16() { | |
14429 | let a = _mm512_set1_epi16(1); | |
14430 | let b = _mm512_set1_epi16(1); | |
14431 | let r = _mm512_mask_madd_epi16(a, 0, a, b); | |
14432 | assert_eq_m512i(r, a); | |
14433 | let r = _mm512_mask_madd_epi16(a, 0b00000000_00001111, a, b); | |
14434 | let e = _mm512_set_epi32( | |
14435 | 1 << 16 | 1, | |
14436 | 1 << 16 | 1, | |
14437 | 1 << 16 | 1, | |
14438 | 1 << 16 | 1, | |
14439 | 1 << 16 | 1, | |
14440 | 1 << 16 | 1, | |
14441 | 1 << 16 | 1, | |
14442 | 1 << 16 | 1, | |
14443 | 1 << 16 | 1, | |
14444 | 1 << 16 | 1, | |
14445 | 1 << 16 | 1, | |
14446 | 1 << 16 | 1, | |
14447 | 2, | |
14448 | 2, | |
14449 | 2, | |
14450 | 2, | |
14451 | ); | |
14452 | assert_eq_m512i(r, e); | |
14453 | } | |
14454 | ||
14455 | #[simd_test(enable = "avx512bw")] | |
14456 | unsafe fn test_mm512_maskz_madd_epi16() { | |
14457 | let a = _mm512_set1_epi16(1); | |
14458 | let b = _mm512_set1_epi16(1); | |
14459 | let r = _mm512_maskz_madd_epi16(0, a, b); | |
14460 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14461 | let r = _mm512_maskz_madd_epi16(0b00000000_00001111, a, b); | |
14462 | let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2); | |
14463 | assert_eq_m512i(r, e); | |
14464 | } | |
14465 | ||
14466 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14467 | unsafe fn test_mm256_mask_madd_epi16() { | |
14468 | let a = _mm256_set1_epi16(1); | |
14469 | let b = _mm256_set1_epi16(1); | |
14470 | let r = _mm256_mask_madd_epi16(a, 0, a, b); | |
14471 | assert_eq_m256i(r, a); | |
14472 | let r = _mm256_mask_madd_epi16(a, 0b00001111, a, b); | |
14473 | let e = _mm256_set_epi32( | |
14474 | 1 << 16 | 1, | |
14475 | 1 << 16 | 1, | |
14476 | 1 << 16 | 1, | |
14477 | 1 << 16 | 1, | |
14478 | 2, | |
14479 | 2, | |
14480 | 2, | |
14481 | 2, | |
14482 | ); | |
14483 | assert_eq_m256i(r, e); | |
14484 | } | |
14485 | ||
14486 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14487 | unsafe fn test_mm256_maskz_madd_epi16() { | |
14488 | let a = _mm256_set1_epi16(1); | |
14489 | let b = _mm256_set1_epi16(1); | |
14490 | let r = _mm256_maskz_madd_epi16(0, a, b); | |
14491 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
14492 | let r = _mm256_maskz_madd_epi16(0b00001111, a, b); | |
14493 | let e = _mm256_set_epi32(0, 0, 0, 0, 2, 2, 2, 2); | |
14494 | assert_eq_m256i(r, e); | |
14495 | } | |
14496 | ||
14497 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14498 | unsafe fn test_mm_mask_madd_epi16() { | |
14499 | let a = _mm_set1_epi16(1); | |
14500 | let b = _mm_set1_epi16(1); | |
14501 | let r = _mm_mask_madd_epi16(a, 0, a, b); | |
14502 | assert_eq_m128i(r, a); | |
14503 | let r = _mm_mask_madd_epi16(a, 0b00001111, a, b); | |
14504 | let e = _mm_set_epi32(2, 2, 2, 2); | |
14505 | assert_eq_m128i(r, e); | |
14506 | } | |
14507 | ||
14508 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14509 | unsafe fn test_mm_maskz_madd_epi16() { | |
14510 | let a = _mm_set1_epi16(1); | |
14511 | let b = _mm_set1_epi16(1); | |
14512 | let r = _mm_maskz_madd_epi16(0, a, b); | |
14513 | assert_eq_m128i(r, _mm_setzero_si128()); | |
14514 | let r = _mm_maskz_madd_epi16(0b00001111, a, b); | |
14515 | let e = _mm_set_epi32(2, 2, 2, 2); | |
14516 | assert_eq_m128i(r, e); | |
14517 | } | |
14518 | ||
14519 | #[simd_test(enable = "avx512bw")] | |
14520 | unsafe fn test_mm512_maddubs_epi16() { | |
14521 | let a = _mm512_set1_epi8(1); | |
14522 | let b = _mm512_set1_epi8(1); | |
14523 | let r = _mm512_maddubs_epi16(a, b); | |
14524 | let e = _mm512_set1_epi16(2); | |
14525 | assert_eq_m512i(r, e); | |
14526 | } | |
14527 | ||
14528 | #[simd_test(enable = "avx512bw")] | |
14529 | unsafe fn test_mm512_mask_maddubs_epi16() { | |
14530 | let a = _mm512_set1_epi8(1); | |
14531 | let b = _mm512_set1_epi8(1); | |
14532 | let src = _mm512_set1_epi16(1); | |
14533 | let r = _mm512_mask_maddubs_epi16(src, 0, a, b); | |
14534 | assert_eq_m512i(r, src); | |
14535 | let r = _mm512_mask_add_epi16(src, 0b00000000_00000000_00000000_00000001, a, b); | |
14536 | #[rustfmt::skip] | |
14537 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14538 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1<<9|2); | |
14539 | assert_eq_m512i(r, e); | |
14540 | } | |
14541 | ||
14542 | #[simd_test(enable = "avx512bw")] | |
14543 | unsafe fn test_mm512_maskz_maddubs_epi16() { | |
14544 | let a = _mm512_set1_epi8(1); | |
14545 | let b = _mm512_set1_epi8(1); | |
14546 | let r = _mm512_maskz_maddubs_epi16(0, a, b); | |
14547 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14548 | let r = _mm512_maskz_maddubs_epi16(0b00000000_11111111_00000000_11111111, a, b); | |
14549 | #[rustfmt::skip] | |
14550 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, | |
14551 | 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); | |
14552 | assert_eq_m512i(r, e); | |
14553 | } | |
14554 | ||
14555 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14556 | unsafe fn test_mm256_mask_maddubs_epi16() { | |
14557 | let a = _mm256_set1_epi8(1); | |
14558 | let b = _mm256_set1_epi8(1); | |
14559 | let src = _mm256_set1_epi16(1); | |
14560 | let r = _mm256_mask_maddubs_epi16(src, 0, a, b); | |
14561 | assert_eq_m256i(r, src); | |
14562 | let r = _mm256_mask_add_epi16(src, 0b00000000_00000001, a, b); | |
14563 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2); | |
14564 | assert_eq_m256i(r, e); | |
14565 | } | |
14566 | ||
14567 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14568 | unsafe fn test_mm256_maskz_maddubs_epi16() { | |
14569 | let a = _mm256_set1_epi8(1); | |
14570 | let b = _mm256_set1_epi8(1); | |
14571 | let r = _mm256_maskz_maddubs_epi16(0, a, b); | |
14572 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
14573 | let r = _mm256_maskz_maddubs_epi16(0b00000000_11111111, a, b); | |
14574 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); | |
14575 | assert_eq_m256i(r, e); | |
14576 | } | |
14577 | ||
14578 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14579 | unsafe fn test_mm_mask_maddubs_epi16() { | |
14580 | let a = _mm_set1_epi8(1); | |
14581 | let b = _mm_set1_epi8(1); | |
14582 | let src = _mm_set1_epi16(1); | |
14583 | let r = _mm_mask_maddubs_epi16(src, 0, a, b); | |
14584 | assert_eq_m128i(r, src); | |
14585 | let r = _mm_mask_add_epi16(src, 0b00000001, a, b); | |
14586 | let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1 << 9 | 2); | |
14587 | assert_eq_m128i(r, e); | |
14588 | } | |
14589 | ||
14590 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14591 | unsafe fn test_mm_maskz_maddubs_epi16() { | |
14592 | let a = _mm_set1_epi8(1); | |
14593 | let b = _mm_set1_epi8(1); | |
14594 | let r = _mm_maskz_maddubs_epi16(0, a, b); | |
14595 | assert_eq_m128i(r, _mm_setzero_si128()); | |
14596 | let r = _mm_maskz_maddubs_epi16(0b00001111, a, b); | |
14597 | let e = _mm_set_epi16(0, 0, 0, 0, 2, 2, 2, 2); | |
14598 | assert_eq_m128i(r, e); | |
14599 | } | |
14600 | ||
14601 | #[simd_test(enable = "avx512bw")] | |
14602 | unsafe fn test_mm512_packs_epi32() { | |
14603 | let a = _mm512_set1_epi32(i32::MAX); | |
14604 | let b = _mm512_set1_epi32(1); | |
14605 | let r = _mm512_packs_epi32(a, b); | |
14606 | #[rustfmt::skip] | |
14607 | let e = _mm512_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, | |
14608 | 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14609 | assert_eq_m512i(r, e); | |
14610 | } | |
14611 | ||
14612 | #[simd_test(enable = "avx512bw")] | |
14613 | unsafe fn test_mm512_mask_packs_epi32() { | |
14614 | let a = _mm512_set1_epi32(i32::MAX); | |
14615 | let b = _mm512_set1_epi32(1 << 16 | 1); | |
14616 | let r = _mm512_mask_packs_epi32(a, 0, a, b); | |
14617 | assert_eq_m512i(r, a); | |
14618 | let r = _mm512_mask_packs_epi32(b, 0b00000000_00000000_00000000_00001111, a, b); | |
14619 | #[rustfmt::skip] | |
14620 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14621 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14622 | assert_eq_m512i(r, e); | |
14623 | } | |
14624 | ||
14625 | #[simd_test(enable = "avx512bw")] | |
14626 | unsafe fn test_mm512_maskz_packs_epi32() { | |
14627 | let a = _mm512_set1_epi32(i32::MAX); | |
14628 | let b = _mm512_set1_epi32(1); | |
14629 | let r = _mm512_maskz_packs_epi32(0, a, b); | |
14630 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14631 | let r = _mm512_maskz_packs_epi32(0b00000000_00000000_00000000_00001111, a, b); | |
14632 | #[rustfmt::skip] | |
14633 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14634 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14635 | assert_eq_m512i(r, e); | |
14636 | } | |
14637 | ||
14638 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14639 | unsafe fn test_mm256_mask_packs_epi32() { | |
14640 | let a = _mm256_set1_epi32(i32::MAX); | |
14641 | let b = _mm256_set1_epi32(1 << 16 | 1); | |
14642 | let r = _mm256_mask_packs_epi32(a, 0, a, b); | |
14643 | assert_eq_m256i(r, a); | |
14644 | let r = _mm256_mask_packs_epi32(b, 0b00000000_00001111, a, b); | |
14645 | #[rustfmt::skip] | |
14646 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14647 | assert_eq_m256i(r, e); | |
14648 | } | |
14649 | ||
14650 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14651 | unsafe fn test_mm256_maskz_packs_epi32() { | |
14652 | let a = _mm256_set1_epi32(i32::MAX); | |
14653 | let b = _mm256_set1_epi32(1); | |
14654 | let r = _mm256_maskz_packs_epi32(0, a, b); | |
14655 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
14656 | let r = _mm256_maskz_packs_epi32(0b00000000_00001111, a, b); | |
14657 | #[rustfmt::skip] | |
14658 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14659 | assert_eq_m256i(r, e); | |
14660 | } | |
14661 | ||
14662 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14663 | unsafe fn test_mm_mask_packs_epi32() { | |
14664 | let a = _mm_set1_epi32(i32::MAX); | |
14665 | let b = _mm_set1_epi32(1 << 16 | 1); | |
14666 | let r = _mm_mask_packs_epi32(a, 0, a, b); | |
14667 | assert_eq_m128i(r, a); | |
14668 | let r = _mm_mask_packs_epi32(b, 0b00001111, a, b); | |
14669 | let e = _mm_set_epi16(1, 1, 1, 1, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14670 | assert_eq_m128i(r, e); | |
14671 | } | |
14672 | ||
14673 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14674 | unsafe fn test_mm_maskz_packs_epi32() { | |
14675 | let a = _mm_set1_epi32(i32::MAX); | |
14676 | let b = _mm_set1_epi32(1); | |
14677 | let r = _mm_maskz_packs_epi32(0, a, b); | |
14678 | assert_eq_m128i(r, _mm_setzero_si128()); | |
14679 | let r = _mm_maskz_packs_epi32(0b00001111, a, b); | |
14680 | let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); | |
14681 | assert_eq_m128i(r, e); | |
14682 | } | |
14683 | ||
14684 | #[simd_test(enable = "avx512bw")] | |
14685 | unsafe fn test_mm512_packs_epi16() { | |
14686 | let a = _mm512_set1_epi16(i16::MAX); | |
14687 | let b = _mm512_set1_epi16(1); | |
14688 | let r = _mm512_packs_epi16(a, b); | |
14689 | #[rustfmt::skip] | |
14690 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, | |
14691 | 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, | |
14692 | 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, | |
14693 | 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14694 | assert_eq_m512i(r, e); | |
14695 | } | |
14696 | ||
14697 | #[simd_test(enable = "avx512bw")] | |
14698 | unsafe fn test_mm512_mask_packs_epi16() { | |
14699 | let a = _mm512_set1_epi16(i16::MAX); | |
14700 | let b = _mm512_set1_epi16(1 << 8 | 1); | |
14701 | let r = _mm512_mask_packs_epi16(a, 0, a, b); | |
14702 | assert_eq_m512i(r, a); | |
14703 | let r = _mm512_mask_packs_epi16( | |
14704 | b, | |
14705 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
14706 | a, | |
14707 | b, | |
14708 | ); | |
14709 | #[rustfmt::skip] | |
14710 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14711 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14712 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14713 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14714 | assert_eq_m512i(r, e); | |
14715 | } | |
14716 | ||
14717 | #[simd_test(enable = "avx512bw")] | |
14718 | unsafe fn test_mm512_maskz_packs_epi16() { | |
14719 | let a = _mm512_set1_epi16(i16::MAX); | |
14720 | let b = _mm512_set1_epi16(1); | |
14721 | let r = _mm512_maskz_packs_epi16(0, a, b); | |
14722 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14723 | let r = _mm512_maskz_packs_epi16( | |
14724 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
14725 | a, | |
14726 | b, | |
14727 | ); | |
14728 | #[rustfmt::skip] | |
14729 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14730 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14731 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14732 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14733 | assert_eq_m512i(r, e); | |
14734 | } | |
14735 | ||
14736 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14737 | unsafe fn test_mm256_mask_packs_epi16() { | |
14738 | let a = _mm256_set1_epi16(i16::MAX); | |
14739 | let b = _mm256_set1_epi16(1 << 8 | 1); | |
14740 | let r = _mm256_mask_packs_epi16(a, 0, a, b); | |
14741 | assert_eq_m256i(r, a); | |
14742 | let r = _mm256_mask_packs_epi16(b, 0b00000000_00000000_00000000_00001111, a, b); | |
14743 | #[rustfmt::skip] | |
14744 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14745 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14746 | assert_eq_m256i(r, e); | |
14747 | } | |
14748 | ||
14749 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14750 | unsafe fn test_mm256_maskz_packs_epi16() { | |
14751 | let a = _mm256_set1_epi16(i16::MAX); | |
14752 | let b = _mm256_set1_epi16(1); | |
14753 | let r = _mm256_maskz_packs_epi16(0, a, b); | |
14754 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
14755 | let r = _mm256_maskz_packs_epi16(0b00000000_00000000_00000000_00001111, a, b); | |
14756 | #[rustfmt::skip] | |
14757 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14758 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14759 | assert_eq_m256i(r, e); | |
14760 | } | |
14761 | ||
14762 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14763 | unsafe fn test_mm_mask_packs_epi16() { | |
14764 | let a = _mm_set1_epi16(i16::MAX); | |
14765 | let b = _mm_set1_epi16(1 << 8 | 1); | |
14766 | let r = _mm_mask_packs_epi16(a, 0, a, b); | |
14767 | assert_eq_m128i(r, a); | |
14768 | let r = _mm_mask_packs_epi16(b, 0b00000000_00001111, a, b); | |
14769 | #[rustfmt::skip] | |
14770 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14771 | assert_eq_m128i(r, e); | |
14772 | } | |
14773 | ||
14774 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14775 | unsafe fn test_mm_maskz_packs_epi16() { | |
14776 | let a = _mm_set1_epi16(i16::MAX); | |
14777 | let b = _mm_set1_epi16(1); | |
14778 | let r = _mm_maskz_packs_epi16(0, a, b); | |
14779 | assert_eq_m128i(r, _mm_setzero_si128()); | |
14780 | let r = _mm_maskz_packs_epi16(0b00000000_00001111, a, b); | |
14781 | #[rustfmt::skip] | |
14782 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
14783 | assert_eq_m128i(r, e); | |
14784 | } | |
14785 | ||
14786 | #[simd_test(enable = "avx512bw")] | |
14787 | unsafe fn test_mm512_packus_epi32() { | |
14788 | let a = _mm512_set1_epi32(-1); | |
14789 | let b = _mm512_set1_epi32(1); | |
14790 | let r = _mm512_packus_epi32(a, b); | |
14791 | #[rustfmt::skip] | |
14792 | let e = _mm512_set_epi16(1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, | |
14793 | 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0); | |
14794 | assert_eq_m512i(r, e); | |
14795 | } | |
14796 | ||
14797 | #[simd_test(enable = "avx512bw")] | |
14798 | unsafe fn test_mm512_mask_packus_epi32() { | |
14799 | let a = _mm512_set1_epi32(-1); | |
14800 | let b = _mm512_set1_epi32(1 << 16 | 1); | |
14801 | let r = _mm512_mask_packus_epi32(a, 0, a, b); | |
14802 | assert_eq_m512i(r, a); | |
14803 | let r = _mm512_mask_packus_epi32(b, 0b00000000_00000000_00000000_00001111, a, b); | |
14804 | #[rustfmt::skip] | |
14805 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14806 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
14807 | assert_eq_m512i(r, e); | |
14808 | } | |
14809 | ||
14810 | #[simd_test(enable = "avx512bw")] | |
14811 | unsafe fn test_mm512_maskz_packus_epi32() { | |
14812 | let a = _mm512_set1_epi32(-1); | |
14813 | let b = _mm512_set1_epi32(1); | |
14814 | let r = _mm512_maskz_packus_epi32(0, a, b); | |
14815 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14816 | let r = _mm512_maskz_packus_epi32(0b00000000_00000000_00000000_00001111, a, b); | |
14817 | #[rustfmt::skip] | |
14818 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14819 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
14820 | assert_eq_m512i(r, e); | |
14821 | } | |
14822 | ||
14823 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14824 | unsafe fn test_mm256_mask_packus_epi32() { | |
14825 | let a = _mm256_set1_epi32(-1); | |
14826 | let b = _mm256_set1_epi32(1 << 16 | 1); | |
14827 | let r = _mm256_mask_packus_epi32(a, 0, a, b); | |
14828 | assert_eq_m256i(r, a); | |
14829 | let r = _mm256_mask_packus_epi32(b, 0b00000000_00001111, a, b); | |
14830 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
14831 | assert_eq_m256i(r, e); | |
14832 | } | |
14833 | ||
14834 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14835 | unsafe fn test_mm256_maskz_packus_epi32() { | |
14836 | let a = _mm256_set1_epi32(-1); | |
14837 | let b = _mm256_set1_epi32(1); | |
14838 | let r = _mm256_maskz_packus_epi32(0, a, b); | |
14839 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
14840 | let r = _mm256_maskz_packus_epi32(0b00000000_00001111, a, b); | |
14841 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
14842 | assert_eq_m256i(r, e); | |
14843 | } | |
14844 | ||
14845 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14846 | unsafe fn test_mm_mask_packus_epi32() { | |
14847 | let a = _mm_set1_epi32(-1); | |
14848 | let b = _mm_set1_epi32(1 << 16 | 1); | |
14849 | let r = _mm_mask_packus_epi32(a, 0, a, b); | |
14850 | assert_eq_m128i(r, a); | |
14851 | let r = _mm_mask_packus_epi32(b, 0b00001111, a, b); | |
14852 | let e = _mm_set_epi16(1, 1, 1, 1, 0, 0, 0, 0); | |
14853 | assert_eq_m128i(r, e); | |
14854 | } | |
14855 | ||
14856 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14857 | unsafe fn test_mm_maskz_packus_epi32() { | |
14858 | let a = _mm_set1_epi32(-1); | |
14859 | let b = _mm_set1_epi32(1); | |
14860 | let r = _mm_maskz_packus_epi32(0, a, b); | |
14861 | assert_eq_m128i(r, _mm_setzero_si128()); | |
14862 | let r = _mm_maskz_packus_epi32(0b00001111, a, b); | |
14863 | let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0); | |
14864 | assert_eq_m128i(r, e); | |
14865 | } | |
14866 | ||
14867 | #[simd_test(enable = "avx512bw")] | |
14868 | unsafe fn test_mm512_packus_epi16() { | |
14869 | let a = _mm512_set1_epi16(-1); | |
14870 | let b = _mm512_set1_epi16(1); | |
14871 | let r = _mm512_packus_epi16(a, b); | |
14872 | #[rustfmt::skip] | |
14873 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, | |
14874 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, | |
14875 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, | |
14876 | 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); | |
14877 | assert_eq_m512i(r, e); | |
14878 | } | |
14879 | ||
14880 | #[simd_test(enable = "avx512bw")] | |
14881 | unsafe fn test_mm512_mask_packus_epi16() { | |
14882 | let a = _mm512_set1_epi16(-1); | |
14883 | let b = _mm512_set1_epi16(1 << 8 | 1); | |
14884 | let r = _mm512_mask_packus_epi16(a, 0, a, b); | |
14885 | assert_eq_m512i(r, a); | |
14886 | let r = _mm512_mask_packus_epi16( | |
14887 | b, | |
14888 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
14889 | a, | |
14890 | b, | |
14891 | ); | |
14892 | #[rustfmt::skip] | |
14893 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14894 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14895 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14896 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
14897 | assert_eq_m512i(r, e); | |
14898 | } | |
14899 | ||
14900 | #[simd_test(enable = "avx512bw")] | |
14901 | unsafe fn test_mm512_maskz_packus_epi16() { | |
14902 | let a = _mm512_set1_epi16(-1); | |
14903 | let b = _mm512_set1_epi16(1); | |
14904 | let r = _mm512_maskz_packus_epi16(0, a, b); | |
14905 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14906 | let r = _mm512_maskz_packus_epi16( | |
14907 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
14908 | a, | |
14909 | b, | |
14910 | ); | |
14911 | #[rustfmt::skip] | |
14912 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14913 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14914 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14915 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
14916 | assert_eq_m512i(r, e); | |
14917 | } | |
14918 | ||
14919 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14920 | unsafe fn test_mm256_mask_packus_epi16() { | |
14921 | let a = _mm256_set1_epi16(-1); | |
14922 | let b = _mm256_set1_epi16(1 << 8 | 1); | |
14923 | let r = _mm256_mask_packus_epi16(a, 0, a, b); | |
14924 | assert_eq_m256i(r, a); | |
14925 | let r = _mm256_mask_packus_epi16(b, 0b00000000_00000000_00000000_00001111, a, b); | |
14926 | #[rustfmt::skip] | |
14927 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14928 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
14929 | assert_eq_m256i(r, e); | |
14930 | } | |
14931 | ||
14932 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14933 | unsafe fn test_mm256_maskz_packus_epi16() { | |
14934 | let a = _mm256_set1_epi16(-1); | |
14935 | let b = _mm256_set1_epi16(1); | |
14936 | let r = _mm256_maskz_packus_epi16(0, a, b); | |
14937 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
14938 | let r = _mm256_maskz_packus_epi16(0b00000000_00000000_00000000_00001111, a, b); | |
14939 | #[rustfmt::skip] | |
14940 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14941 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
14942 | assert_eq_m256i(r, e); | |
14943 | } | |
14944 | ||
14945 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14946 | unsafe fn test_mm_mask_packus_epi16() { | |
14947 | let a = _mm_set1_epi16(-1); | |
14948 | let b = _mm_set1_epi16(1 << 8 | 1); | |
14949 | let r = _mm_mask_packus_epi16(a, 0, a, b); | |
14950 | assert_eq_m128i(r, a); | |
14951 | let r = _mm_mask_packus_epi16(b, 0b00000000_00001111, a, b); | |
14952 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0); | |
14953 | assert_eq_m128i(r, e); | |
14954 | } | |
14955 | ||
14956 | #[simd_test(enable = "avx512bw,avx512vl")] | |
14957 | unsafe fn test_mm_maskz_packus_epi16() { | |
14958 | let a = _mm_set1_epi16(-1); | |
14959 | let b = _mm_set1_epi16(1); | |
14960 | let r = _mm_maskz_packus_epi16(0, a, b); | |
14961 | assert_eq_m128i(r, _mm_setzero_si128()); | |
14962 | let r = _mm_maskz_packus_epi16(0b00000000_00001111, a, b); | |
14963 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
14964 | assert_eq_m128i(r, e); | |
14965 | } | |
14966 | ||
14967 | #[simd_test(enable = "avx512bw")] | |
14968 | unsafe fn test_mm512_avg_epu16() { | |
14969 | let a = _mm512_set1_epi16(1); | |
14970 | let b = _mm512_set1_epi16(1); | |
14971 | let r = _mm512_avg_epu16(a, b); | |
14972 | let e = _mm512_set1_epi16(1); | |
14973 | assert_eq_m512i(r, e); | |
14974 | } | |
14975 | ||
14976 | #[simd_test(enable = "avx512bw")] | |
14977 | unsafe fn test_mm512_mask_avg_epu16() { | |
14978 | let a = _mm512_set1_epi16(1); | |
14979 | let b = _mm512_set1_epi16(1); | |
14980 | let r = _mm512_mask_avg_epu16(a, 0, a, b); | |
14981 | assert_eq_m512i(r, a); | |
14982 | let r = _mm512_mask_avg_epu16(a, 0b00000000_00000000_00000000_00001111, a, b); | |
14983 | #[rustfmt::skip] | |
14984 | let e = _mm512_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
14985 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); | |
14986 | assert_eq_m512i(r, e); | |
14987 | } | |
14988 | ||
14989 | #[simd_test(enable = "avx512bw")] | |
14990 | unsafe fn test_mm512_maskz_avg_epu16() { | |
14991 | let a = _mm512_set1_epi16(1); | |
14992 | let b = _mm512_set1_epi16(1); | |
14993 | let r = _mm512_maskz_avg_epu16(0, a, b); | |
14994 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
14995 | let r = _mm512_maskz_avg_epu16(0b00000000_00000000_00000000_00001111, a, b); | |
14996 | #[rustfmt::skip] | |
14997 | let e = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
14998 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); | |
14999 | assert_eq_m512i(r, e); | |
15000 | } | |
15001 | ||
15002 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15003 | unsafe fn test_mm256_mask_avg_epu16() { | |
15004 | let a = _mm256_set1_epi16(1); | |
15005 | let b = _mm256_set1_epi16(1); | |
15006 | let r = _mm256_mask_avg_epu16(a, 0, a, b); | |
15007 | assert_eq_m256i(r, a); | |
15008 | let r = _mm256_mask_avg_epu16(a, 0b00000000_00001111, a, b); | |
15009 | let e = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); | |
15010 | assert_eq_m256i(r, e); | |
15011 | } | |
15012 | ||
15013 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15014 | unsafe fn test_mm256_maskz_avg_epu16() { | |
15015 | let a = _mm256_set1_epi16(1); | |
15016 | let b = _mm256_set1_epi16(1); | |
15017 | let r = _mm256_maskz_avg_epu16(0, a, b); | |
15018 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15019 | let r = _mm256_maskz_avg_epu16(0b00000000_00001111, a, b); | |
15020 | let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); | |
15021 | assert_eq_m256i(r, e); | |
15022 | } | |
15023 | ||
15024 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15025 | unsafe fn test_mm_mask_avg_epu16() { | |
15026 | let a = _mm_set1_epi16(1); | |
15027 | let b = _mm_set1_epi16(1); | |
15028 | let r = _mm_mask_avg_epu16(a, 0, a, b); | |
15029 | assert_eq_m128i(r, a); | |
15030 | let r = _mm_mask_avg_epu16(a, 0b00001111, a, b); | |
15031 | let e = _mm_set_epi16(1, 1, 1, 1, 1, 1, 1, 1); | |
15032 | assert_eq_m128i(r, e); | |
15033 | } | |
15034 | ||
15035 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15036 | unsafe fn test_mm_maskz_avg_epu16() { | |
15037 | let a = _mm_set1_epi16(1); | |
15038 | let b = _mm_set1_epi16(1); | |
15039 | let r = _mm_maskz_avg_epu16(0, a, b); | |
15040 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15041 | let r = _mm_maskz_avg_epu16(0b00001111, a, b); | |
15042 | let e = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1); | |
15043 | assert_eq_m128i(r, e); | |
15044 | } | |
15045 | ||
15046 | #[simd_test(enable = "avx512bw")] | |
15047 | unsafe fn test_mm512_avg_epu8() { | |
15048 | let a = _mm512_set1_epi8(1); | |
15049 | let b = _mm512_set1_epi8(1); | |
15050 | let r = _mm512_avg_epu8(a, b); | |
15051 | let e = _mm512_set1_epi8(1); | |
15052 | assert_eq_m512i(r, e); | |
15053 | } | |
15054 | ||
15055 | #[simd_test(enable = "avx512bw")] | |
15056 | unsafe fn test_mm512_mask_avg_epu8() { | |
15057 | let a = _mm512_set1_epi8(1); | |
15058 | let b = _mm512_set1_epi8(1); | |
15059 | let r = _mm512_mask_avg_epu8(a, 0, a, b); | |
15060 | assert_eq_m512i(r, a); | |
15061 | let r = _mm512_mask_avg_epu8( | |
15062 | a, | |
15063 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00001111, | |
15064 | a, | |
15065 | b, | |
15066 | ); | |
15067 | #[rustfmt::skip] | |
15068 | let e = _mm512_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
15069 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
15070 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
15071 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); | |
15072 | assert_eq_m512i(r, e); | |
15073 | } | |
15074 | ||
15075 | #[simd_test(enable = "avx512bw")] | |
15076 | unsafe fn test_mm512_maskz_avg_epu8() { | |
15077 | let a = _mm512_set1_epi8(1); | |
15078 | let b = _mm512_set1_epi8(1); | |
15079 | let r = _mm512_maskz_avg_epu8(0, a, b); | |
15080 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15081 | let r = _mm512_maskz_avg_epu8( | |
15082 | 0b00000000_000000000_00000000_00000000_00000000_0000000_00000000_00001111, | |
15083 | a, | |
15084 | b, | |
15085 | ); | |
15086 | #[rustfmt::skip] | |
15087 | let e = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
15088 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
15089 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
15090 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); | |
15091 | assert_eq_m512i(r, e); | |
15092 | } | |
15093 | ||
15094 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15095 | unsafe fn test_mm256_mask_avg_epu8() { | |
15096 | let a = _mm256_set1_epi8(1); | |
15097 | let b = _mm256_set1_epi8(1); | |
15098 | let r = _mm256_mask_avg_epu8(a, 0, a, b); | |
15099 | assert_eq_m256i(r, a); | |
15100 | let r = _mm256_mask_avg_epu8(a, 0b00000000_00000000_00000000_00001111, a, b); | |
15101 | #[rustfmt::skip] | |
15102 | let e = _mm256_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
15103 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); | |
15104 | assert_eq_m256i(r, e); | |
15105 | } | |
15106 | ||
15107 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15108 | unsafe fn test_mm256_maskz_avg_epu8() { | |
15109 | let a = _mm256_set1_epi8(1); | |
15110 | let b = _mm256_set1_epi8(1); | |
15111 | let r = _mm256_maskz_avg_epu8(0, a, b); | |
15112 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15113 | let r = _mm256_maskz_avg_epu8(0b00000000_0000000_00000000_00001111, a, b); | |
15114 | #[rustfmt::skip] | |
15115 | let e = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
15116 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); | |
15117 | assert_eq_m256i(r, e); | |
15118 | } | |
15119 | ||
15120 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15121 | unsafe fn test_mm_mask_avg_epu8() { | |
15122 | let a = _mm_set1_epi8(1); | |
15123 | let b = _mm_set1_epi8(1); | |
15124 | let r = _mm_mask_avg_epu8(a, 0, a, b); | |
15125 | assert_eq_m128i(r, a); | |
15126 | let r = _mm_mask_avg_epu8(a, 0b00000000_00001111, a, b); | |
15127 | let e = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); | |
15128 | assert_eq_m128i(r, e); | |
15129 | } | |
15130 | ||
15131 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15132 | unsafe fn test_mm_maskz_avg_epu8() { | |
15133 | let a = _mm_set1_epi8(1); | |
15134 | let b = _mm_set1_epi8(1); | |
15135 | let r = _mm_maskz_avg_epu8(0, a, b); | |
15136 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15137 | let r = _mm_maskz_avg_epu8(0b00000000_00001111, a, b); | |
15138 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1); | |
15139 | assert_eq_m128i(r, e); | |
15140 | } | |
15141 | ||
15142 | #[simd_test(enable = "avx512bw")] | |
15143 | unsafe fn test_mm512_sll_epi16() { | |
15144 | let a = _mm512_set1_epi16(1 << 15); | |
15145 | let count = _mm_set1_epi16(2); | |
15146 | let r = _mm512_sll_epi16(a, count); | |
15147 | let e = _mm512_set1_epi16(0); | |
15148 | assert_eq_m512i(r, e); | |
15149 | } | |
15150 | ||
15151 | #[simd_test(enable = "avx512bw")] | |
15152 | unsafe fn test_mm512_mask_sll_epi16() { | |
15153 | let a = _mm512_set1_epi16(1 << 15); | |
15154 | let count = _mm_set1_epi16(2); | |
15155 | let r = _mm512_mask_sll_epi16(a, 0, a, count); | |
15156 | assert_eq_m512i(r, a); | |
15157 | let r = _mm512_mask_sll_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); | |
15158 | let e = _mm512_set1_epi16(0); | |
15159 | assert_eq_m512i(r, e); | |
15160 | } | |
15161 | ||
15162 | #[simd_test(enable = "avx512bw")] | |
15163 | unsafe fn test_mm512_maskz_sll_epi16() { | |
15164 | let a = _mm512_set1_epi16(1 << 15); | |
15165 | let count = _mm_set1_epi16(2); | |
15166 | let r = _mm512_maskz_sll_epi16(0, a, count); | |
15167 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15168 | let r = _mm512_maskz_sll_epi16(0b11111111_11111111_11111111_11111111, a, count); | |
15169 | let e = _mm512_set1_epi16(0); | |
15170 | assert_eq_m512i(r, e); | |
15171 | } | |
15172 | ||
15173 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15174 | unsafe fn test_mm256_mask_sll_epi16() { | |
15175 | let a = _mm256_set1_epi16(1 << 15); | |
15176 | let count = _mm_set1_epi16(2); | |
15177 | let r = _mm256_mask_sll_epi16(a, 0, a, count); | |
15178 | assert_eq_m256i(r, a); | |
15179 | let r = _mm256_mask_sll_epi16(a, 0b11111111_11111111, a, count); | |
15180 | let e = _mm256_set1_epi16(0); | |
15181 | assert_eq_m256i(r, e); | |
15182 | } | |
15183 | ||
15184 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15185 | unsafe fn test_mm256_maskz_sll_epi16() { | |
15186 | let a = _mm256_set1_epi16(1 << 15); | |
15187 | let count = _mm_set1_epi16(2); | |
15188 | let r = _mm256_maskz_sll_epi16(0, a, count); | |
15189 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15190 | let r = _mm256_maskz_sll_epi16(0b11111111_11111111, a, count); | |
15191 | let e = _mm256_set1_epi16(0); | |
15192 | assert_eq_m256i(r, e); | |
15193 | } | |
15194 | ||
15195 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15196 | unsafe fn test_mm_mask_sll_epi16() { | |
15197 | let a = _mm_set1_epi16(1 << 15); | |
15198 | let count = _mm_set1_epi16(2); | |
15199 | let r = _mm_mask_sll_epi16(a, 0, a, count); | |
15200 | assert_eq_m128i(r, a); | |
15201 | let r = _mm_mask_sll_epi16(a, 0b11111111, a, count); | |
15202 | let e = _mm_set1_epi16(0); | |
15203 | assert_eq_m128i(r, e); | |
15204 | } | |
15205 | ||
15206 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15207 | unsafe fn test_mm_maskz_sll_epi16() { | |
15208 | let a = _mm_set1_epi16(1 << 15); | |
15209 | let count = _mm_set1_epi16(2); | |
15210 | let r = _mm_maskz_sll_epi16(0, a, count); | |
15211 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15212 | let r = _mm_maskz_sll_epi16(0b11111111, a, count); | |
15213 | let e = _mm_set1_epi16(0); | |
15214 | assert_eq_m128i(r, e); | |
15215 | } | |
15216 | ||
15217 | #[simd_test(enable = "avx512bw")] | |
15218 | unsafe fn test_mm512_slli_epi16() { | |
15219 | let a = _mm512_set1_epi16(1 << 15); | |
17df50a5 | 15220 | let r = _mm512_slli_epi16::<1>(a); |
fc512014 XL |
15221 | let e = _mm512_set1_epi16(0); |
15222 | assert_eq_m512i(r, e); | |
15223 | } | |
15224 | ||
15225 | #[simd_test(enable = "avx512bw")] | |
15226 | unsafe fn test_mm512_mask_slli_epi16() { | |
15227 | let a = _mm512_set1_epi16(1 << 15); | |
17df50a5 | 15228 | let r = _mm512_mask_slli_epi16::<1>(a, 0, a); |
fc512014 | 15229 | assert_eq_m512i(r, a); |
17df50a5 | 15230 | let r = _mm512_mask_slli_epi16::<1>(a, 0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
15231 | let e = _mm512_set1_epi16(0); |
15232 | assert_eq_m512i(r, e); | |
15233 | } | |
15234 | ||
15235 | #[simd_test(enable = "avx512bw")] | |
15236 | unsafe fn test_mm512_maskz_slli_epi16() { | |
15237 | let a = _mm512_set1_epi16(1 << 15); | |
17df50a5 | 15238 | let r = _mm512_maskz_slli_epi16::<1>(0, a); |
fc512014 | 15239 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17df50a5 | 15240 | let r = _mm512_maskz_slli_epi16::<1>(0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
15241 | let e = _mm512_set1_epi16(0); |
15242 | assert_eq_m512i(r, e); | |
15243 | } | |
15244 | ||
15245 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15246 | unsafe fn test_mm256_mask_slli_epi16() { | |
15247 | let a = _mm256_set1_epi16(1 << 15); | |
17df50a5 | 15248 | let r = _mm256_mask_slli_epi16::<1>(a, 0, a); |
fc512014 | 15249 | assert_eq_m256i(r, a); |
17df50a5 | 15250 | let r = _mm256_mask_slli_epi16::<1>(a, 0b11111111_11111111, a); |
fc512014 XL |
15251 | let e = _mm256_set1_epi16(0); |
15252 | assert_eq_m256i(r, e); | |
15253 | } | |
15254 | ||
15255 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15256 | unsafe fn test_mm256_maskz_slli_epi16() { | |
15257 | let a = _mm256_set1_epi16(1 << 15); | |
17df50a5 | 15258 | let r = _mm256_maskz_slli_epi16::<1>(0, a); |
fc512014 | 15259 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17df50a5 | 15260 | let r = _mm256_maskz_slli_epi16::<1>(0b11111111_11111111, a); |
fc512014 XL |
15261 | let e = _mm256_set1_epi16(0); |
15262 | assert_eq_m256i(r, e); | |
15263 | } | |
15264 | ||
15265 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15266 | unsafe fn test_mm_mask_slli_epi16() { | |
15267 | let a = _mm_set1_epi16(1 << 15); | |
17df50a5 | 15268 | let r = _mm_mask_slli_epi16::<1>(a, 0, a); |
fc512014 | 15269 | assert_eq_m128i(r, a); |
17df50a5 | 15270 | let r = _mm_mask_slli_epi16::<1>(a, 0b11111111, a); |
fc512014 XL |
15271 | let e = _mm_set1_epi16(0); |
15272 | assert_eq_m128i(r, e); | |
15273 | } | |
15274 | ||
15275 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15276 | unsafe fn test_mm_maskz_slli_epi16() { | |
15277 | let a = _mm_set1_epi16(1 << 15); | |
17df50a5 | 15278 | let r = _mm_maskz_slli_epi16::<1>(0, a); |
fc512014 | 15279 | assert_eq_m128i(r, _mm_setzero_si128()); |
17df50a5 | 15280 | let r = _mm_maskz_slli_epi16::<1>(0b11111111, a); |
fc512014 XL |
15281 | let e = _mm_set1_epi16(0); |
15282 | assert_eq_m128i(r, e); | |
15283 | } | |
15284 | ||
15285 | #[simd_test(enable = "avx512bw")] | |
15286 | unsafe fn test_mm512_sllv_epi16() { | |
15287 | let a = _mm512_set1_epi16(1 << 15); | |
15288 | let count = _mm512_set1_epi16(2); | |
15289 | let r = _mm512_sllv_epi16(a, count); | |
15290 | let e = _mm512_set1_epi16(0); | |
15291 | assert_eq_m512i(r, e); | |
15292 | } | |
15293 | ||
15294 | #[simd_test(enable = "avx512bw")] | |
15295 | unsafe fn test_mm512_mask_sllv_epi16() { | |
15296 | let a = _mm512_set1_epi16(1 << 15); | |
15297 | let count = _mm512_set1_epi16(2); | |
15298 | let r = _mm512_mask_sllv_epi16(a, 0, a, count); | |
15299 | assert_eq_m512i(r, a); | |
15300 | let r = _mm512_mask_sllv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); | |
15301 | let e = _mm512_set1_epi16(0); | |
15302 | assert_eq_m512i(r, e); | |
15303 | } | |
15304 | ||
15305 | #[simd_test(enable = "avx512bw")] | |
15306 | unsafe fn test_mm512_maskz_sllv_epi16() { | |
15307 | let a = _mm512_set1_epi16(1 << 15); | |
15308 | let count = _mm512_set1_epi16(2); | |
15309 | let r = _mm512_maskz_sllv_epi16(0, a, count); | |
15310 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15311 | let r = _mm512_maskz_sllv_epi16(0b11111111_11111111_11111111_11111111, a, count); | |
15312 | let e = _mm512_set1_epi16(0); | |
15313 | assert_eq_m512i(r, e); | |
15314 | } | |
15315 | ||
15316 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15317 | unsafe fn test_mm256_sllv_epi16() { | |
15318 | let a = _mm256_set1_epi16(1 << 15); | |
15319 | let count = _mm256_set1_epi16(2); | |
15320 | let r = _mm256_sllv_epi16(a, count); | |
15321 | let e = _mm256_set1_epi16(0); | |
15322 | assert_eq_m256i(r, e); | |
15323 | } | |
15324 | ||
15325 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15326 | unsafe fn test_mm256_mask_sllv_epi16() { | |
15327 | let a = _mm256_set1_epi16(1 << 15); | |
15328 | let count = _mm256_set1_epi16(2); | |
15329 | let r = _mm256_mask_sllv_epi16(a, 0, a, count); | |
15330 | assert_eq_m256i(r, a); | |
15331 | let r = _mm256_mask_sllv_epi16(a, 0b11111111_11111111, a, count); | |
15332 | let e = _mm256_set1_epi16(0); | |
15333 | assert_eq_m256i(r, e); | |
15334 | } | |
15335 | ||
15336 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15337 | unsafe fn test_mm256_maskz_sllv_epi16() { | |
15338 | let a = _mm256_set1_epi16(1 << 15); | |
15339 | let count = _mm256_set1_epi16(2); | |
15340 | let r = _mm256_maskz_sllv_epi16(0, a, count); | |
15341 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15342 | let r = _mm256_maskz_sllv_epi16(0b11111111_11111111, a, count); | |
15343 | let e = _mm256_set1_epi16(0); | |
15344 | assert_eq_m256i(r, e); | |
15345 | } | |
15346 | ||
15347 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15348 | unsafe fn test_mm_sllv_epi16() { | |
15349 | let a = _mm_set1_epi16(1 << 15); | |
15350 | let count = _mm_set1_epi16(2); | |
15351 | let r = _mm_sllv_epi16(a, count); | |
15352 | let e = _mm_set1_epi16(0); | |
15353 | assert_eq_m128i(r, e); | |
15354 | } | |
15355 | ||
15356 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15357 | unsafe fn test_mm_mask_sllv_epi16() { | |
15358 | let a = _mm_set1_epi16(1 << 15); | |
15359 | let count = _mm_set1_epi16(2); | |
15360 | let r = _mm_mask_sllv_epi16(a, 0, a, count); | |
15361 | assert_eq_m128i(r, a); | |
15362 | let r = _mm_mask_sllv_epi16(a, 0b11111111, a, count); | |
15363 | let e = _mm_set1_epi16(0); | |
15364 | assert_eq_m128i(r, e); | |
15365 | } | |
15366 | ||
15367 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15368 | unsafe fn test_mm_maskz_sllv_epi16() { | |
15369 | let a = _mm_set1_epi16(1 << 15); | |
15370 | let count = _mm_set1_epi16(2); | |
15371 | let r = _mm_maskz_sllv_epi16(0, a, count); | |
15372 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15373 | let r = _mm_maskz_sllv_epi16(0b11111111, a, count); | |
15374 | let e = _mm_set1_epi16(0); | |
15375 | assert_eq_m128i(r, e); | |
15376 | } | |
15377 | ||
15378 | #[simd_test(enable = "avx512bw")] | |
15379 | unsafe fn test_mm512_srl_epi16() { | |
15380 | let a = _mm512_set1_epi16(1 << 1); | |
15381 | let count = _mm_set1_epi16(2); | |
15382 | let r = _mm512_srl_epi16(a, count); | |
15383 | let e = _mm512_set1_epi16(0); | |
15384 | assert_eq_m512i(r, e); | |
15385 | } | |
15386 | ||
15387 | #[simd_test(enable = "avx512bw")] | |
15388 | unsafe fn test_mm512_mask_srl_epi16() { | |
15389 | let a = _mm512_set1_epi16(1 << 1); | |
15390 | let count = _mm_set1_epi16(2); | |
15391 | let r = _mm512_mask_srl_epi16(a, 0, a, count); | |
15392 | assert_eq_m512i(r, a); | |
15393 | let r = _mm512_mask_srl_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); | |
15394 | let e = _mm512_set1_epi16(0); | |
15395 | assert_eq_m512i(r, e); | |
15396 | } | |
15397 | ||
15398 | #[simd_test(enable = "avx512bw")] | |
15399 | unsafe fn test_mm512_maskz_srl_epi16() { | |
15400 | let a = _mm512_set1_epi16(1 << 1); | |
15401 | let count = _mm_set1_epi16(2); | |
15402 | let r = _mm512_maskz_srl_epi16(0, a, count); | |
15403 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15404 | let r = _mm512_maskz_srl_epi16(0b11111111_11111111_11111111_11111111, a, count); | |
15405 | let e = _mm512_set1_epi16(0); | |
15406 | assert_eq_m512i(r, e); | |
15407 | } | |
15408 | ||
15409 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15410 | unsafe fn test_mm256_mask_srl_epi16() { | |
15411 | let a = _mm256_set1_epi16(1 << 1); | |
15412 | let count = _mm_set1_epi16(2); | |
15413 | let r = _mm256_mask_srl_epi16(a, 0, a, count); | |
15414 | assert_eq_m256i(r, a); | |
15415 | let r = _mm256_mask_srl_epi16(a, 0b11111111_11111111, a, count); | |
15416 | let e = _mm256_set1_epi16(0); | |
15417 | assert_eq_m256i(r, e); | |
15418 | } | |
15419 | ||
15420 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15421 | unsafe fn test_mm256_maskz_srl_epi16() { | |
15422 | let a = _mm256_set1_epi16(1 << 1); | |
15423 | let count = _mm_set1_epi16(2); | |
15424 | let r = _mm256_maskz_srl_epi16(0, a, count); | |
15425 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15426 | let r = _mm256_maskz_srl_epi16(0b11111111_11111111, a, count); | |
15427 | let e = _mm256_set1_epi16(0); | |
15428 | assert_eq_m256i(r, e); | |
15429 | } | |
15430 | ||
15431 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15432 | unsafe fn test_mm_mask_srl_epi16() { | |
15433 | let a = _mm_set1_epi16(1 << 1); | |
15434 | let count = _mm_set1_epi16(2); | |
15435 | let r = _mm_mask_srl_epi16(a, 0, a, count); | |
15436 | assert_eq_m128i(r, a); | |
15437 | let r = _mm_mask_srl_epi16(a, 0b11111111, a, count); | |
15438 | let e = _mm_set1_epi16(0); | |
15439 | assert_eq_m128i(r, e); | |
15440 | } | |
15441 | ||
15442 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15443 | unsafe fn test_mm_maskz_srl_epi16() { | |
15444 | let a = _mm_set1_epi16(1 << 1); | |
15445 | let count = _mm_set1_epi16(2); | |
15446 | let r = _mm_maskz_srl_epi16(0, a, count); | |
15447 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15448 | let r = _mm_maskz_srl_epi16(0b11111111, a, count); | |
15449 | let e = _mm_set1_epi16(0); | |
15450 | assert_eq_m128i(r, e); | |
15451 | } | |
15452 | ||
15453 | #[simd_test(enable = "avx512bw")] | |
15454 | unsafe fn test_mm512_srli_epi16() { | |
15455 | let a = _mm512_set1_epi16(1 << 1); | |
17df50a5 | 15456 | let r = _mm512_srli_epi16::<2>(a); |
fc512014 XL |
15457 | let e = _mm512_set1_epi16(0); |
15458 | assert_eq_m512i(r, e); | |
15459 | } | |
15460 | ||
15461 | #[simd_test(enable = "avx512bw")] | |
15462 | unsafe fn test_mm512_mask_srli_epi16() { | |
15463 | let a = _mm512_set1_epi16(1 << 1); | |
17df50a5 | 15464 | let r = _mm512_mask_srli_epi16::<2>(a, 0, a); |
fc512014 | 15465 | assert_eq_m512i(r, a); |
17df50a5 | 15466 | let r = _mm512_mask_srli_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
15467 | let e = _mm512_set1_epi16(0); |
15468 | assert_eq_m512i(r, e); | |
15469 | } | |
15470 | ||
15471 | #[simd_test(enable = "avx512bw")] | |
15472 | unsafe fn test_mm512_maskz_srli_epi16() { | |
15473 | let a = _mm512_set1_epi16(1 << 1); | |
17df50a5 | 15474 | let r = _mm512_maskz_srli_epi16::<2>(0, a); |
fc512014 | 15475 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17df50a5 | 15476 | let r = _mm512_maskz_srli_epi16::<2>(0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
15477 | let e = _mm512_set1_epi16(0); |
15478 | assert_eq_m512i(r, e); | |
15479 | } | |
15480 | ||
15481 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15482 | unsafe fn test_mm256_mask_srli_epi16() { | |
15483 | let a = _mm256_set1_epi16(1 << 1); | |
17df50a5 | 15484 | let r = _mm256_mask_srli_epi16::<2>(a, 0, a); |
fc512014 | 15485 | assert_eq_m256i(r, a); |
17df50a5 | 15486 | let r = _mm256_mask_srli_epi16::<2>(a, 0b11111111_11111111, a); |
fc512014 XL |
15487 | let e = _mm256_set1_epi16(0); |
15488 | assert_eq_m256i(r, e); | |
15489 | } | |
15490 | ||
15491 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15492 | unsafe fn test_mm256_maskz_srli_epi16() { | |
15493 | let a = _mm256_set1_epi16(1 << 1); | |
17df50a5 | 15494 | let r = _mm256_maskz_srli_epi16::<2>(0, a); |
fc512014 | 15495 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17df50a5 | 15496 | let r = _mm256_maskz_srli_epi16::<2>(0b11111111_11111111, a); |
fc512014 XL |
15497 | let e = _mm256_set1_epi16(0); |
15498 | assert_eq_m256i(r, e); | |
15499 | } | |
15500 | ||
15501 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15502 | unsafe fn test_mm_mask_srli_epi16() { | |
15503 | let a = _mm_set1_epi16(1 << 1); | |
17df50a5 | 15504 | let r = _mm_mask_srli_epi16::<2>(a, 0, a); |
fc512014 | 15505 | assert_eq_m128i(r, a); |
17df50a5 | 15506 | let r = _mm_mask_srli_epi16::<2>(a, 0b11111111, a); |
fc512014 XL |
15507 | let e = _mm_set1_epi16(0); |
15508 | assert_eq_m128i(r, e); | |
15509 | } | |
15510 | ||
15511 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15512 | unsafe fn test_mm_maskz_srli_epi16() { | |
15513 | let a = _mm_set1_epi16(1 << 1); | |
17df50a5 | 15514 | let r = _mm_maskz_srli_epi16::<2>(0, a); |
fc512014 | 15515 | assert_eq_m128i(r, _mm_setzero_si128()); |
17df50a5 | 15516 | let r = _mm_maskz_srli_epi16::<2>(0b11111111, a); |
fc512014 XL |
15517 | let e = _mm_set1_epi16(0); |
15518 | assert_eq_m128i(r, e); | |
15519 | } | |
15520 | ||
15521 | #[simd_test(enable = "avx512bw")] | |
15522 | unsafe fn test_mm512_srlv_epi16() { | |
15523 | let a = _mm512_set1_epi16(1 << 1); | |
15524 | let count = _mm512_set1_epi16(2); | |
15525 | let r = _mm512_srlv_epi16(a, count); | |
15526 | let e = _mm512_set1_epi16(0); | |
15527 | assert_eq_m512i(r, e); | |
15528 | } | |
15529 | ||
15530 | #[simd_test(enable = "avx512bw")] | |
15531 | unsafe fn test_mm512_mask_srlv_epi16() { | |
15532 | let a = _mm512_set1_epi16(1 << 1); | |
15533 | let count = _mm512_set1_epi16(2); | |
15534 | let r = _mm512_mask_srlv_epi16(a, 0, a, count); | |
15535 | assert_eq_m512i(r, a); | |
15536 | let r = _mm512_mask_srlv_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); | |
15537 | let e = _mm512_set1_epi16(0); | |
15538 | assert_eq_m512i(r, e); | |
15539 | } | |
15540 | ||
15541 | #[simd_test(enable = "avx512bw")] | |
15542 | unsafe fn test_mm512_maskz_srlv_epi16() { | |
15543 | let a = _mm512_set1_epi16(1 << 1); | |
15544 | let count = _mm512_set1_epi16(2); | |
15545 | let r = _mm512_maskz_srlv_epi16(0, a, count); | |
15546 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15547 | let r = _mm512_maskz_srlv_epi16(0b11111111_11111111_11111111_11111111, a, count); | |
15548 | let e = _mm512_set1_epi16(0); | |
15549 | assert_eq_m512i(r, e); | |
15550 | } | |
15551 | ||
15552 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15553 | unsafe fn test_mm256_srlv_epi16() { | |
15554 | let a = _mm256_set1_epi16(1 << 1); | |
15555 | let count = _mm256_set1_epi16(2); | |
15556 | let r = _mm256_srlv_epi16(a, count); | |
15557 | let e = _mm256_set1_epi16(0); | |
15558 | assert_eq_m256i(r, e); | |
15559 | } | |
15560 | ||
15561 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15562 | unsafe fn test_mm256_mask_srlv_epi16() { | |
15563 | let a = _mm256_set1_epi16(1 << 1); | |
15564 | let count = _mm256_set1_epi16(2); | |
15565 | let r = _mm256_mask_srlv_epi16(a, 0, a, count); | |
15566 | assert_eq_m256i(r, a); | |
15567 | let r = _mm256_mask_srlv_epi16(a, 0b11111111_11111111, a, count); | |
15568 | let e = _mm256_set1_epi16(0); | |
15569 | assert_eq_m256i(r, e); | |
15570 | } | |
15571 | ||
15572 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15573 | unsafe fn test_mm256_maskz_srlv_epi16() { | |
15574 | let a = _mm256_set1_epi16(1 << 1); | |
15575 | let count = _mm256_set1_epi16(2); | |
15576 | let r = _mm256_maskz_srlv_epi16(0, a, count); | |
15577 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15578 | let r = _mm256_maskz_srlv_epi16(0b11111111_11111111, a, count); | |
15579 | let e = _mm256_set1_epi16(0); | |
15580 | assert_eq_m256i(r, e); | |
15581 | } | |
15582 | ||
15583 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15584 | unsafe fn test_mm_srlv_epi16() { | |
15585 | let a = _mm_set1_epi16(1 << 1); | |
15586 | let count = _mm_set1_epi16(2); | |
15587 | let r = _mm_srlv_epi16(a, count); | |
15588 | let e = _mm_set1_epi16(0); | |
15589 | assert_eq_m128i(r, e); | |
15590 | } | |
15591 | ||
15592 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15593 | unsafe fn test_mm_mask_srlv_epi16() { | |
15594 | let a = _mm_set1_epi16(1 << 1); | |
15595 | let count = _mm_set1_epi16(2); | |
15596 | let r = _mm_mask_srlv_epi16(a, 0, a, count); | |
15597 | assert_eq_m128i(r, a); | |
15598 | let r = _mm_mask_srlv_epi16(a, 0b11111111, a, count); | |
15599 | let e = _mm_set1_epi16(0); | |
15600 | assert_eq_m128i(r, e); | |
15601 | } | |
15602 | ||
15603 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15604 | unsafe fn test_mm_maskz_srlv_epi16() { | |
15605 | let a = _mm_set1_epi16(1 << 1); | |
15606 | let count = _mm_set1_epi16(2); | |
15607 | let r = _mm_maskz_srlv_epi16(0, a, count); | |
15608 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15609 | let r = _mm_maskz_srlv_epi16(0b11111111, a, count); | |
15610 | let e = _mm_set1_epi16(0); | |
15611 | assert_eq_m128i(r, e); | |
15612 | } | |
15613 | ||
15614 | #[simd_test(enable = "avx512bw")] | |
15615 | unsafe fn test_mm512_sra_epi16() { | |
15616 | let a = _mm512_set1_epi16(8); | |
15617 | let count = _mm_set1_epi16(1); | |
15618 | let r = _mm512_sra_epi16(a, count); | |
15619 | let e = _mm512_set1_epi16(0); | |
15620 | assert_eq_m512i(r, e); | |
15621 | } | |
15622 | ||
15623 | #[simd_test(enable = "avx512bw")] | |
15624 | unsafe fn test_mm512_mask_sra_epi16() { | |
15625 | let a = _mm512_set1_epi16(8); | |
15626 | let count = _mm_set1_epi16(1); | |
15627 | let r = _mm512_mask_sra_epi16(a, 0, a, count); | |
15628 | assert_eq_m512i(r, a); | |
15629 | let r = _mm512_mask_sra_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); | |
15630 | let e = _mm512_set1_epi16(0); | |
15631 | assert_eq_m512i(r, e); | |
15632 | } | |
15633 | ||
15634 | #[simd_test(enable = "avx512bw")] | |
15635 | unsafe fn test_mm512_maskz_sra_epi16() { | |
15636 | let a = _mm512_set1_epi16(8); | |
15637 | let count = _mm_set1_epi16(1); | |
15638 | let r = _mm512_maskz_sra_epi16(0, a, count); | |
15639 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15640 | let r = _mm512_maskz_sra_epi16(0b11111111_11111111_11111111_11111111, a, count); | |
15641 | let e = _mm512_set1_epi16(0); | |
15642 | assert_eq_m512i(r, e); | |
15643 | } | |
15644 | ||
15645 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15646 | unsafe fn test_mm256_mask_sra_epi16() { | |
15647 | let a = _mm256_set1_epi16(8); | |
15648 | let count = _mm_set1_epi16(1); | |
15649 | let r = _mm256_mask_sra_epi16(a, 0, a, count); | |
15650 | assert_eq_m256i(r, a); | |
15651 | let r = _mm256_mask_sra_epi16(a, 0b11111111_11111111, a, count); | |
15652 | let e = _mm256_set1_epi16(0); | |
15653 | assert_eq_m256i(r, e); | |
15654 | } | |
15655 | ||
15656 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15657 | unsafe fn test_mm256_maskz_sra_epi16() { | |
15658 | let a = _mm256_set1_epi16(8); | |
15659 | let count = _mm_set1_epi16(1); | |
15660 | let r = _mm256_maskz_sra_epi16(0, a, count); | |
15661 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15662 | let r = _mm256_maskz_sra_epi16(0b11111111_11111111, a, count); | |
15663 | let e = _mm256_set1_epi16(0); | |
15664 | assert_eq_m256i(r, e); | |
15665 | } | |
15666 | ||
15667 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15668 | unsafe fn test_mm_mask_sra_epi16() { | |
15669 | let a = _mm_set1_epi16(8); | |
15670 | let count = _mm_set1_epi16(1); | |
15671 | let r = _mm_mask_sra_epi16(a, 0, a, count); | |
15672 | assert_eq_m128i(r, a); | |
15673 | let r = _mm_mask_sra_epi16(a, 0b11111111, a, count); | |
15674 | let e = _mm_set1_epi16(0); | |
15675 | assert_eq_m128i(r, e); | |
15676 | } | |
15677 | ||
15678 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15679 | unsafe fn test_mm_maskz_sra_epi16() { | |
15680 | let a = _mm_set1_epi16(8); | |
15681 | let count = _mm_set1_epi16(1); | |
15682 | let r = _mm_maskz_sra_epi16(0, a, count); | |
15683 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15684 | let r = _mm_maskz_sra_epi16(0b11111111, a, count); | |
15685 | let e = _mm_set1_epi16(0); | |
15686 | assert_eq_m128i(r, e); | |
15687 | } | |
15688 | ||
15689 | #[simd_test(enable = "avx512bw")] | |
15690 | unsafe fn test_mm512_srai_epi16() { | |
15691 | let a = _mm512_set1_epi16(8); | |
17df50a5 | 15692 | let r = _mm512_srai_epi16::<2>(a); |
fc512014 XL |
15693 | let e = _mm512_set1_epi16(2); |
15694 | assert_eq_m512i(r, e); | |
15695 | } | |
15696 | ||
15697 | #[simd_test(enable = "avx512bw")] | |
15698 | unsafe fn test_mm512_mask_srai_epi16() { | |
15699 | let a = _mm512_set1_epi16(8); | |
17df50a5 | 15700 | let r = _mm512_mask_srai_epi16::<2>(a, 0, a); |
fc512014 | 15701 | assert_eq_m512i(r, a); |
17df50a5 | 15702 | let r = _mm512_mask_srai_epi16::<2>(a, 0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
15703 | let e = _mm512_set1_epi16(2); |
15704 | assert_eq_m512i(r, e); | |
15705 | } | |
15706 | ||
15707 | #[simd_test(enable = "avx512bw")] | |
15708 | unsafe fn test_mm512_maskz_srai_epi16() { | |
15709 | let a = _mm512_set1_epi16(8); | |
17df50a5 | 15710 | let r = _mm512_maskz_srai_epi16::<2>(0, a); |
fc512014 | 15711 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17df50a5 | 15712 | let r = _mm512_maskz_srai_epi16::<2>(0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
15713 | let e = _mm512_set1_epi16(2); |
15714 | assert_eq_m512i(r, e); | |
15715 | } | |
15716 | ||
15717 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15718 | unsafe fn test_mm256_mask_srai_epi16() { | |
15719 | let a = _mm256_set1_epi16(8); | |
17df50a5 | 15720 | let r = _mm256_mask_srai_epi16::<2>(a, 0, a); |
fc512014 | 15721 | assert_eq_m256i(r, a); |
17df50a5 | 15722 | let r = _mm256_mask_srai_epi16::<2>(a, 0b11111111_11111111, a); |
fc512014 XL |
15723 | let e = _mm256_set1_epi16(2); |
15724 | assert_eq_m256i(r, e); | |
15725 | } | |
15726 | ||
15727 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15728 | unsafe fn test_mm256_maskz_srai_epi16() { | |
15729 | let a = _mm256_set1_epi16(8); | |
17df50a5 | 15730 | let r = _mm256_maskz_srai_epi16::<2>(0, a); |
fc512014 | 15731 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17df50a5 | 15732 | let r = _mm256_maskz_srai_epi16::<2>(0b11111111_11111111, a); |
fc512014 XL |
15733 | let e = _mm256_set1_epi16(2); |
15734 | assert_eq_m256i(r, e); | |
15735 | } | |
15736 | ||
15737 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15738 | unsafe fn test_mm_mask_srai_epi16() { | |
15739 | let a = _mm_set1_epi16(8); | |
17df50a5 | 15740 | let r = _mm_mask_srai_epi16::<2>(a, 0, a); |
fc512014 | 15741 | assert_eq_m128i(r, a); |
17df50a5 | 15742 | let r = _mm_mask_srai_epi16::<2>(a, 0b11111111, a); |
fc512014 XL |
15743 | let e = _mm_set1_epi16(2); |
15744 | assert_eq_m128i(r, e); | |
15745 | } | |
15746 | ||
15747 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15748 | unsafe fn test_mm_maskz_srai_epi16() { | |
15749 | let a = _mm_set1_epi16(8); | |
17df50a5 | 15750 | let r = _mm_maskz_srai_epi16::<2>(0, a); |
fc512014 | 15751 | assert_eq_m128i(r, _mm_setzero_si128()); |
17df50a5 | 15752 | let r = _mm_maskz_srai_epi16::<2>(0b11111111, a); |
fc512014 XL |
15753 | let e = _mm_set1_epi16(2); |
15754 | assert_eq_m128i(r, e); | |
15755 | } | |
15756 | ||
15757 | #[simd_test(enable = "avx512bw")] | |
15758 | unsafe fn test_mm512_srav_epi16() { | |
15759 | let a = _mm512_set1_epi16(8); | |
15760 | let count = _mm512_set1_epi16(2); | |
15761 | let r = _mm512_srav_epi16(a, count); | |
15762 | let e = _mm512_set1_epi16(2); | |
15763 | assert_eq_m512i(r, e); | |
15764 | } | |
15765 | ||
15766 | #[simd_test(enable = "avx512bw")] | |
15767 | unsafe fn test_mm512_mask_srav_epi16() { | |
15768 | let a = _mm512_set1_epi16(8); | |
15769 | let count = _mm512_set1_epi16(2); | |
15770 | let r = _mm512_mask_srav_epi16(a, 0, a, count); | |
15771 | assert_eq_m512i(r, a); | |
15772 | let r = _mm512_mask_srav_epi16(a, 0b11111111_11111111_11111111_11111111, a, count); | |
15773 | let e = _mm512_set1_epi16(2); | |
15774 | assert_eq_m512i(r, e); | |
15775 | } | |
15776 | ||
15777 | #[simd_test(enable = "avx512bw")] | |
15778 | unsafe fn test_mm512_maskz_srav_epi16() { | |
15779 | let a = _mm512_set1_epi16(8); | |
15780 | let count = _mm512_set1_epi16(2); | |
15781 | let r = _mm512_maskz_srav_epi16(0, a, count); | |
15782 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15783 | let r = _mm512_maskz_srav_epi16(0b11111111_11111111_11111111_11111111, a, count); | |
15784 | let e = _mm512_set1_epi16(2); | |
15785 | assert_eq_m512i(r, e); | |
15786 | } | |
15787 | ||
15788 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15789 | unsafe fn test_mm256_srav_epi16() { | |
15790 | let a = _mm256_set1_epi16(8); | |
15791 | let count = _mm256_set1_epi16(2); | |
15792 | let r = _mm256_srav_epi16(a, count); | |
15793 | let e = _mm256_set1_epi16(2); | |
15794 | assert_eq_m256i(r, e); | |
15795 | } | |
15796 | ||
15797 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15798 | unsafe fn test_mm256_mask_srav_epi16() { | |
15799 | let a = _mm256_set1_epi16(8); | |
15800 | let count = _mm256_set1_epi16(2); | |
15801 | let r = _mm256_mask_srav_epi16(a, 0, a, count); | |
15802 | assert_eq_m256i(r, a); | |
15803 | let r = _mm256_mask_srav_epi16(a, 0b11111111_11111111, a, count); | |
15804 | let e = _mm256_set1_epi16(2); | |
15805 | assert_eq_m256i(r, e); | |
15806 | } | |
15807 | ||
15808 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15809 | unsafe fn test_mm256_maskz_srav_epi16() { | |
15810 | let a = _mm256_set1_epi16(8); | |
15811 | let count = _mm256_set1_epi16(2); | |
15812 | let r = _mm256_maskz_srav_epi16(0, a, count); | |
15813 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15814 | let r = _mm256_maskz_srav_epi16(0b11111111_11111111, a, count); | |
15815 | let e = _mm256_set1_epi16(2); | |
15816 | assert_eq_m256i(r, e); | |
15817 | } | |
15818 | ||
15819 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15820 | unsafe fn test_mm_srav_epi16() { | |
15821 | let a = _mm_set1_epi16(8); | |
15822 | let count = _mm_set1_epi16(2); | |
15823 | let r = _mm_srav_epi16(a, count); | |
15824 | let e = _mm_set1_epi16(2); | |
15825 | assert_eq_m128i(r, e); | |
15826 | } | |
15827 | ||
15828 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15829 | unsafe fn test_mm_mask_srav_epi16() { | |
15830 | let a = _mm_set1_epi16(8); | |
15831 | let count = _mm_set1_epi16(2); | |
15832 | let r = _mm_mask_srav_epi16(a, 0, a, count); | |
15833 | assert_eq_m128i(r, a); | |
15834 | let r = _mm_mask_srav_epi16(a, 0b11111111, a, count); | |
15835 | let e = _mm_set1_epi16(2); | |
15836 | assert_eq_m128i(r, e); | |
15837 | } | |
15838 | ||
15839 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15840 | unsafe fn test_mm_maskz_srav_epi16() { | |
15841 | let a = _mm_set1_epi16(8); | |
15842 | let count = _mm_set1_epi16(2); | |
15843 | let r = _mm_maskz_srav_epi16(0, a, count); | |
15844 | assert_eq_m128i(r, _mm_setzero_si128()); | |
15845 | let r = _mm_maskz_srav_epi16(0b11111111, a, count); | |
15846 | let e = _mm_set1_epi16(2); | |
15847 | assert_eq_m128i(r, e); | |
15848 | } | |
15849 | ||
15850 | #[simd_test(enable = "avx512bw")] | |
15851 | unsafe fn test_mm512_permutex2var_epi16() { | |
15852 | #[rustfmt::skip] | |
15853 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
15854 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
15855 | #[rustfmt::skip] | |
15856 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, | |
15857 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); | |
15858 | let b = _mm512_set1_epi16(100); | |
15859 | let r = _mm512_permutex2var_epi16(a, idx, b); | |
15860 | #[rustfmt::skip] | |
15861 | let e = _mm512_set_epi16( | |
15862 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, | |
15863 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, | |
15864 | ); | |
15865 | assert_eq_m512i(r, e); | |
15866 | } | |
15867 | ||
15868 | #[simd_test(enable = "avx512bw")] | |
15869 | unsafe fn test_mm512_mask_permutex2var_epi16() { | |
15870 | #[rustfmt::skip] | |
15871 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
15872 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
15873 | #[rustfmt::skip] | |
15874 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, | |
15875 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); | |
15876 | let b = _mm512_set1_epi16(100); | |
15877 | let r = _mm512_mask_permutex2var_epi16(a, 0, idx, b); | |
15878 | assert_eq_m512i(r, a); | |
15879 | let r = _mm512_mask_permutex2var_epi16(a, 0b11111111_11111111_11111111_11111111, idx, b); | |
15880 | #[rustfmt::skip] | |
15881 | let e = _mm512_set_epi16( | |
15882 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, | |
15883 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, | |
15884 | ); | |
15885 | assert_eq_m512i(r, e); | |
15886 | } | |
15887 | ||
15888 | #[simd_test(enable = "avx512bw")] | |
15889 | unsafe fn test_mm512_maskz_permutex2var_epi16() { | |
15890 | #[rustfmt::skip] | |
15891 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
15892 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
15893 | #[rustfmt::skip] | |
15894 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, | |
15895 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); | |
15896 | let b = _mm512_set1_epi16(100); | |
15897 | let r = _mm512_maskz_permutex2var_epi16(0, a, idx, b); | |
15898 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
15899 | let r = _mm512_maskz_permutex2var_epi16(0b11111111_11111111_11111111_11111111, a, idx, b); | |
15900 | #[rustfmt::skip] | |
15901 | let e = _mm512_set_epi16( | |
15902 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, | |
15903 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, | |
15904 | ); | |
15905 | assert_eq_m512i(r, e); | |
15906 | } | |
15907 | ||
15908 | #[simd_test(enable = "avx512bw")] | |
15909 | unsafe fn test_mm512_mask2_permutex2var_epi16() { | |
15910 | #[rustfmt::skip] | |
15911 | let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
15912 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); | |
15913 | #[rustfmt::skip] | |
15914 | let idx = _mm512_set_epi16(1, 1<<5, 2, 1<<5, 3, 1<<5, 4, 1<<5, 5, 1<<5, 6, 1<<5, 7, 1<<5, 8, 1<<5, | |
15915 | 9, 1<<5, 10, 1<<5, 11, 1<<5, 12, 1<<5, 13, 1<<5, 14, 1<<5, 15, 1<<5, 16, 1<<5); | |
15916 | let b = _mm512_set1_epi16(100); | |
15917 | let r = _mm512_mask2_permutex2var_epi16(a, idx, 0, b); | |
15918 | assert_eq_m512i(r, idx); | |
15919 | let r = _mm512_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111_11111111_11111111, b); | |
15920 | #[rustfmt::skip] | |
15921 | let e = _mm512_set_epi16( | |
15922 | 30, 100, 29, 100, 28, 100, 27, 100, 26, 100, 25, 100, 24, 100, 23, 100, | |
15923 | 22, 100, 21, 100, 20, 100, 19, 100, 18, 100, 17, 100, 16, 100, 15, 100, | |
15924 | ); | |
15925 | assert_eq_m512i(r, e); | |
15926 | } | |
15927 | ||
15928 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15929 | unsafe fn test_mm256_permutex2var_epi16() { | |
15930 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
15931 | #[rustfmt::skip] | |
15932 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); | |
15933 | let b = _mm256_set1_epi16(100); | |
15934 | let r = _mm256_permutex2var_epi16(a, idx, b); | |
15935 | let e = _mm256_set_epi16( | |
15936 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, | |
15937 | ); | |
15938 | assert_eq_m256i(r, e); | |
15939 | } | |
15940 | ||
15941 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15942 | unsafe fn test_mm256_mask_permutex2var_epi16() { | |
15943 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
15944 | #[rustfmt::skip] | |
15945 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); | |
15946 | let b = _mm256_set1_epi16(100); | |
15947 | let r = _mm256_mask_permutex2var_epi16(a, 0, idx, b); | |
15948 | assert_eq_m256i(r, a); | |
15949 | let r = _mm256_mask_permutex2var_epi16(a, 0b11111111_11111111, idx, b); | |
15950 | let e = _mm256_set_epi16( | |
15951 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, | |
15952 | ); | |
15953 | assert_eq_m256i(r, e); | |
15954 | } | |
15955 | ||
15956 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15957 | unsafe fn test_mm256_maskz_permutex2var_epi16() { | |
15958 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
15959 | #[rustfmt::skip] | |
15960 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); | |
15961 | let b = _mm256_set1_epi16(100); | |
15962 | let r = _mm256_maskz_permutex2var_epi16(0, a, idx, b); | |
15963 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
15964 | let r = _mm256_maskz_permutex2var_epi16(0b11111111_11111111, a, idx, b); | |
15965 | let e = _mm256_set_epi16( | |
15966 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, | |
15967 | ); | |
15968 | assert_eq_m256i(r, e); | |
15969 | } | |
15970 | ||
15971 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15972 | unsafe fn test_mm256_mask2_permutex2var_epi16() { | |
15973 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
15974 | #[rustfmt::skip] | |
15975 | let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4); | |
15976 | let b = _mm256_set1_epi16(100); | |
15977 | let r = _mm256_mask2_permutex2var_epi16(a, idx, 0, b); | |
15978 | assert_eq_m256i(r, idx); | |
15979 | let r = _mm256_mask2_permutex2var_epi16(a, idx, 0b11111111_11111111, b); | |
15980 | #[rustfmt::skip] | |
15981 | let e = _mm256_set_epi16( | |
15982 | 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100, | |
15983 | ); | |
15984 | assert_eq_m256i(r, e); | |
15985 | } | |
15986 | ||
15987 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15988 | unsafe fn test_mm_permutex2var_epi16() { | |
15989 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
15990 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); | |
15991 | let b = _mm_set1_epi16(100); | |
15992 | let r = _mm_permutex2var_epi16(a, idx, b); | |
15993 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); | |
15994 | assert_eq_m128i(r, e); | |
15995 | } | |
15996 | ||
15997 | #[simd_test(enable = "avx512bw,avx512vl")] | |
15998 | unsafe fn test_mm_mask_permutex2var_epi16() { | |
15999 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
16000 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); | |
16001 | let b = _mm_set1_epi16(100); | |
16002 | let r = _mm_mask_permutex2var_epi16(a, 0, idx, b); | |
16003 | assert_eq_m128i(r, a); | |
16004 | let r = _mm_mask_permutex2var_epi16(a, 0b11111111, idx, b); | |
16005 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); | |
16006 | assert_eq_m128i(r, e); | |
16007 | } | |
16008 | ||
16009 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16010 | unsafe fn test_mm_maskz_permutex2var_epi16() { | |
16011 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
16012 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); | |
16013 | let b = _mm_set1_epi16(100); | |
16014 | let r = _mm_maskz_permutex2var_epi16(0, a, idx, b); | |
16015 | assert_eq_m128i(r, _mm_setzero_si128()); | |
16016 | let r = _mm_maskz_permutex2var_epi16(0b11111111, a, idx, b); | |
16017 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); | |
16018 | assert_eq_m128i(r, e); | |
16019 | } | |
16020 | ||
16021 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16022 | unsafe fn test_mm_mask2_permutex2var_epi16() { | |
16023 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
16024 | let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); | |
16025 | let b = _mm_set1_epi16(100); | |
16026 | let r = _mm_mask2_permutex2var_epi16(a, idx, 0, b); | |
16027 | assert_eq_m128i(r, idx); | |
16028 | let r = _mm_mask2_permutex2var_epi16(a, idx, 0b11111111, b); | |
16029 | let e = _mm_set_epi16(6, 100, 5, 100, 4, 100, 3, 100); | |
16030 | assert_eq_m128i(r, e); | |
16031 | } | |
16032 | ||
// `permutexvar` tests (512/256-bit): `idx` is all-1s, so every result lane
// holds element 1 of `a`. Since `_mm512_set_epi16` lists lanes from highest
// to lowest, element 1 is the second-to-last argument: 30 for the 512-bit
// vector, 14 for the 256-bit vector.
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_permutexvar_epi16() {
    let idx = _mm512_set1_epi16(1);
    #[rustfmt::skip]
    let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                             16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    let r = _mm512_permutexvar_epi16(idx, a);
    let e = _mm512_set1_epi16(30);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_permutexvar_epi16() {
    let idx = _mm512_set1_epi16(1);
    #[rustfmt::skip]
    let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                             16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    // Mask 0 copies all lanes from the src operand (`a` here).
    let r = _mm512_mask_permutexvar_epi16(a, 0, idx, a);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_permutexvar_epi16(a, 0b11111111_11111111_11111111_11111111, idx, a);
    let e = _mm512_set1_epi16(30);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_permutexvar_epi16() {
    let idx = _mm512_set1_epi16(1);
    #[rustfmt::skip]
    let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                             16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    // Mask 0 zeroes every lane.
    let r = _mm512_maskz_permutexvar_epi16(0, idx, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_permutexvar_epi16(0b11111111_11111111_11111111_11111111, idx, a);
    let e = _mm512_set1_epi16(30);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_permutexvar_epi16() {
    let idx = _mm256_set1_epi16(1);
    let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm256_permutexvar_epi16(idx, a);
    let e = _mm256_set1_epi16(14);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_permutexvar_epi16() {
    let idx = _mm256_set1_epi16(1);
    let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_permutexvar_epi16(a, 0b11111111_11111111, idx, a);
    let e = _mm256_set1_epi16(14);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_permutexvar_epi16() {
    let idx = _mm256_set1_epi16(1);
    let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_permutexvar_epi16(0b11111111_11111111, idx, a);
    let e = _mm256_set1_epi16(14);
    assert_eq_m256i(r, e);
}
16100 | ||
16101 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16102 | unsafe fn test_mm_permutexvar_epi16() { | |
16103 | let idx = _mm_set1_epi16(1); | |
16104 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
16105 | let r = _mm_permutexvar_epi16(idx, a); | |
16106 | let e = _mm_set1_epi16(6); | |
16107 | assert_eq_m128i(r, e); | |
16108 | } | |
16109 | ||
// 128-bit writemask/zeromask `permutexvar`: element 1 of `a` is 6 (lanes are
// listed highest-to-lowest by `_mm_set_epi16`).
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_permutexvar_epi16() {
    let idx = _mm_set1_epi16(1);
    let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    // Mask 0 copies all lanes from the src operand (`a`).
    let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
    assert_eq_m128i(r, a);
    let r = _mm_mask_permutexvar_epi16(a, 0b11111111, idx, a);
    let e = _mm_set1_epi16(6);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_permutexvar_epi16() {
    let idx = _mm_set1_epi16(1);
    let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    // Mask 0 zeroes every lane.
    let r = _mm_maskz_permutexvar_epi16(0, idx, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_permutexvar_epi16(0b11111111, idx, a);
    let e = _mm_set1_epi16(6);
    assert_eq_m128i(r, e);
}
16131 | ||
// `mask_blend` (epi16): a set mask bit selects the lane from `b` (2), a clear
// bit selects it from `a` (1); mask bit i corresponds to lane i (lowest lane
// is the last `set_epi16` argument).
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_blend_epi16() {
    let a = _mm512_set1_epi16(1);
    let b = _mm512_set1_epi16(2);
    let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                             2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_blend_epi16() {
    let a = _mm256_set1_epi16(1);
    let b = _mm256_set1_epi16(2);
    let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
    let e = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}
16151 | ||
16152 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16153 | unsafe fn test_mm_mask_blend_epi16() { | |
16154 | let a = _mm_set1_epi16(1); | |
16155 | let b = _mm_set1_epi16(2); | |
16156 | let r = _mm_mask_blend_epi16(0b11110000, a, b); | |
16157 | let e = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1); | |
16158 | assert_eq_m128i(r, e); | |
16159 | } | |
16160 | ||
// `mask_blend` (epi8): a set mask bit selects the byte from `b` (2), a clear
// bit selects it from `a` (1).
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_blend_epi8() {
    let a = _mm512_set1_epi8(1);
    let b = _mm512_set1_epi8(2);
    let r = _mm512_mask_blend_epi8(
        0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                            2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                            2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                            2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_blend_epi8() {
    let a = _mm256_set1_epi8(1);
    let b = _mm256_set1_epi8(2);
    let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
                            2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_blend_epi8() {
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(2);
    let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
    let e = _mm_set_epi8(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
    assert_eq_m128i(r, e);
}
16197 | ||
16198 | #[simd_test(enable = "avx512bw")] | |
16199 | unsafe fn test_mm512_broadcastw_epi16() { | |
16200 | let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24); | |
16201 | let r = _mm512_broadcastw_epi16(a); | |
16202 | let e = _mm512_set1_epi16(24); | |
16203 | assert_eq_m512i(r, e); | |
16204 | } | |
16205 | ||
// Masked `broadcastw` variants: the lowest lane of `a` (24) is broadcast; a
// zero mask copies from `src` (writemask) or zeroes the result (zeromask).
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_broadcastw_epi16() {
    let src = _mm512_set1_epi16(1);
    let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
    let r = _mm512_mask_broadcastw_epi16(src, 0, a);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_broadcastw_epi16(src, 0b11111111_11111111_11111111_11111111, a);
    let e = _mm512_set1_epi16(24);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_broadcastw_epi16() {
    let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
    let r = _mm512_maskz_broadcastw_epi16(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_broadcastw_epi16(0b11111111_11111111_11111111_11111111, a);
    let e = _mm512_set1_epi16(24);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_broadcastw_epi16() {
    let src = _mm256_set1_epi16(1);
    let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
    let r = _mm256_mask_broadcastw_epi16(src, 0, a);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_broadcastw_epi16(src, 0b11111111_11111111, a);
    let e = _mm256_set1_epi16(24);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_broadcastw_epi16() {
    let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
    let r = _mm256_maskz_broadcastw_epi16(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_broadcastw_epi16(0b11111111_11111111, a);
    let e = _mm256_set1_epi16(24);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_broadcastw_epi16() {
    let src = _mm_set1_epi16(1);
    let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
    let r = _mm_mask_broadcastw_epi16(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm_mask_broadcastw_epi16(src, 0b11111111, a);
    let e = _mm_set1_epi16(24);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_broadcastw_epi16() {
    let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
    let r = _mm_maskz_broadcastw_epi16(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_broadcastw_epi16(0b11111111, a);
    let e = _mm_set1_epi16(24);
    assert_eq_m128i(r, e);
}
16268 | ||
// `broadcastb` tests: the lowest byte of `a` (32, the last `_mm_set_epi8`
// argument) is broadcast to every byte of the result; masked variants copy
// from `src` (writemask) or zero (zeromask) when the mask is 0.
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_broadcastb_epi8() {
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm512_broadcastb_epi8(a);
    let e = _mm512_set1_epi8(32);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_broadcastb_epi8() {
    let src = _mm512_set1_epi8(1);
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm512_mask_broadcastb_epi8(src, 0, a);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_broadcastb_epi8(
        src,
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
        a,
    );
    let e = _mm512_set1_epi8(32);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_broadcastb_epi8() {
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm512_maskz_broadcastb_epi8(0, a);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_broadcastb_epi8(
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
        a,
    );
    let e = _mm512_set1_epi8(32);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_broadcastb_epi8() {
    let src = _mm256_set1_epi8(1);
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm256_mask_broadcastb_epi8(src, 0, a);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_broadcastb_epi8(src, 0b11111111_11111111_11111111_11111111, a);
    let e = _mm256_set1_epi8(32);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_broadcastb_epi8() {
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm256_maskz_broadcastb_epi8(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_broadcastb_epi8(0b11111111_11111111_11111111_11111111, a);
    let e = _mm256_set1_epi8(32);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_broadcastb_epi8() {
    let src = _mm_set1_epi8(1);
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm_mask_broadcastb_epi8(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm_mask_broadcastb_epi8(src, 0b11111111_11111111, a);
    let e = _mm_set1_epi8(32);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_broadcastb_epi8() {
    let a = _mm_set_epi8(
        17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
    );
    let r = _mm_maskz_broadcastb_epi8(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_broadcastb_epi8(0b11111111_11111111, a);
    let e = _mm_set1_epi8(32);
    assert_eq_m128i(r, e);
}
16360 | ||
// `unpackhi_epi16` tests: interleaves the upper-half 16-bit elements of each
// 128-bit lane of `a` and `b` (as the expected vectors below demonstrate).
// Masked variants copy from the first operand / zero when the mask is 0.
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_unpackhi_epi16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    let r = _mm512_unpackhi_epi16(a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
                             49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_unpackhi_epi16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    let r = _mm512_mask_unpackhi_epi16(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_unpackhi_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
                             49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_unpackhi_epi16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    let r = _mm512_maskz_unpackhi_epi16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_unpackhi_epi16(0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12,
                             49, 17, 50, 18, 51, 19, 52, 20, 57, 25, 58, 26, 59, 27, 60, 28);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_unpackhi_epi16() {
    let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm256_set_epi16(
        33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
    );
    let r = _mm256_mask_unpackhi_epi16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_unpackhi_epi16(a, 0b11111111_11111111, a, b);
    let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_unpackhi_epi16() {
    let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm256_set_epi16(
        33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
    );
    let r = _mm256_maskz_unpackhi_epi16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_unpackhi_epi16(0b11111111_11111111, a, b);
    let e = _mm256_set_epi16(33, 1, 34, 2, 35, 3, 36, 4, 41, 9, 42, 10, 43, 11, 44, 12);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_unpackhi_epi16() {
    let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
    let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_unpackhi_epi16(a, 0b11111111, a, b);
    let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_unpackhi_epi16() {
    let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
    let r = _mm_maskz_unpackhi_epi16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_unpackhi_epi16(0b11111111, a, b);
    let e = _mm_set_epi16(33, 1, 34, 2, 35, 3, 36, 4);
    assert_eq_m128i(r, e);
}
16457 | ||
// `unpackhi_epi8` tests: interleaves the upper-half bytes of each 128-bit
// lane of `a` and `b` (see the expected vectors). Masked variants copy from
// the first operand / zero when the mask is 0.
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_unpackhi_epi8() {
    #[rustfmt::skip]
    let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    #[rustfmt::skip]
    let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                            81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
                            97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                            113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
    let r = _mm512_unpackhi_epi8(a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
                            81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                            97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                            113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_unpackhi_epi8() {
    #[rustfmt::skip]
    let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    #[rustfmt::skip]
    let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                            81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
                            97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                            113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
    // Mask 0 copies every byte from the src operand (`a`).
    let r = _mm512_mask_unpackhi_epi8(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_unpackhi_epi8(
        a,
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
                            81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                            97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                            113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_unpackhi_epi8() {
    #[rustfmt::skip]
    let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                            33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                            49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    #[rustfmt::skip]
    let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                            81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
                            97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
                            113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0);
    let r = _mm512_maskz_unpackhi_epi8(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_unpackhi_epi8(
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
        a,
        b,
    );
    #[rustfmt::skip]
    let e = _mm512_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
                            81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24,
                            97, 33, 98, 34, 99, 35, 100, 36, 101, 37, 102, 38, 103, 39, 104, 40,
                            113, 49, 114, 50, 115, 51, 116, 52, 117, 53, 118, 54, 119, 55, 120, 56);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_unpackhi_epi8() {
    #[rustfmt::skip]
    let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                            81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96);
    let r = _mm256_mask_unpackhi_epi8(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_unpackhi_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
                            81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_unpackhi_epi8() {
    #[rustfmt::skip]
    let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
                            81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96);
    let r = _mm256_maskz_unpackhi_epi8(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_unpackhi_epi8(0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8,
                            81, 17, 82, 18, 83, 19, 84, 20, 85, 21, 86, 22, 87, 23, 88, 24);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_unpackhi_epi8() {
    let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm_set_epi8(
        65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
    );
    let r = _mm_mask_unpackhi_epi8(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_unpackhi_epi8(a, 0b11111111_11111111, a, b);
    let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_unpackhi_epi8() {
    let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm_set_epi8(
        65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
    );
    let r = _mm_maskz_unpackhi_epi8(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_unpackhi_epi8(0b11111111_11111111, a, b);
    let e = _mm_set_epi8(65, 1, 66, 2, 67, 3, 68, 4, 69, 5, 70, 6, 71, 7, 72, 8);
    assert_eq_m128i(r, e);
}
16593 | ||
// `unpacklo_epi16` tests: interleaves the lower-half 16-bit elements of each
// 128-bit lane of `a` and `b` (see the expected vectors). Masked variants
// copy from the first operand / zero when the mask is 0.
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_unpacklo_epi16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    let r = _mm512_unpacklo_epi16(a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
                             53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_unpacklo_epi16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    let r = _mm512_mask_unpacklo_epi16(a, 0, a, b);
    assert_eq_m512i(r, a);
    let r = _mm512_mask_unpacklo_epi16(a, 0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
                             53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_unpacklo_epi16() {
    #[rustfmt::skip]
    let a = _mm512_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
    #[rustfmt::skip]
    let b = _mm512_set_epi16(33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
                             49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64);
    let r = _mm512_maskz_unpacklo_epi16(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_unpacklo_epi16(0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm512_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16,
                             53, 21, 54, 22, 55, 23, 56, 24, 61, 29, 62, 30, 63, 31, 64, 32);
    assert_eq_m512i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_unpacklo_epi16() {
    let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm256_set_epi16(
        33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
    );
    let r = _mm256_mask_unpacklo_epi16(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_unpacklo_epi16(a, 0b11111111_11111111, a, b);
    let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_unpacklo_epi16() {
    let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
    let b = _mm256_set_epi16(
        33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
    );
    let r = _mm256_maskz_unpacklo_epi16(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_unpacklo_epi16(0b11111111_11111111, a, b);
    let e = _mm256_set_epi16(37, 5, 38, 6, 39, 7, 40, 8, 45, 13, 46, 14, 47, 15, 48, 16);
    assert_eq_m256i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_unpacklo_epi16() {
    let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
    let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_unpacklo_epi16(a, 0b11111111, a, b);
    let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_unpacklo_epi16() {
    let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
    let r = _mm_maskz_unpacklo_epi16(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_unpacklo_epi16(0b11111111, a, b);
    let e = _mm_set_epi16(37, 5, 38, 6, 39, 7, 40, 8);
    assert_eq_m128i(r, e);
}
16690 | ||
16691 | #[simd_test(enable = "avx512bw")] | |
16692 | unsafe fn test_mm512_unpacklo_epi8() { | |
16693 | #[rustfmt::skip] | |
16694 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
16695 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
16696 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, | |
16697 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); | |
16698 | #[rustfmt::skip] | |
16699 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16700 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, | |
16701 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, | |
16702 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); | |
16703 | let r = _mm512_unpacklo_epi8(a, b); | |
16704 | #[rustfmt::skip] | |
16705 | let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16706 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, | |
16707 | 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, | |
16708 | 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); | |
16709 | assert_eq_m512i(r, e); | |
16710 | } | |
16711 | ||
16712 | #[simd_test(enable = "avx512bw")] | |
16713 | unsafe fn test_mm512_mask_unpacklo_epi8() { | |
16714 | #[rustfmt::skip] | |
16715 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
16716 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
16717 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, | |
16718 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); | |
16719 | #[rustfmt::skip] | |
16720 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16721 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, | |
16722 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, | |
16723 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); | |
16724 | let r = _mm512_mask_unpacklo_epi8(a, 0, a, b); | |
16725 | assert_eq_m512i(r, a); | |
16726 | let r = _mm512_mask_unpacklo_epi8( | |
16727 | a, | |
16728 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
16729 | a, | |
16730 | b, | |
16731 | ); | |
16732 | #[rustfmt::skip] | |
16733 | let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16734 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, | |
16735 | 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, | |
16736 | 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); | |
16737 | assert_eq_m512i(r, e); | |
16738 | } | |
16739 | ||
16740 | #[simd_test(enable = "avx512bw")] | |
16741 | unsafe fn test_mm512_maskz_unpacklo_epi8() { | |
16742 | #[rustfmt::skip] | |
16743 | let a = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
16744 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
16745 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, | |
16746 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); | |
16747 | #[rustfmt::skip] | |
16748 | let b = _mm512_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16749 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, | |
16750 | 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, | |
16751 | 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 0); | |
16752 | let r = _mm512_maskz_unpacklo_epi8(0, a, b); | |
16753 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
16754 | let r = _mm512_maskz_unpacklo_epi8( | |
16755 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
16756 | a, | |
16757 | b, | |
16758 | ); | |
16759 | #[rustfmt::skip] | |
16760 | let e = _mm512_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16761 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32, | |
16762 | 105, 41, 106, 42, 107, 43, 108, 44, 109, 45, 110, 46, 111, 47, 112, 48, | |
16763 | 121, 57, 122, 58, 123, 59, 124, 60, 125, 61, 126, 62, 127, 63, 0, 64); | |
16764 | assert_eq_m512i(r, e); | |
16765 | } | |
16766 | ||
16767 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16768 | unsafe fn test_mm256_mask_unpacklo_epi8() { | |
16769 | #[rustfmt::skip] | |
16770 | let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
16771 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); | |
16772 | #[rustfmt::skip] | |
16773 | let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16774 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); | |
16775 | let r = _mm256_mask_unpacklo_epi8(a, 0, a, b); | |
16776 | assert_eq_m256i(r, a); | |
16777 | let r = _mm256_mask_unpacklo_epi8(a, 0b11111111_11111111_11111111_11111111, a, b); | |
16778 | #[rustfmt::skip] | |
16779 | let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16780 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32); | |
16781 | assert_eq_m256i(r, e); | |
16782 | } | |
16783 | ||
16784 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16785 | unsafe fn test_mm256_maskz_unpacklo_epi8() { | |
16786 | #[rustfmt::skip] | |
16787 | let a = _mm256_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
16788 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); | |
16789 | #[rustfmt::skip] | |
16790 | let b = _mm256_set_epi8(65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16791 | 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96); | |
16792 | let r = _mm256_maskz_unpacklo_epi8(0, a, b); | |
16793 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
16794 | let r = _mm256_maskz_unpacklo_epi8(0b11111111_11111111_11111111_11111111, a, b); | |
16795 | #[rustfmt::skip] | |
16796 | let e = _mm256_set_epi8(73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16797 | 89, 25, 90, 26, 91, 27, 92, 28, 93, 29, 94, 30, 95, 31, 96, 32); | |
16798 | assert_eq_m256i(r, e); | |
16799 | } | |
16800 | ||
16801 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16802 | unsafe fn test_mm_mask_unpacklo_epi8() { | |
16803 | let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); | |
16804 | let b = _mm_set_epi8( | |
16805 | 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16806 | ); | |
16807 | let r = _mm_mask_unpacklo_epi8(a, 0, a, b); | |
16808 | assert_eq_m128i(r, a); | |
16809 | let r = _mm_mask_unpacklo_epi8(a, 0b11111111_11111111, a, b); | |
16810 | let e = _mm_set_epi8( | |
16811 | 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16812 | ); | |
16813 | assert_eq_m128i(r, e); | |
16814 | } | |
16815 | ||
16816 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16817 | unsafe fn test_mm_maskz_unpacklo_epi8() { | |
16818 | let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); | |
16819 | let b = _mm_set_epi8( | |
16820 | 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
16821 | ); | |
16822 | let r = _mm_maskz_unpacklo_epi8(0, a, b); | |
16823 | assert_eq_m128i(r, _mm_setzero_si128()); | |
16824 | let r = _mm_maskz_unpacklo_epi8(0b11111111_11111111, a, b); | |
16825 | let e = _mm_set_epi8( | |
16826 | 73, 9, 74, 10, 75, 11, 76, 12, 77, 13, 78, 14, 79, 15, 80, 16, | |
16827 | ); | |
16828 | assert_eq_m128i(r, e); | |
16829 | } | |
16830 | ||
16831 | #[simd_test(enable = "avx512bw")] | |
16832 | unsafe fn test_mm512_mask_mov_epi16() { | |
16833 | let src = _mm512_set1_epi16(1); | |
16834 | let a = _mm512_set1_epi16(2); | |
16835 | let r = _mm512_mask_mov_epi16(src, 0, a); | |
16836 | assert_eq_m512i(r, src); | |
16837 | let r = _mm512_mask_mov_epi16(src, 0b11111111_11111111_11111111_11111111, a); | |
16838 | assert_eq_m512i(r, a); | |
16839 | } | |
16840 | ||
16841 | #[simd_test(enable = "avx512bw")] | |
16842 | unsafe fn test_mm512_maskz_mov_epi16() { | |
16843 | let a = _mm512_set1_epi16(2); | |
16844 | let r = _mm512_maskz_mov_epi16(0, a); | |
16845 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
16846 | let r = _mm512_maskz_mov_epi16(0b11111111_11111111_11111111_11111111, a); | |
16847 | assert_eq_m512i(r, a); | |
16848 | } | |
16849 | ||
16850 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16851 | unsafe fn test_mm256_mask_mov_epi16() { | |
16852 | let src = _mm256_set1_epi16(1); | |
16853 | let a = _mm256_set1_epi16(2); | |
16854 | let r = _mm256_mask_mov_epi16(src, 0, a); | |
16855 | assert_eq_m256i(r, src); | |
16856 | let r = _mm256_mask_mov_epi16(src, 0b11111111_11111111, a); | |
16857 | assert_eq_m256i(r, a); | |
16858 | } | |
16859 | ||
16860 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16861 | unsafe fn test_mm256_maskz_mov_epi16() { | |
16862 | let a = _mm256_set1_epi16(2); | |
16863 | let r = _mm256_maskz_mov_epi16(0, a); | |
16864 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
16865 | let r = _mm256_maskz_mov_epi16(0b11111111_11111111, a); | |
16866 | assert_eq_m256i(r, a); | |
16867 | } | |
16868 | ||
16869 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16870 | unsafe fn test_mm_mask_mov_epi16() { | |
16871 | let src = _mm_set1_epi16(1); | |
16872 | let a = _mm_set1_epi16(2); | |
16873 | let r = _mm_mask_mov_epi16(src, 0, a); | |
16874 | assert_eq_m128i(r, src); | |
16875 | let r = _mm_mask_mov_epi16(src, 0b11111111, a); | |
16876 | assert_eq_m128i(r, a); | |
16877 | } | |
16878 | ||
16879 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16880 | unsafe fn test_mm_maskz_mov_epi16() { | |
16881 | let a = _mm_set1_epi16(2); | |
16882 | let r = _mm_maskz_mov_epi16(0, a); | |
16883 | assert_eq_m128i(r, _mm_setzero_si128()); | |
16884 | let r = _mm_maskz_mov_epi16(0b11111111, a); | |
16885 | assert_eq_m128i(r, a); | |
16886 | } | |
16887 | ||
16888 | #[simd_test(enable = "avx512bw")] | |
16889 | unsafe fn test_mm512_mask_mov_epi8() { | |
16890 | let src = _mm512_set1_epi8(1); | |
16891 | let a = _mm512_set1_epi8(2); | |
16892 | let r = _mm512_mask_mov_epi8(src, 0, a); | |
16893 | assert_eq_m512i(r, src); | |
16894 | let r = _mm512_mask_mov_epi8( | |
16895 | src, | |
16896 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
16897 | a, | |
16898 | ); | |
16899 | assert_eq_m512i(r, a); | |
16900 | } | |
16901 | ||
16902 | #[simd_test(enable = "avx512bw")] | |
16903 | unsafe fn test_mm512_maskz_mov_epi8() { | |
16904 | let a = _mm512_set1_epi8(2); | |
16905 | let r = _mm512_maskz_mov_epi8(0, a); | |
16906 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
16907 | let r = _mm512_maskz_mov_epi8( | |
16908 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
16909 | a, | |
16910 | ); | |
16911 | assert_eq_m512i(r, a); | |
16912 | } | |
16913 | ||
16914 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16915 | unsafe fn test_mm256_mask_mov_epi8() { | |
16916 | let src = _mm256_set1_epi8(1); | |
16917 | let a = _mm256_set1_epi8(2); | |
16918 | let r = _mm256_mask_mov_epi8(src, 0, a); | |
16919 | assert_eq_m256i(r, src); | |
16920 | let r = _mm256_mask_mov_epi8(src, 0b11111111_11111111_11111111_11111111, a); | |
16921 | assert_eq_m256i(r, a); | |
16922 | } | |
16923 | ||
16924 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16925 | unsafe fn test_mm256_maskz_mov_epi8() { | |
16926 | let a = _mm256_set1_epi8(2); | |
16927 | let r = _mm256_maskz_mov_epi8(0, a); | |
16928 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
16929 | let r = _mm256_maskz_mov_epi8(0b11111111_11111111_11111111_11111111, a); | |
16930 | assert_eq_m256i(r, a); | |
16931 | } | |
16932 | ||
16933 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16934 | unsafe fn test_mm_mask_mov_epi8() { | |
16935 | let src = _mm_set1_epi8(1); | |
16936 | let a = _mm_set1_epi8(2); | |
16937 | let r = _mm_mask_mov_epi8(src, 0, a); | |
16938 | assert_eq_m128i(r, src); | |
16939 | let r = _mm_mask_mov_epi8(src, 0b11111111_11111111, a); | |
16940 | assert_eq_m128i(r, a); | |
16941 | } | |
16942 | ||
16943 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16944 | unsafe fn test_mm_maskz_mov_epi8() { | |
16945 | let a = _mm_set1_epi8(2); | |
16946 | let r = _mm_maskz_mov_epi8(0, a); | |
16947 | assert_eq_m128i(r, _mm_setzero_si128()); | |
16948 | let r = _mm_maskz_mov_epi8(0b11111111_11111111, a); | |
16949 | assert_eq_m128i(r, a); | |
16950 | } | |
16951 | ||
16952 | #[simd_test(enable = "avx512bw")] | |
16953 | unsafe fn test_mm512_mask_set1_epi16() { | |
16954 | let src = _mm512_set1_epi16(2); | |
16955 | let a: i16 = 11; | |
16956 | let r = _mm512_mask_set1_epi16(src, 0, a); | |
16957 | assert_eq_m512i(r, src); | |
16958 | let r = _mm512_mask_set1_epi16(src, 0b11111111_11111111_11111111_11111111, a); | |
16959 | let e = _mm512_set1_epi16(11); | |
16960 | assert_eq_m512i(r, e); | |
16961 | } | |
16962 | ||
16963 | #[simd_test(enable = "avx512bw")] | |
16964 | unsafe fn test_mm512_maskz_set1_epi16() { | |
16965 | let a: i16 = 11; | |
16966 | let r = _mm512_maskz_set1_epi16(0, a); | |
16967 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
16968 | let r = _mm512_maskz_set1_epi16(0b11111111_11111111_11111111_11111111, a); | |
16969 | let e = _mm512_set1_epi16(11); | |
16970 | assert_eq_m512i(r, e); | |
16971 | } | |
16972 | ||
16973 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16974 | unsafe fn test_mm256_mask_set1_epi16() { | |
16975 | let src = _mm256_set1_epi16(2); | |
16976 | let a: i16 = 11; | |
16977 | let r = _mm256_mask_set1_epi16(src, 0, a); | |
16978 | assert_eq_m256i(r, src); | |
16979 | let r = _mm256_mask_set1_epi16(src, 0b11111111_11111111, a); | |
16980 | let e = _mm256_set1_epi16(11); | |
16981 | assert_eq_m256i(r, e); | |
16982 | } | |
16983 | ||
16984 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16985 | unsafe fn test_mm256_maskz_set1_epi16() { | |
16986 | let a: i16 = 11; | |
16987 | let r = _mm256_maskz_set1_epi16(0, a); | |
16988 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
16989 | let r = _mm256_maskz_set1_epi16(0b11111111_11111111, a); | |
16990 | let e = _mm256_set1_epi16(11); | |
16991 | assert_eq_m256i(r, e); | |
16992 | } | |
16993 | ||
16994 | #[simd_test(enable = "avx512bw,avx512vl")] | |
16995 | unsafe fn test_mm_mask_set1_epi16() { | |
16996 | let src = _mm_set1_epi16(2); | |
16997 | let a: i16 = 11; | |
16998 | let r = _mm_mask_set1_epi16(src, 0, a); | |
16999 | assert_eq_m128i(r, src); | |
17000 | let r = _mm_mask_set1_epi16(src, 0b11111111, a); | |
17001 | let e = _mm_set1_epi16(11); | |
17002 | assert_eq_m128i(r, e); | |
17003 | } | |
17004 | ||
17005 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17006 | unsafe fn test_mm_maskz_set1_epi16() { | |
17007 | let a: i16 = 11; | |
17008 | let r = _mm_maskz_set1_epi16(0, a); | |
17009 | assert_eq_m128i(r, _mm_setzero_si128()); | |
17010 | let r = _mm_maskz_set1_epi16(0b11111111, a); | |
17011 | let e = _mm_set1_epi16(11); | |
17012 | assert_eq_m128i(r, e); | |
17013 | } | |
17014 | ||
17015 | #[simd_test(enable = "avx512bw")] | |
17016 | unsafe fn test_mm512_mask_set1_epi8() { | |
17017 | let src = _mm512_set1_epi8(2); | |
17018 | let a: i8 = 11; | |
17019 | let r = _mm512_mask_set1_epi8(src, 0, a); | |
17020 | assert_eq_m512i(r, src); | |
17021 | let r = _mm512_mask_set1_epi8( | |
17022 | src, | |
17023 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
17024 | a, | |
17025 | ); | |
17026 | let e = _mm512_set1_epi8(11); | |
17027 | assert_eq_m512i(r, e); | |
17028 | } | |
17029 | ||
17030 | #[simd_test(enable = "avx512bw")] | |
17031 | unsafe fn test_mm512_maskz_set1_epi8() { | |
17032 | let a: i8 = 11; | |
17033 | let r = _mm512_maskz_set1_epi8(0, a); | |
17034 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
17035 | let r = _mm512_maskz_set1_epi8( | |
17036 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
17037 | a, | |
17038 | ); | |
17039 | let e = _mm512_set1_epi8(11); | |
17040 | assert_eq_m512i(r, e); | |
17041 | } | |
17042 | ||
17043 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17044 | unsafe fn test_mm256_mask_set1_epi8() { | |
17045 | let src = _mm256_set1_epi8(2); | |
17046 | let a: i8 = 11; | |
17047 | let r = _mm256_mask_set1_epi8(src, 0, a); | |
17048 | assert_eq_m256i(r, src); | |
17049 | let r = _mm256_mask_set1_epi8(src, 0b11111111_11111111_11111111_11111111, a); | |
17050 | let e = _mm256_set1_epi8(11); | |
17051 | assert_eq_m256i(r, e); | |
17052 | } | |
17053 | ||
17054 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17055 | unsafe fn test_mm256_maskz_set1_epi8() { | |
17056 | let a: i8 = 11; | |
17057 | let r = _mm256_maskz_set1_epi8(0, a); | |
17058 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
17059 | let r = _mm256_maskz_set1_epi8(0b11111111_11111111_11111111_11111111, a); | |
17060 | let e = _mm256_set1_epi8(11); | |
17061 | assert_eq_m256i(r, e); | |
17062 | } | |
17063 | ||
17064 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17065 | unsafe fn test_mm_mask_set1_epi8() { | |
17066 | let src = _mm_set1_epi8(2); | |
17067 | let a: i8 = 11; | |
17068 | let r = _mm_mask_set1_epi8(src, 0, a); | |
17069 | assert_eq_m128i(r, src); | |
17070 | let r = _mm_mask_set1_epi8(src, 0b11111111_11111111, a); | |
17071 | let e = _mm_set1_epi8(11); | |
17072 | assert_eq_m128i(r, e); | |
17073 | } | |
17074 | ||
17075 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17076 | unsafe fn test_mm_maskz_set1_epi8() { | |
17077 | let a: i8 = 11; | |
17078 | let r = _mm_maskz_set1_epi8(0, a); | |
17079 | assert_eq_m128i(r, _mm_setzero_si128()); | |
17080 | let r = _mm_maskz_set1_epi8(0b11111111_11111111, a); | |
17081 | let e = _mm_set1_epi8(11); | |
17082 | assert_eq_m128i(r, e); | |
17083 | } | |
17084 | ||
17085 | #[simd_test(enable = "avx512bw")] | |
17086 | unsafe fn test_mm512_shufflelo_epi16() { | |
17087 | #[rustfmt::skip] | |
17088 | let a = _mm512_set_epi16( | |
17089 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17090 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17091 | ); | |
17092 | #[rustfmt::skip] | |
17093 | let e = _mm512_set_epi16( | |
17094 | 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, | |
17095 | 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, | |
17096 | ); | |
17df50a5 | 17097 | let r = _mm512_shufflelo_epi16::<0b00_01_01_11>(a); |
fc512014 XL |
17098 | assert_eq_m512i(r, e); |
17099 | } | |
17100 | ||
17101 | #[simd_test(enable = "avx512bw")] | |
17102 | unsafe fn test_mm512_mask_shufflelo_epi16() { | |
17103 | #[rustfmt::skip] | |
17104 | let a = _mm512_set_epi16( | |
17105 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17106 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17107 | ); | |
17df50a5 | 17108 | let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); |
fc512014 | 17109 | assert_eq_m512i(r, a); |
17df50a5 XL |
17110 | let r = _mm512_mask_shufflelo_epi16::<0b00_01_01_11>( |
17111 | a, | |
17112 | 0b11111111_11111111_11111111_11111111, | |
17113 | a, | |
17114 | ); | |
fc512014 XL |
17115 | #[rustfmt::skip] |
17116 | let e = _mm512_set_epi16( | |
17117 | 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, | |
17118 | 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, | |
17119 | ); | |
17120 | assert_eq_m512i(r, e); | |
17121 | } | |
17122 | ||
17123 | #[simd_test(enable = "avx512bw")] | |
17124 | unsafe fn test_mm512_maskz_shufflelo_epi16() { | |
17125 | #[rustfmt::skip] | |
17126 | let a = _mm512_set_epi16( | |
17127 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17128 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17129 | ); | |
17df50a5 | 17130 | let r = _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); |
fc512014 XL |
17131 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17132 | let r = | |
17df50a5 | 17133 | _mm512_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
17134 | #[rustfmt::skip] |
17135 | let e = _mm512_set_epi16( | |
17136 | 0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12, | |
17137 | 16, 17, 18, 19, 23, 22, 22, 20, 24, 25, 26, 27, 31, 30, 30, 28, | |
17138 | ); | |
17139 | assert_eq_m512i(r, e); | |
17140 | } | |
17141 | ||
cdc7bbd5 XL |
17142 | #[simd_test(enable = "avx512bw,avx512vl")] |
17143 | unsafe fn test_mm256_mask_shufflelo_epi16() { | |
17144 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
17df50a5 | 17145 | let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); |
cdc7bbd5 | 17146 | assert_eq_m256i(r, a); |
17df50a5 | 17147 | let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); |
cdc7bbd5 XL |
17148 | let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); |
17149 | assert_eq_m256i(r, e); | |
17150 | } | |
17151 | ||
17152 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17153 | unsafe fn test_mm256_maskz_shufflelo_epi16() { | |
17154 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
17df50a5 | 17155 | let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); |
cdc7bbd5 | 17156 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17df50a5 | 17157 | let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111_11111111, a); |
cdc7bbd5 XL |
17158 | let e = _mm256_set_epi16(0, 1, 2, 3, 7, 6, 6, 4, 8, 9, 10, 11, 15, 14, 14, 12); |
17159 | assert_eq_m256i(r, e); | |
17160 | } | |
17161 | ||
17162 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17163 | unsafe fn test_mm_mask_shufflelo_epi16() { | |
17164 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
17df50a5 | 17165 | let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a); |
cdc7bbd5 | 17166 | assert_eq_m128i(r, a); |
17df50a5 | 17167 | let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0b11111111, a); |
cdc7bbd5 XL |
17168 | let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); |
17169 | assert_eq_m128i(r, e); | |
17170 | } | |
17171 | ||
17172 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17173 | unsafe fn test_mm_maskz_shufflelo_epi16() { | |
17174 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
17df50a5 | 17175 | let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a); |
cdc7bbd5 | 17176 | assert_eq_m128i(r, _mm_setzero_si128()); |
17df50a5 | 17177 | let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0b11111111, a); |
cdc7bbd5 XL |
17178 | let e = _mm_set_epi16(0, 1, 2, 3, 7, 6, 6, 4); |
17179 | assert_eq_m128i(r, e); | |
17180 | } | |
17181 | ||
fc512014 XL |
17182 | #[simd_test(enable = "avx512bw")] |
17183 | unsafe fn test_mm512_shufflehi_epi16() { | |
17184 | #[rustfmt::skip] | |
17185 | let a = _mm512_set_epi16( | |
17186 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17187 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17188 | ); | |
17189 | #[rustfmt::skip] | |
17190 | let e = _mm512_set_epi16( | |
17191 | 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, | |
17192 | 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, | |
17193 | ); | |
17df50a5 | 17194 | let r = _mm512_shufflehi_epi16::<0b00_01_01_11>(a); |
fc512014 XL |
17195 | assert_eq_m512i(r, e); |
17196 | } | |
17197 | ||
17198 | #[simd_test(enable = "avx512bw")] | |
17199 | unsafe fn test_mm512_mask_shufflehi_epi16() { | |
17200 | #[rustfmt::skip] | |
17201 | let a = _mm512_set_epi16( | |
17202 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17203 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17204 | ); | |
17df50a5 | 17205 | let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); |
fc512014 | 17206 | assert_eq_m512i(r, a); |
17df50a5 XL |
17207 | let r = _mm512_mask_shufflehi_epi16::<0b00_01_01_11>( |
17208 | a, | |
17209 | 0b11111111_11111111_11111111_11111111, | |
17210 | a, | |
17211 | ); | |
fc512014 XL |
17212 | #[rustfmt::skip] |
17213 | let e = _mm512_set_epi16( | |
17214 | 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, | |
17215 | 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, | |
17216 | ); | |
17217 | assert_eq_m512i(r, e); | |
17218 | } | |
17219 | ||
17220 | #[simd_test(enable = "avx512bw")] | |
17221 | unsafe fn test_mm512_maskz_shufflehi_epi16() { | |
17222 | #[rustfmt::skip] | |
17223 | let a = _mm512_set_epi16( | |
17224 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17225 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17226 | ); | |
17df50a5 | 17227 | let r = _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); |
fc512014 XL |
17228 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17229 | let r = | |
17df50a5 | 17230 | _mm512_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111_11111111_11111111, a); |
fc512014 XL |
17231 | #[rustfmt::skip] |
17232 | let e = _mm512_set_epi16( | |
17233 | 3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15, | |
17234 | 19, 18, 18, 16, 20, 21, 22, 23, 27, 26, 26, 24, 28, 29, 30, 31, | |
17235 | ); | |
17236 | assert_eq_m512i(r, e); | |
17237 | } | |
17238 | ||
cdc7bbd5 XL |
17239 | #[simd_test(enable = "avx512bw,avx512vl")] |
17240 | unsafe fn test_mm256_mask_shufflehi_epi16() { | |
17241 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
17df50a5 | 17242 | let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); |
cdc7bbd5 | 17243 | assert_eq_m256i(r, a); |
17df50a5 | 17244 | let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111_11111111, a); |
cdc7bbd5 XL |
17245 | let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); |
17246 | assert_eq_m256i(r, e); | |
17247 | } | |
17248 | ||
17249 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17250 | unsafe fn test_mm256_maskz_shufflehi_epi16() { | |
17251 | let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); | |
17df50a5 | 17252 | let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); |
cdc7bbd5 | 17253 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17df50a5 | 17254 | let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111_11111111, a); |
cdc7bbd5 XL |
17255 | let e = _mm256_set_epi16(3, 2, 2, 0, 4, 5, 6, 7, 11, 10, 10, 8, 12, 13, 14, 15); |
17256 | assert_eq_m256i(r, e); | |
17257 | } | |
17258 | ||
17259 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17260 | unsafe fn test_mm_mask_shufflehi_epi16() { | |
17261 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
17df50a5 | 17262 | let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a); |
cdc7bbd5 | 17263 | assert_eq_m128i(r, a); |
17df50a5 | 17264 | let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0b11111111, a); |
cdc7bbd5 XL |
17265 | let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); |
17266 | assert_eq_m128i(r, e); | |
17267 | } | |
17268 | ||
17269 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17270 | unsafe fn test_mm_maskz_shufflehi_epi16() { | |
17271 | let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
17df50a5 | 17272 | let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a); |
cdc7bbd5 | 17273 | assert_eq_m128i(r, _mm_setzero_si128()); |
17df50a5 | 17274 | let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0b11111111, a); |
cdc7bbd5 XL |
17275 | let e = _mm_set_epi16(3, 2, 2, 0, 4, 5, 6, 7); |
17276 | assert_eq_m128i(r, e); | |
17277 | } | |
17278 | ||
fc512014 XL |
17279 | #[simd_test(enable = "avx512bw")] |
17280 | unsafe fn test_mm512_shuffle_epi8() { | |
17281 | #[rustfmt::skip] | |
17282 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17283 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17284 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
17285 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
17286 | let b = _mm512_set1_epi8(1); | |
17287 | let r = _mm512_shuffle_epi8(a, b); | |
17288 | #[rustfmt::skip] | |
17289 | let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |
17290 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, | |
17291 | 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, | |
17292 | 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); | |
17293 | assert_eq_m512i(r, e); | |
17294 | } | |
17295 | ||
17296 | #[simd_test(enable = "avx512bw")] | |
17297 | unsafe fn test_mm512_mask_shuffle_epi8() { | |
17298 | #[rustfmt::skip] | |
17299 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17300 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17301 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
17302 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
17303 | let b = _mm512_set1_epi8(1); | |
17304 | let r = _mm512_mask_shuffle_epi8(a, 0, a, b); | |
17305 | assert_eq_m512i(r, a); | |
17306 | let r = _mm512_mask_shuffle_epi8( | |
17307 | a, | |
17308 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
17309 | a, | |
17310 | b, | |
17311 | ); | |
cdc7bbd5 XL |
17312 | #[rustfmt::skip] |
17313 | let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |
17314 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, | |
17315 | 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, | |
17316 | 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); | |
17317 | assert_eq_m512i(r, e); | |
17318 | } | |
17319 | ||
17320 | #[simd_test(enable = "avx512bw")] | |
17321 | unsafe fn test_mm512_maskz_shuffle_epi8() { | |
17322 | #[rustfmt::skip] | |
17323 | let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
17324 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
17325 | 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, | |
17326 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63); | |
17327 | let b = _mm512_set1_epi8(1); | |
17328 | let r = _mm512_maskz_shuffle_epi8(0, a, b); | |
17329 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
17330 | let r = _mm512_maskz_shuffle_epi8( | |
17331 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
17332 | a, | |
17333 | b, | |
17334 | ); | |
17335 | #[rustfmt::skip] | |
17336 | let e = _mm512_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |
17337 | 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, | |
17338 | 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, | |
17339 | 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62); | |
17340 | assert_eq_m512i(r, e); | |
17341 | } | |
17342 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_shuffle_epi8() {
    // Control byte 1 picks byte index 1 of each 128-bit lane (14 and 30 here);
    // a zero writemask must leave `src` (== a) untouched.
    #[rustfmt::skip]
    let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    let b = _mm256_set1_epi8(1);
    let r = _mm256_mask_shuffle_epi8(a, 0, a, b);
    assert_eq_m256i(r, a);
    let r = _mm256_mask_shuffle_epi8(a, 0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                            30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
    assert_eq_m256i(r, e);
}
17357 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_shuffle_epi8() {
    // Zeromask of 0 zeroes everything; all-ones mask yields the plain shuffle
    // result (byte index 1 of each 128-bit lane: 14 and 30).
    #[rustfmt::skip]
    let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    let b = _mm256_set1_epi8(1);
    let r = _mm256_maskz_shuffle_epi8(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_shuffle_epi8(0b11111111_11111111_11111111_11111111, a, b);
    #[rustfmt::skip]
    let e = _mm256_set_epi8(14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
                            30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30);
    assert_eq_m256i(r, e);
}
17372 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_shuffle_epi8() {
    // Control byte 1 selects byte index 1 of `a` (value 14 in this descending
    // layout); a zero writemask keeps `src` (== a).
    let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm_set1_epi8(1);
    let r = _mm_mask_shuffle_epi8(a, 0, a, b);
    assert_eq_m128i(r, a);
    let r = _mm_mask_shuffle_epi8(a, 0b11111111_11111111, a, b);
    let e = _mm_set_epi8(
        14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    );
    assert_eq_m128i(r, e);
}
17385 | ||
cdc7bbd5 XL |
17386 | #[simd_test(enable = "avx512bw,avx512vl")] |
17387 | unsafe fn test_mm_maskz_shuffle_epi8() { | |
fc512014 | 17388 | #[rustfmt::skip] |
cdc7bbd5 XL |
17389 | let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
17390 | let b = _mm_set1_epi8(1); | |
17391 | let r = _mm_maskz_shuffle_epi8(0, a, b); | |
17392 | assert_eq_m128i(r, _mm_setzero_si128()); | |
17393 | let r = _mm_maskz_shuffle_epi8(0b11111111_11111111, a, b); | |
17394 | let e = _mm_set_epi8( | |
17395 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, | |
fc512014 | 17396 | ); |
cdc7bbd5 | 17397 | assert_eq_m128i(r, e); |
fc512014 XL |
17398 | } |
17399 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_test_epi16_mask() {
    // Mask bit i is set when (a & b) is non-zero in lane i; bit 0 overlaps in
    // every lane, so all 32 bits are set.
    let a = _mm512_set1_epi16(1 << 0);
    let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm512_test_epi16_mask(a, b);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17408 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_test_epi16_mask() {
    // A zero writemask forces a zero result mask; an all-ones writemask passes
    // the full test result through (all lanes overlap on bit 0).
    let a = _mm512_set1_epi16(1 << 0);
    let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm512_mask_test_epi16_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm512_mask_test_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17419 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_test_epi16_mask() {
    // Bit 0 of every lane is set in both a and b, so every mask bit is set.
    let a = _mm256_set1_epi16(1 << 0);
    let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm256_test_epi16_mask(a, b);
    let e: __mmask16 = 0b11111111_11111111;
    assert_eq!(r, e);
}
17428 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_test_epi16_mask() {
    // Zero writemask → zero result; full writemask → full test result.
    let a = _mm256_set1_epi16(1 << 0);
    let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm256_mask_test_epi16_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm256_mask_test_epi16_mask(0b11111111_11111111, a, b);
    let e: __mmask16 = 0b11111111_11111111;
    assert_eq!(r, e);
}
17439 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_test_epi16_mask() {
    // Bit 0 of every lane is set in both a and b, so every mask bit is set.
    let a = _mm_set1_epi16(1 << 0);
    let b = _mm_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm_test_epi16_mask(a, b);
    let e: __mmask8 = 0b11111111;
    assert_eq!(r, e);
}
17448 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_test_epi16_mask() {
    // Zero writemask → zero result; full writemask → full test result.
    let a = _mm_set1_epi16(1 << 0);
    let b = _mm_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm_mask_test_epi16_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm_mask_test_epi16_mask(0b11111111, a, b);
    let e: __mmask8 = 0b11111111;
    assert_eq!(r, e);
}
17459 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_test_epi8_mask() {
    // Bit 0 of every byte lane is set in both a and b → all 64 mask bits set.
    let a = _mm512_set1_epi8(1 << 0);
    let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm512_test_epi8_mask(a, b);
    let e: __mmask64 =
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17469 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_test_epi8_mask() {
    // Zero writemask → zero result; full writemask → full test result.
    let a = _mm512_set1_epi8(1 << 0);
    let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm512_mask_test_epi8_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm512_mask_test_epi8_mask(
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
        a,
        b,
    );
    let e: __mmask64 =
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17485 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_test_epi8_mask() {
    // Bit 0 of every byte lane overlaps → all 32 mask bits set.
    let a = _mm256_set1_epi8(1 << 0);
    let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm256_test_epi8_mask(a, b);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17494 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_test_epi8_mask() {
    // Zero writemask → zero result; full writemask → full test result.
    let a = _mm256_set1_epi8(1 << 0);
    let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm256_mask_test_epi8_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm256_mask_test_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17505 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_test_epi8_mask() {
    // Bit 0 of every byte lane overlaps → all 16 mask bits set.
    let a = _mm_set1_epi8(1 << 0);
    let b = _mm_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm_test_epi8_mask(a, b);
    let e: __mmask16 = 0b11111111_11111111;
    assert_eq!(r, e);
}
17514 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_test_epi8_mask() {
    // Zero writemask → zero result; full writemask → full test result.
    let a = _mm_set1_epi8(1 << 0);
    let b = _mm_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm_mask_test_epi8_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm_mask_test_epi8_mask(0b11111111_11111111, a, b);
    let e: __mmask16 = 0b11111111_11111111;
    assert_eq!(r, e);
}
17525 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_testn_epi16_mask() {
    // testn sets bit i when (a & b) is zero in lane i; bit 0 overlaps in every
    // lane, so no bits are set.
    let a = _mm512_set1_epi16(1 << 0);
    let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm512_testn_epi16_mask(a, b);
    let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
17534 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_testn_epi16_mask() {
    // Zero writemask → zero; full writemask → plain testn result (also zero
    // here, since every lane overlaps on bit 0).
    let a = _mm512_set1_epi16(1 << 0);
    let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm512_mask_testn_epi16_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm512_mask_testn_epi16_mask(0b11111111_11111111_11111111_11111111, a, b);
    let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
17545 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_testn_epi16_mask() {
    // Every lane overlaps on bit 0, so testn yields an all-zero mask.
    let a = _mm256_set1_epi16(1 << 0);
    let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm256_testn_epi16_mask(a, b);
    let e: __mmask16 = 0b00000000_00000000;
    assert_eq!(r, e);
}
17554 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_testn_epi16_mask() {
    // Zero writemask → zero; full writemask → plain testn result (zero here).
    let a = _mm256_set1_epi16(1 << 0);
    let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm256_mask_testn_epi16_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm256_mask_testn_epi16_mask(0b11111111_11111111, a, b);
    let e: __mmask16 = 0b00000000_00000000;
    assert_eq!(r, e);
}
17565 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_testn_epi16_mask() {
    // Every lane overlaps on bit 0, so testn yields an all-zero mask.
    let a = _mm_set1_epi16(1 << 0);
    let b = _mm_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm_testn_epi16_mask(a, b);
    let e: __mmask8 = 0b00000000;
    assert_eq!(r, e);
}
17574 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_testn_epi16_mask() {
    // Zero writemask → zero; full writemask → plain testn result (zero here).
    let a = _mm_set1_epi16(1 << 0);
    let b = _mm_set1_epi16(1 << 0 | 1 << 1);
    let r = _mm_mask_testn_epi16_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm_mask_testn_epi16_mask(0b11111111, a, b);
    let e: __mmask8 = 0b00000000;
    assert_eq!(r, e);
}
17585 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_testn_epi8_mask() {
    // Every byte lane overlaps on bit 0, so testn yields an all-zero mask.
    let a = _mm512_set1_epi8(1 << 0);
    let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm512_testn_epi8_mask(a, b);
    let e: __mmask64 =
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
17595 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_testn_epi8_mask() {
    // Zero writemask → zero; full writemask → plain testn result (zero here).
    let a = _mm512_set1_epi8(1 << 0);
    let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm512_mask_testn_epi8_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm512_mask_testn_epi8_mask(
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111,
        a,
        b,
    );
    let e: __mmask64 =
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
17611 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_testn_epi8_mask() {
    // Every byte lane overlaps on bit 0, so testn yields an all-zero mask.
    let a = _mm256_set1_epi8(1 << 0);
    let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm256_testn_epi8_mask(a, b);
    let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
17620 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_testn_epi8_mask() {
    // Zero writemask → zero; full writemask → plain testn result (zero here).
    let a = _mm256_set1_epi8(1 << 0);
    let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm256_mask_testn_epi8_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm256_mask_testn_epi8_mask(0b11111111_11111111_11111111_11111111, a, b);
    let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
17631 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_testn_epi8_mask() {
    // Every byte lane overlaps on bit 0, so testn yields an all-zero mask.
    let a = _mm_set1_epi8(1 << 0);
    let b = _mm_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm_testn_epi8_mask(a, b);
    let e: __mmask16 = 0b00000000_00000000;
    assert_eq!(r, e);
}
17640 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_testn_epi8_mask() {
    // Zero writemask → zero; full writemask → plain testn result (zero here).
    let a = _mm_set1_epi8(1 << 0);
    let b = _mm_set1_epi8(1 << 0 | 1 << 1);
    let r = _mm_mask_testn_epi8_mask(0, a, b);
    assert_eq!(r, 0);
    let r = _mm_mask_testn_epi8_mask(0b11111111_11111111, a, b);
    let e: __mmask16 = 0b00000000_00000000;
    assert_eq!(r, e);
}
17651 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_store_mask64() {
    // Round-trip a 64-bit mask through memory via the raw-pointer store.
    let a: __mmask64 =
        0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
    let mut r = 0;
    _store_mask64(&mut r as *mut _ as *mut u64, a);
    assert_eq!(r, a);
}
17660 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_store_mask32() {
    // Round-trip a 32-bit mask through memory via the raw-pointer store.
    let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
    let mut r = 0;
    _store_mask32(&mut r as *mut _ as *mut u32, a);
    assert_eq!(r, a);
}
17668 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_load_mask64() {
    // Loading a 64-bit mask from memory must reproduce the stored bit pattern.
    let p: __mmask64 =
        0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
    let r = _load_mask64(&p);
    let e: __mmask64 =
        0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
    assert_eq!(r, e);
}
17678 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_load_mask32() {
    // Loading a 32-bit mask from memory must reproduce the stored bit pattern.
    let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
    let r = _load_mask32(&p);
    let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
    assert_eq!(r, e);
}
17686 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_sad_epu8() {
    // Each 64-bit result is the sum of eight byte diffs: 8 * |2 - 4| = 16.
    let a = _mm512_set1_epi8(2);
    let b = _mm512_set1_epi8(4);
    let r = _mm512_sad_epu8(a, b);
    let e = _mm512_set1_epi64(16);
    assert_eq_m512i(r, e);
}
17695 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_dbsad_epu8() {
    // With uniform inputs every 4-byte SAD is 4 * |2 - 4| = 8, whatever
    // quadruplets the imm8 (0 here) selects.
    let a = _mm512_set1_epi8(2);
    let b = _mm512_set1_epi8(4);
    let r = _mm512_dbsad_epu8::<0>(a, b);
    let e = _mm512_set1_epi16(8);
    assert_eq_m512i(r, e);
}
17704 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_dbsad_epu8() {
    // Zero writemask keeps `src`; full writemask yields the SAD result (8).
    let src = _mm512_set1_epi16(1);
    let a = _mm512_set1_epi8(2);
    let b = _mm512_set1_epi8(4);
    let r = _mm512_mask_dbsad_epu8::<0>(src, 0, a, b);
    assert_eq_m512i(r, src);
    let r = _mm512_mask_dbsad_epu8::<0>(src, 0b11111111_11111111_11111111_11111111, a, b);
    let e = _mm512_set1_epi16(8);
    assert_eq_m512i(r, e);
}
17716 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_dbsad_epu8() {
    // Zero zeromask zeroes all lanes; full mask yields the SAD result (8).
    let a = _mm512_set1_epi8(2);
    let b = _mm512_set1_epi8(4);
    let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
    assert_eq_m512i(r, _mm512_setzero_si512());
    let r = _mm512_maskz_dbsad_epu8::<0>(0b11111111_11111111_11111111_11111111, a, b);
    let e = _mm512_set1_epi16(8);
    assert_eq_m512i(r, e);
}
17727 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_dbsad_epu8() {
    // Uniform inputs: every 4-byte SAD is 4 * |2 - 4| = 8 for any imm8.
    let a = _mm256_set1_epi8(2);
    let b = _mm256_set1_epi8(4);
    let r = _mm256_dbsad_epu8::<0>(a, b);
    let e = _mm256_set1_epi16(8);
    assert_eq_m256i(r, e);
}
17736 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_dbsad_epu8() {
    // Zero writemask keeps `src`; full writemask yields the SAD result (8).
    let src = _mm256_set1_epi16(1);
    let a = _mm256_set1_epi8(2);
    let b = _mm256_set1_epi8(4);
    let r = _mm256_mask_dbsad_epu8::<0>(src, 0, a, b);
    assert_eq_m256i(r, src);
    let r = _mm256_mask_dbsad_epu8::<0>(src, 0b11111111_11111111, a, b);
    let e = _mm256_set1_epi16(8);
    assert_eq_m256i(r, e);
}
17748 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_dbsad_epu8() {
    // Zero zeromask zeroes all lanes; full mask yields the SAD result (8).
    let a = _mm256_set1_epi8(2);
    let b = _mm256_set1_epi8(4);
    let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm256_maskz_dbsad_epu8::<0>(0b11111111_11111111, a, b);
    let e = _mm256_set1_epi16(8);
    assert_eq_m256i(r, e);
}
17759 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_dbsad_epu8() {
    // Uniform inputs: every 4-byte SAD is 4 * |2 - 4| = 8 for any imm8.
    let a = _mm_set1_epi8(2);
    let b = _mm_set1_epi8(4);
    let r = _mm_dbsad_epu8::<0>(a, b);
    let e = _mm_set1_epi16(8);
    assert_eq_m128i(r, e);
}
17768 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_dbsad_epu8() {
    // Zero writemask keeps `src`; full writemask yields the SAD result (8).
    let src = _mm_set1_epi16(1);
    let a = _mm_set1_epi8(2);
    let b = _mm_set1_epi8(4);
    let r = _mm_mask_dbsad_epu8::<0>(src, 0, a, b);
    assert_eq_m128i(r, src);
    let r = _mm_mask_dbsad_epu8::<0>(src, 0b11111111, a, b);
    let e = _mm_set1_epi16(8);
    assert_eq_m128i(r, e);
}
17780 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_dbsad_epu8() {
    // Zero zeromask zeroes all lanes; full mask yields the SAD result (8).
    let a = _mm_set1_epi8(2);
    let b = _mm_set1_epi8(4);
    let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm_maskz_dbsad_epu8::<0>(0b11111111, a, b);
    let e = _mm_set1_epi16(8);
    assert_eq_m128i(r, e);
}
17791 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_movepi16_mask() {
    // The sign bit (bit 15) of each 16-bit lane becomes a mask bit; every lane
    // has it set, so all 32 mask bits are set.
    let a = _mm512_set1_epi16(1 << 15);
    let r = _mm512_movepi16_mask(a);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17799 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_movepi16_mask() {
    // Sign bit set in every 16-bit lane → all 16 mask bits set.
    let a = _mm256_set1_epi16(1 << 15);
    let r = _mm256_movepi16_mask(a);
    let e: __mmask16 = 0b11111111_11111111;
    assert_eq!(r, e);
}
17807 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_movepi16_mask() {
    // Sign bit set in every 16-bit lane → all 8 mask bits set.
    let a = _mm_set1_epi16(1 << 15);
    let r = _mm_movepi16_mask(a);
    let e: __mmask8 = 0b11111111;
    assert_eq!(r, e);
}
17815 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_movepi8_mask() {
    // Sign bit (bit 7) set in every byte lane → all 64 mask bits set.
    let a = _mm512_set1_epi8(1 << 7);
    let r = _mm512_movepi8_mask(a);
    let e: __mmask64 =
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17824 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_movepi8_mask() {
    // Sign bit set in every byte lane → all 32 mask bits set.
    let a = _mm256_set1_epi8(1 << 7);
    let r = _mm256_movepi8_mask(a);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
17832 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_movepi8_mask() {
    // Sign bit set in every byte lane → all 16 mask bits set.
    let a = _mm_set1_epi8(1 << 7);
    let r = _mm_movepi8_mask(a);
    let e: __mmask16 = 0b11111111_11111111;
    assert_eq!(r, e);
}
17840 | ||
fc512014 XL |
17841 | #[simd_test(enable = "avx512bw")] |
17842 | unsafe fn test_mm512_movm_epi16() { | |
17843 | let a: __mmask32 = 0b11111111_11111111_11111111_11111111; | |
17844 | let r = _mm512_movm_epi16(a); | |
17845 | let e = _mm512_set1_epi16( | |
17846 | 1 << 15 | |
17847 | | 1 << 14 | |
17848 | | 1 << 13 | |
17849 | | 1 << 12 | |
17850 | | 1 << 11 | |
17851 | | 1 << 10 | |
17852 | | 1 << 9 | |
17853 | | 1 << 8 | |
17854 | | 1 << 7 | |
17855 | | 1 << 6 | |
17856 | | 1 << 5 | |
17857 | | 1 << 4 | |
17858 | | 1 << 3 | |
17859 | | 1 << 2 | |
17860 | | 1 << 1 | |
17861 | | 1 << 0, | |
17862 | ); | |
17863 | assert_eq_m512i(r, e); | |
17864 | } | |
17865 | ||
cdc7bbd5 XL |
17866 | #[simd_test(enable = "avx512bw,avx512vl")] |
17867 | unsafe fn test_mm256_movm_epi16() { | |
17868 | let a: __mmask16 = 0b11111111_11111111; | |
17869 | let r = _mm256_movm_epi16(a); | |
17870 | let e = _mm256_set1_epi16( | |
17871 | 1 << 15 | |
17872 | | 1 << 14 | |
17873 | | 1 << 13 | |
17874 | | 1 << 12 | |
17875 | | 1 << 11 | |
17876 | | 1 << 10 | |
17877 | | 1 << 9 | |
17878 | | 1 << 8 | |
17879 | | 1 << 7 | |
17880 | | 1 << 6 | |
17881 | | 1 << 5 | |
17882 | | 1 << 4 | |
17883 | | 1 << 3 | |
17884 | | 1 << 2 | |
17885 | | 1 << 1 | |
17886 | | 1 << 0, | |
17887 | ); | |
17888 | assert_eq_m256i(r, e); | |
17889 | } | |
17890 | ||
17891 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17892 | unsafe fn test_mm_movm_epi16() { | |
17893 | let a: __mmask8 = 0b11111111; | |
17894 | let r = _mm_movm_epi16(a); | |
17895 | let e = _mm_set1_epi16( | |
17896 | 1 << 15 | |
17897 | | 1 << 14 | |
17898 | | 1 << 13 | |
17899 | | 1 << 12 | |
17900 | | 1 << 11 | |
17901 | | 1 << 10 | |
17902 | | 1 << 9 | |
17903 | | 1 << 8 | |
17904 | | 1 << 7 | |
17905 | | 1 << 6 | |
17906 | | 1 << 5 | |
17907 | | 1 << 4 | |
17908 | | 1 << 3 | |
17909 | | 1 << 2 | |
17910 | | 1 << 1 | |
17911 | | 1 << 0, | |
17912 | ); | |
17913 | assert_eq_m128i(r, e); | |
17914 | } | |
17915 | ||
fc512014 XL |
17916 | #[simd_test(enable = "avx512bw")] |
17917 | unsafe fn test_mm512_movm_epi8() { | |
17918 | let a: __mmask64 = | |
17919 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111; | |
17920 | let r = _mm512_movm_epi8(a); | |
17921 | let e = | |
17922 | _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); | |
17923 | assert_eq_m512i(r, e); | |
17924 | } | |
17925 | ||
cdc7bbd5 XL |
17926 | #[simd_test(enable = "avx512bw,avx512vl")] |
17927 | unsafe fn test_mm256_movm_epi8() { | |
17928 | let a: __mmask32 = 0b11111111_11111111_11111111_11111111; | |
17929 | let r = _mm256_movm_epi8(a); | |
17930 | let e = | |
17931 | _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); | |
17932 | assert_eq_m256i(r, e); | |
17933 | } | |
17934 | ||
17935 | #[simd_test(enable = "avx512bw,avx512vl")] | |
17936 | unsafe fn test_mm_movm_epi8() { | |
17937 | let a: __mmask16 = 0b11111111_11111111; | |
17938 | let r = _mm_movm_epi8(a); | |
17939 | let e = | |
17940 | _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0); | |
17941 | assert_eq_m128i(r, e); | |
17942 | } | |
17943 | ||
fc512014 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_kadd_mask32() {
    // kadd is plain integer addition of the masks: 11 + 22 = 33
    // (note 11 | 22 = 31, so this also rules out an OR implementation).
    let a: __mmask32 = 11;
    let b: __mmask32 = 22;
    let r = _kadd_mask32(a, b);
    let e: __mmask32 = 33;
    assert_eq!(r, e);
}
17952 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kadd_mask64() {
    // kadd is plain integer addition of the masks: 11 + 22 = 33.
    let a: __mmask64 = 11;
    let b: __mmask64 = 22;
    let r = _kadd_mask64(a, b);
    let e: __mmask64 = 33;
    assert_eq!(r, e);
}
17961 | ||
17962 | #[simd_test(enable = "avx512bw")] | |
17963 | unsafe fn test_kand_mask32() { | |
17964 | let a: __mmask32 = 0b11001100_00110011_11001100_00110011; | |
17965 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; | |
17966 | let r = _kand_mask32(a, b); | |
17967 | let e: __mmask32 = 0b11001100_00110011_11001100_00110011; | |
17968 | assert_eq!(r, e); | |
17969 | } | |
17970 | ||
17971 | #[simd_test(enable = "avx512bw")] | |
17972 | unsafe fn test_kand_mask64() { | |
17973 | let a: __mmask64 = | |
17974 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; | |
17975 | let b: __mmask64 = | |
17976 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; | |
17977 | let r = _kand_mask64(a, b); | |
17978 | let e: __mmask64 = | |
17979 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; | |
17980 | assert_eq!(r, e); | |
17981 | } | |
17982 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_knot_mask32() {
    // knot is bitwise complement of the whole 32-bit mask.
    let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
    let r = _knot_mask32(a);
    let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
    assert_eq!(r, e);
}
17990 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_knot_mask64() {
    // knot is bitwise complement of the whole 64-bit mask.
    let a: __mmask64 =
        0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
    let r = _knot_mask64(a);
    let e: __mmask64 =
        0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
    assert_eq!(r, e);
}
18000 | ||
18001 | #[simd_test(enable = "avx512bw")] | |
18002 | unsafe fn test_kandn_mask32() { | |
18003 | let a: __mmask32 = 0b11001100_00110011_11001100_00110011; | |
18004 | let b: __mmask32 = 0b11001100_00110011_11001100_00110011; | |
18005 | let r = _kandn_mask32(a, b); | |
18006 | let e: __mmask32 = 0b00000000_00000000_00000000_00000000; | |
18007 | assert_eq!(r, e); | |
18008 | } | |
18009 | ||
18010 | #[simd_test(enable = "avx512bw")] | |
18011 | unsafe fn test_kandn_mask64() { | |
18012 | let a: __mmask64 = | |
18013 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; | |
18014 | let b: __mmask64 = | |
18015 | 0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011; | |
18016 | let r = _kandn_mask64(a, b); | |
18017 | let e: __mmask64 = | |
18018 | 0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000; | |
18019 | assert_eq!(r, e); | |
18020 | } | |
18021 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kor_mask32() {
    // a and b are bitwise complements, so their OR is all ones.
    let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
    let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
    let r = _kor_mask32(a, b);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
18030 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kor_mask64() {
    // a and b are bitwise complements, so their OR is all ones.
    let a: __mmask64 =
        0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
    let b: __mmask64 =
        0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
    let r = _kor_mask64(a, b);
    let e: __mmask64 =
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
18042 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kxor_mask32() {
    // a and b are bitwise complements, so their XOR is all ones.
    let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
    let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
    let r = _kxor_mask32(a, b);
    let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
18051 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kxor_mask64() {
    // a and b are bitwise complements, so their XOR is all ones.
    let a: __mmask64 =
        0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
    let b: __mmask64 =
        0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
    let r = _kxor_mask64(a, b);
    let e: __mmask64 =
        0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
    assert_eq!(r, e);
}
18063 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kxnor_mask32() {
    // a and b are bitwise complements, so XNOR (NOT of XOR) is all zeros.
    let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
    let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
    let r = _kxnor_mask32(a, b);
    let e: __mmask32 = 0b00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
18072 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_kxnor_mask64() {
    // a and b are bitwise complements, so XNOR (NOT of XOR) is all zeros.
    let a: __mmask64 =
        0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
    let b: __mmask64 =
        0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
    let r = _kxnor_mask64(a, b);
    let e: __mmask64 =
        0b00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000;
    assert_eq!(r, e);
}
18084 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_cvtepi16_epi8() {
    // Truncating 16→8-bit conversion: small value 2 survives unchanged.
    let a = _mm512_set1_epi16(2);
    let r = _mm512_cvtepi16_epi8(a);
    let e = _mm256_set1_epi8(2);
    assert_eq_m256i(r, e);
}
18092 | ||
cdc7bbd5 XL |
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_cvtepi16_epi8() {
    // Zero writemask keeps `src`; full writemask gives the truncated values.
    let src = _mm256_set1_epi8(1);
    let a = _mm512_set1_epi16(2);
    let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
    assert_eq_m256i(r, src);
    let r = _mm512_mask_cvtepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a);
    let e = _mm256_set1_epi8(2);
    assert_eq_m256i(r, e);
}
18103 | ||
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_cvtepi16_epi8() {
    // Zero zeromask zeroes everything; full mask gives the truncated values.
    let a = _mm512_set1_epi16(2);
    let r = _mm512_maskz_cvtepi16_epi8(0, a);
    assert_eq_m256i(r, _mm256_setzero_si256());
    let r = _mm512_maskz_cvtepi16_epi8(0b11111111_11111111_11111111_11111111, a);
    let e = _mm256_set1_epi8(2);
    assert_eq_m256i(r, e);
}
18113 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_cvtepi16_epi8() {
    // Truncating 16→8-bit conversion: 16 lanes narrow into a 128-bit result.
    let a = _mm256_set1_epi16(2);
    let r = _mm256_cvtepi16_epi8(a);
    let e = _mm_set1_epi8(2);
    assert_eq_m128i(r, e);
}
18121 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_cvtepi16_epi8() {
    // Zero writemask keeps `src`; full writemask gives the truncated values.
    let src = _mm_set1_epi8(1);
    let a = _mm256_set1_epi16(2);
    let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
    assert_eq_m128i(r, src);
    let r = _mm256_mask_cvtepi16_epi8(src, 0b11111111_11111111, a);
    let e = _mm_set1_epi8(2);
    assert_eq_m128i(r, e);
}
18132 | ||
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_cvtepi16_epi8() {
    // Zero zeromask zeroes everything; full mask gives the truncated values.
    let a = _mm256_set1_epi16(2);
    let r = _mm256_maskz_cvtepi16_epi8(0, a);
    assert_eq_m128i(r, _mm_setzero_si128());
    let r = _mm256_maskz_cvtepi16_epi8(0b11111111_11111111, a);
    let e = _mm_set1_epi8(2);
    assert_eq_m128i(r, e);
}
18142 | ||
18143 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18144 | unsafe fn test_mm_cvtepi16_epi8() { | |
18145 | let a = _mm_set1_epi16(2); | |
18146 | let r = _mm_cvtepi16_epi8(a); | |
18147 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); | |
18148 | assert_eq_m128i(r, e); | |
18149 | } | |
18150 | ||
18151 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18152 | unsafe fn test_mm_mask_cvtepi16_epi8() { | |
18153 | let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); | |
18154 | let a = _mm_set1_epi16(2); | |
18155 | let r = _mm_mask_cvtepi16_epi8(src, 0, a); | |
18156 | assert_eq_m128i(r, src); | |
18157 | let r = _mm_mask_cvtepi16_epi8(src, 0b11111111, a); | |
18158 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); | |
18159 | assert_eq_m128i(r, e); | |
fc512014 XL |
18160 | } |
18161 | ||
cdc7bbd5 XL |
18162 | #[simd_test(enable = "avx512bw,avx512vl")] |
18163 | unsafe fn test_mm_maskz_cvtepi16_epi8() { | |
18164 | let a = _mm_set1_epi16(2); | |
18165 | let r = _mm_maskz_cvtepi16_epi8(0, a); | |
18166 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18167 | let r = _mm_maskz_cvtepi16_epi8(0b11111111, a); | |
18168 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2); | |
18169 | assert_eq_m128i(r, e); | |
fc512014 XL |
18170 | } |
18171 | ||
18172 | #[simd_test(enable = "avx512bw")] | |
18173 | unsafe fn test_mm512_cvtsepi16_epi8() { | |
18174 | let a = _mm512_set1_epi16(i16::MAX); | |
18175 | let r = _mm512_cvtsepi16_epi8(a); | |
18176 | let e = _mm256_set1_epi8(i8::MAX); | |
18177 | assert_eq_m256i(r, e); | |
18178 | } | |
18179 | ||
18180 | #[simd_test(enable = "avx512bw")] | |
18181 | unsafe fn test_mm512_mask_cvtsepi16_epi8() { | |
18182 | let src = _mm256_set1_epi8(1); | |
18183 | let a = _mm512_set1_epi16(i16::MAX); | |
18184 | let r = _mm512_mask_cvtsepi16_epi8(src, 0, a); | |
18185 | assert_eq_m256i(r, src); | |
18186 | let r = _mm512_mask_cvtsepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); | |
18187 | let e = _mm256_set1_epi8(i8::MAX); | |
18188 | assert_eq_m256i(r, e); | |
18189 | } | |
18190 | ||
cdc7bbd5 XL |
18191 | #[simd_test(enable = "avx512bw,avx512vl")] |
18192 | unsafe fn test_mm256_cvtsepi16_epi8() { | |
18193 | let a = _mm256_set1_epi16(i16::MAX); | |
18194 | let r = _mm256_cvtsepi16_epi8(a); | |
18195 | let e = _mm_set1_epi8(i8::MAX); | |
18196 | assert_eq_m128i(r, e); | |
18197 | } | |
18198 | ||
18199 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18200 | unsafe fn test_mm256_mask_cvtsepi16_epi8() { | |
18201 | let src = _mm_set1_epi8(1); | |
18202 | let a = _mm256_set1_epi16(i16::MAX); | |
18203 | let r = _mm256_mask_cvtsepi16_epi8(src, 0, a); | |
18204 | assert_eq_m128i(r, src); | |
18205 | let r = _mm256_mask_cvtsepi16_epi8(src, 0b11111111_11111111, a); | |
18206 | let e = _mm_set1_epi8(i8::MAX); | |
18207 | assert_eq_m128i(r, e); | |
18208 | } | |
18209 | ||
18210 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18211 | unsafe fn test_mm256_maskz_cvtsepi16_epi8() { | |
18212 | let a = _mm256_set1_epi16(i16::MAX); | |
18213 | let r = _mm256_maskz_cvtsepi16_epi8(0, a); | |
18214 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18215 | let r = _mm256_maskz_cvtsepi16_epi8(0b11111111_11111111, a); | |
18216 | let e = _mm_set1_epi8(i8::MAX); | |
18217 | assert_eq_m128i(r, e); | |
18218 | } | |
18219 | ||
18220 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18221 | unsafe fn test_mm_cvtsepi16_epi8() { | |
18222 | let a = _mm_set1_epi16(i16::MAX); | |
18223 | let r = _mm_cvtsepi16_epi8(a); | |
18224 | #[rustfmt::skip] | |
18225 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
18226 | assert_eq_m128i(r, e); | |
18227 | } | |
18228 | ||
18229 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18230 | unsafe fn test_mm_mask_cvtsepi16_epi8() { | |
18231 | let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); | |
18232 | let a = _mm_set1_epi16(i16::MAX); | |
18233 | let r = _mm_mask_cvtsepi16_epi8(src, 0, a); | |
18234 | assert_eq_m128i(r, src); | |
18235 | let r = _mm_mask_cvtsepi16_epi8(src, 0b11111111, a); | |
18236 | #[rustfmt::skip] | |
18237 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
18238 | assert_eq_m128i(r, e); | |
18239 | } | |
18240 | ||
18241 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18242 | unsafe fn test_mm_maskz_cvtsepi16_epi8() { | |
18243 | let a = _mm_set1_epi16(i16::MAX); | |
18244 | let r = _mm_maskz_cvtsepi16_epi8(0, a); | |
18245 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18246 | let r = _mm_maskz_cvtsepi16_epi8(0b11111111, a); | |
18247 | #[rustfmt::skip] | |
18248 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX); | |
18249 | assert_eq_m128i(r, e); | |
18250 | } | |
18251 | ||
fc512014 XL |
18252 | #[simd_test(enable = "avx512bw")] |
18253 | unsafe fn test_mm512_maskz_cvtsepi16_epi8() { | |
18254 | let a = _mm512_set1_epi16(i16::MAX); | |
18255 | let r = _mm512_maskz_cvtsepi16_epi8(0, a); | |
18256 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
18257 | let r = _mm512_maskz_cvtsepi16_epi8(0b11111111_11111111_11111111_11111111, a); | |
18258 | let e = _mm256_set1_epi8(i8::MAX); | |
18259 | assert_eq_m256i(r, e); | |
18260 | } | |
18261 | ||
18262 | #[simd_test(enable = "avx512bw")] | |
18263 | unsafe fn test_mm512_cvtusepi16_epi8() { | |
18264 | let a = _mm512_set1_epi16(i16::MIN); | |
18265 | let r = _mm512_cvtusepi16_epi8(a); | |
18266 | let e = _mm256_set1_epi8(-1); | |
18267 | assert_eq_m256i(r, e); | |
18268 | } | |
18269 | ||
18270 | #[simd_test(enable = "avx512bw")] | |
18271 | unsafe fn test_mm512_mask_cvtusepi16_epi8() { | |
18272 | let src = _mm256_set1_epi8(1); | |
18273 | let a = _mm512_set1_epi16(i16::MIN); | |
18274 | let r = _mm512_mask_cvtusepi16_epi8(src, 0, a); | |
18275 | assert_eq_m256i(r, src); | |
18276 | let r = _mm512_mask_cvtusepi16_epi8(src, 0b11111111_11111111_11111111_11111111, a); | |
18277 | let e = _mm256_set1_epi8(-1); | |
18278 | assert_eq_m256i(r, e); | |
18279 | } | |
18280 | ||
18281 | #[simd_test(enable = "avx512bw")] | |
18282 | unsafe fn test_mm512_maskz_cvtusepi16_epi8() { | |
18283 | let a = _mm512_set1_epi16(i16::MIN); | |
18284 | let r = _mm512_maskz_cvtusepi16_epi8(0, a); | |
18285 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
18286 | let r = _mm512_maskz_cvtusepi16_epi8(0b11111111_11111111_11111111_11111111, a); | |
18287 | let e = _mm256_set1_epi8(-1); | |
18288 | assert_eq_m256i(r, e); | |
18289 | } | |
18290 | ||
cdc7bbd5 XL |
18291 | #[simd_test(enable = "avx512bw,avx512vl")] |
18292 | unsafe fn test_mm256_cvtusepi16_epi8() { | |
18293 | let a = _mm256_set1_epi16(i16::MIN); | |
18294 | let r = _mm256_cvtusepi16_epi8(a); | |
18295 | let e = _mm_set1_epi8(-1); | |
18296 | assert_eq_m128i(r, e); | |
18297 | } | |
18298 | ||
18299 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18300 | unsafe fn test_mm256_mask_cvtusepi16_epi8() { | |
18301 | let src = _mm_set1_epi8(1); | |
18302 | let a = _mm256_set1_epi16(i16::MIN); | |
18303 | let r = _mm256_mask_cvtusepi16_epi8(src, 0, a); | |
18304 | assert_eq_m128i(r, src); | |
18305 | let r = _mm256_mask_cvtusepi16_epi8(src, 0b11111111_11111111, a); | |
18306 | let e = _mm_set1_epi8(-1); | |
18307 | assert_eq_m128i(r, e); | |
18308 | } | |
18309 | ||
18310 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18311 | unsafe fn test_mm256_maskz_cvtusepi16_epi8() { | |
18312 | let a = _mm256_set1_epi16(i16::MIN); | |
18313 | let r = _mm256_maskz_cvtusepi16_epi8(0, a); | |
18314 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18315 | let r = _mm256_maskz_cvtusepi16_epi8(0b11111111_11111111, a); | |
18316 | let e = _mm_set1_epi8(-1); | |
18317 | assert_eq_m128i(r, e); | |
18318 | } | |
18319 | ||
18320 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18321 | unsafe fn test_mm_cvtusepi16_epi8() { | |
18322 | let a = _mm_set1_epi16(i16::MIN); | |
18323 | let r = _mm_cvtusepi16_epi8(a); | |
18324 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
18325 | assert_eq_m128i(r, e); | |
18326 | } | |
18327 | ||
18328 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18329 | unsafe fn test_mm_mask_cvtusepi16_epi8() { | |
18330 | let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); | |
18331 | let a = _mm_set1_epi16(i16::MIN); | |
18332 | let r = _mm_mask_cvtusepi16_epi8(src, 0, a); | |
18333 | assert_eq_m128i(r, src); | |
18334 | let r = _mm_mask_cvtusepi16_epi8(src, 0b11111111, a); | |
18335 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
18336 | assert_eq_m128i(r, e); | |
18337 | } | |
18338 | ||
18339 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18340 | unsafe fn test_mm_maskz_cvtusepi16_epi8() { | |
18341 | let a = _mm_set1_epi16(i16::MIN); | |
18342 | let r = _mm_maskz_cvtusepi16_epi8(0, a); | |
18343 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18344 | let r = _mm_maskz_cvtusepi16_epi8(0b11111111, a); | |
18345 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); | |
18346 | assert_eq_m128i(r, e); | |
18347 | } | |
18348 | ||
fc512014 XL |
18349 | #[simd_test(enable = "avx512bw")] |
18350 | unsafe fn test_mm512_cvtepi8_epi16() { | |
18351 | let a = _mm256_set1_epi8(2); | |
18352 | let r = _mm512_cvtepi8_epi16(a); | |
18353 | let e = _mm512_set1_epi16(2); | |
18354 | assert_eq_m512i(r, e); | |
18355 | } | |
18356 | ||
18357 | #[simd_test(enable = "avx512bw")] | |
18358 | unsafe fn test_mm512_mask_cvtepi8_epi16() { | |
18359 | let src = _mm512_set1_epi16(1); | |
18360 | let a = _mm256_set1_epi8(2); | |
18361 | let r = _mm512_mask_cvtepi8_epi16(src, 0, a); | |
18362 | assert_eq_m512i(r, src); | |
18363 | let r = _mm512_mask_cvtepi8_epi16(src, 0b11111111_11111111_11111111_11111111, a); | |
18364 | let e = _mm512_set1_epi16(2); | |
18365 | assert_eq_m512i(r, e); | |
18366 | } | |
18367 | ||
18368 | #[simd_test(enable = "avx512bw")] | |
18369 | unsafe fn test_mm512_maskz_cvtepi8_epi16() { | |
18370 | let a = _mm256_set1_epi8(2); | |
18371 | let r = _mm512_maskz_cvtepi8_epi16(0, a); | |
18372 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
18373 | let r = _mm512_maskz_cvtepi8_epi16(0b11111111_11111111_11111111_11111111, a); | |
18374 | let e = _mm512_set1_epi16(2); | |
18375 | assert_eq_m512i(r, e); | |
18376 | } | |
18377 | ||
cdc7bbd5 XL |
18378 | #[simd_test(enable = "avx512bw,avx512vl")] |
18379 | unsafe fn test_mm256_mask_cvtepi8_epi16() { | |
18380 | let src = _mm256_set1_epi16(1); | |
18381 | let a = _mm_set1_epi8(2); | |
18382 | let r = _mm256_mask_cvtepi8_epi16(src, 0, a); | |
18383 | assert_eq_m256i(r, src); | |
18384 | let r = _mm256_mask_cvtepi8_epi16(src, 0b11111111_11111111, a); | |
18385 | let e = _mm256_set1_epi16(2); | |
18386 | assert_eq_m256i(r, e); | |
18387 | } | |
18388 | ||
18389 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18390 | unsafe fn test_mm256_maskz_cvtepi8_epi16() { | |
18391 | let a = _mm_set1_epi8(2); | |
18392 | let r = _mm256_maskz_cvtepi8_epi16(0, a); | |
18393 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
18394 | let r = _mm256_maskz_cvtepi8_epi16(0b11111111_11111111, a); | |
18395 | let e = _mm256_set1_epi16(2); | |
18396 | assert_eq_m256i(r, e); | |
18397 | } | |
18398 | ||
18399 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18400 | unsafe fn test_mm_mask_cvtepi8_epi16() { | |
18401 | let src = _mm_set1_epi16(1); | |
18402 | let a = _mm_set1_epi8(2); | |
18403 | let r = _mm_mask_cvtepi8_epi16(src, 0, a); | |
18404 | assert_eq_m128i(r, src); | |
18405 | let r = _mm_mask_cvtepi8_epi16(src, 0b11111111, a); | |
18406 | let e = _mm_set1_epi16(2); | |
18407 | assert_eq_m128i(r, e); | |
18408 | } | |
18409 | ||
18410 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18411 | unsafe fn test_mm_maskz_cvtepi8_epi16() { | |
18412 | let a = _mm_set1_epi8(2); | |
18413 | let r = _mm_maskz_cvtepi8_epi16(0, a); | |
18414 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18415 | let r = _mm_maskz_cvtepi8_epi16(0b11111111, a); | |
18416 | let e = _mm_set1_epi16(2); | |
18417 | assert_eq_m128i(r, e); | |
18418 | } | |
18419 | ||
fc512014 XL |
18420 | #[simd_test(enable = "avx512bw")] |
18421 | unsafe fn test_mm512_cvtepu8_epi16() { | |
18422 | let a = _mm256_set1_epi8(2); | |
18423 | let r = _mm512_cvtepu8_epi16(a); | |
18424 | let e = _mm512_set1_epi16(2); | |
18425 | assert_eq_m512i(r, e); | |
18426 | } | |
18427 | ||
18428 | #[simd_test(enable = "avx512bw")] | |
18429 | unsafe fn test_mm512_mask_cvtepu8_epi16() { | |
18430 | let src = _mm512_set1_epi16(1); | |
18431 | let a = _mm256_set1_epi8(2); | |
18432 | let r = _mm512_mask_cvtepu8_epi16(src, 0, a); | |
18433 | assert_eq_m512i(r, src); | |
18434 | let r = _mm512_mask_cvtepu8_epi16(src, 0b11111111_11111111_11111111_11111111, a); | |
18435 | let e = _mm512_set1_epi16(2); | |
18436 | assert_eq_m512i(r, e); | |
18437 | } | |
18438 | ||
18439 | #[simd_test(enable = "avx512bw")] | |
18440 | unsafe fn test_mm512_maskz_cvtepu8_epi16() { | |
18441 | let a = _mm256_set1_epi8(2); | |
18442 | let r = _mm512_maskz_cvtepu8_epi16(0, a); | |
18443 | assert_eq_m512i(r, _mm512_setzero_si512()); | |
18444 | let r = _mm512_maskz_cvtepu8_epi16(0b11111111_11111111_11111111_11111111, a); | |
18445 | let e = _mm512_set1_epi16(2); | |
18446 | assert_eq_m512i(r, e); | |
18447 | } | |
18448 | ||
cdc7bbd5 XL |
18449 | #[simd_test(enable = "avx512bw,avx512vl")] |
18450 | unsafe fn test_mm256_mask_cvtepu8_epi16() { | |
18451 | let src = _mm256_set1_epi16(1); | |
18452 | let a = _mm_set1_epi8(2); | |
18453 | let r = _mm256_mask_cvtepu8_epi16(src, 0, a); | |
18454 | assert_eq_m256i(r, src); | |
18455 | let r = _mm256_mask_cvtepu8_epi16(src, 0b11111111_11111111, a); | |
18456 | let e = _mm256_set1_epi16(2); | |
18457 | assert_eq_m256i(r, e); | |
18458 | } | |
18459 | ||
18460 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18461 | unsafe fn test_mm256_maskz_cvtepu8_epi16() { | |
18462 | let a = _mm_set1_epi8(2); | |
18463 | let r = _mm256_maskz_cvtepu8_epi16(0, a); | |
18464 | assert_eq_m256i(r, _mm256_setzero_si256()); | |
18465 | let r = _mm256_maskz_cvtepu8_epi16(0b11111111_11111111, a); | |
18466 | let e = _mm256_set1_epi16(2); | |
18467 | assert_eq_m256i(r, e); | |
18468 | } | |
18469 | ||
18470 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18471 | unsafe fn test_mm_mask_cvtepu8_epi16() { | |
18472 | let src = _mm_set1_epi16(1); | |
18473 | let a = _mm_set1_epi8(2); | |
18474 | let r = _mm_mask_cvtepu8_epi16(src, 0, a); | |
18475 | assert_eq_m128i(r, src); | |
18476 | let r = _mm_mask_cvtepu8_epi16(src, 0b11111111, a); | |
18477 | let e = _mm_set1_epi16(2); | |
18478 | assert_eq_m128i(r, e); | |
18479 | } | |
18480 | ||
18481 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18482 | unsafe fn test_mm_maskz_cvtepu8_epi16() { | |
18483 | let a = _mm_set1_epi8(2); | |
18484 | let r = _mm_maskz_cvtepu8_epi16(0, a); | |
18485 | assert_eq_m128i(r, _mm_setzero_si128()); | |
18486 | let r = _mm_maskz_cvtepu8_epi16(0b11111111, a); | |
18487 | let e = _mm_set1_epi16(2); | |
18488 | assert_eq_m128i(r, e); | |
18489 | } | |
18490 | ||
fc512014 XL |
18491 | #[simd_test(enable = "avx512bw")] |
18492 | unsafe fn test_mm512_bslli_epi128() { | |
18493 | #[rustfmt::skip] | |
18494 | let a = _mm512_set_epi8( | |
18495 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18496 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18497 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18498 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18499 | ); | |
17df50a5 | 18500 | let r = _mm512_bslli_epi128::<9>(a); |
fc512014 XL |
18501 | #[rustfmt::skip] |
18502 | let e = _mm512_set_epi8( | |
18503 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
18504 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
18505 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
18506 | 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
18507 | ); | |
18508 | assert_eq_m512i(r, e); | |
18509 | } | |
18510 | ||
18511 | #[simd_test(enable = "avx512bw")] | |
18512 | unsafe fn test_mm512_bsrli_epi128() { | |
18513 | #[rustfmt::skip] | |
18514 | let a = _mm512_set_epi8( | |
17df50a5 XL |
18515 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
18516 | 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, | |
18517 | 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, | |
18518 | 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, | |
fc512014 | 18519 | ); |
17df50a5 | 18520 | let r = _mm512_bsrli_epi128::<3>(a); |
fc512014 XL |
18521 | #[rustfmt::skip] |
18522 | let e = _mm512_set_epi8( | |
17df50a5 XL |
18523 | 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, |
18524 | 0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, | |
18525 | 0, 0, 0, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, | |
18526 | 0, 0, 0, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, | |
fc512014 XL |
18527 | ); |
18528 | assert_eq_m512i(r, e); | |
18529 | } | |
18530 | ||
18531 | #[simd_test(enable = "avx512bw")] | |
18532 | unsafe fn test_mm512_alignr_epi8() { | |
18533 | #[rustfmt::skip] | |
18534 | let a = _mm512_set_epi8( | |
18535 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18536 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18537 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18538 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18539 | ); | |
18540 | let b = _mm512_set1_epi8(1); | |
17df50a5 | 18541 | let r = _mm512_alignr_epi8::<14>(a, b); |
fc512014 XL |
18542 | #[rustfmt::skip] |
18543 | let e = _mm512_set_epi8( | |
18544 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18545 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18546 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18547 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18548 | ); | |
18549 | assert_eq_m512i(r, e); | |
18550 | } | |
18551 | ||
18552 | #[simd_test(enable = "avx512bw")] | |
18553 | unsafe fn test_mm512_mask_alignr_epi8() { | |
18554 | #[rustfmt::skip] | |
18555 | let a = _mm512_set_epi8( | |
18556 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18557 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18558 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18559 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18560 | ); | |
18561 | let b = _mm512_set1_epi8(1); | |
17df50a5 | 18562 | let r = _mm512_mask_alignr_epi8::<14>(a, 0, a, b); |
fc512014 | 18563 | assert_eq_m512i(r, a); |
17df50a5 | 18564 | let r = _mm512_mask_alignr_epi8::<14>( |
fc512014 XL |
18565 | a, |
18566 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, | |
18567 | a, | |
18568 | b, | |
fc512014 XL |
18569 | ); |
18570 | #[rustfmt::skip] | |
18571 | let e = _mm512_set_epi8( | |
18572 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18573 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18574 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18575 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18576 | ); | |
18577 | assert_eq_m512i(r, e); | |
18578 | } | |
18579 | ||
18580 | #[simd_test(enable = "avx512bw")] | |
18581 | unsafe fn test_mm512_maskz_alignr_epi8() { | |
18582 | #[rustfmt::skip] | |
18583 | let a = _mm512_set_epi8( | |
18584 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18585 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18586 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18587 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18588 | ); | |
18589 | let b = _mm512_set1_epi8(1); | |
17df50a5 | 18590 | let r = _mm512_maskz_alignr_epi8::<14>(0, a, b); |
fc512014 | 18591 | assert_eq_m512i(r, _mm512_setzero_si512()); |
17df50a5 | 18592 | let r = _mm512_maskz_alignr_epi8::<14>( |
fc512014 XL |
18593 | 0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, |
18594 | a, | |
18595 | b, | |
fc512014 XL |
18596 | ); |
18597 | #[rustfmt::skip] | |
18598 | let e = _mm512_set_epi8( | |
18599 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18600 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18601 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18602 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18603 | ); | |
18604 | assert_eq_m512i(r, e); | |
18605 | } | |
cdc7bbd5 XL |
18606 | |
18607 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18608 | unsafe fn test_mm256_mask_alignr_epi8() { | |
18609 | #[rustfmt::skip] | |
18610 | let a = _mm256_set_epi8( | |
18611 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18612 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18613 | ); | |
18614 | let b = _mm256_set1_epi8(1); | |
17df50a5 | 18615 | let r = _mm256_mask_alignr_epi8::<14>(a, 0, a, b); |
cdc7bbd5 | 18616 | assert_eq_m256i(r, a); |
17df50a5 | 18617 | let r = _mm256_mask_alignr_epi8::<14>(a, 0b11111111_11111111_11111111_11111111, a, b); |
cdc7bbd5 XL |
18618 | #[rustfmt::skip] |
18619 | let e = _mm256_set_epi8( | |
18620 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18621 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18622 | ); | |
18623 | assert_eq_m256i(r, e); | |
18624 | } | |
18625 | ||
18626 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18627 | unsafe fn test_mm256_maskz_alignr_epi8() { | |
18628 | #[rustfmt::skip] | |
18629 | let a = _mm256_set_epi8( | |
18630 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18631 | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, | |
18632 | ); | |
18633 | let b = _mm256_set1_epi8(1); | |
17df50a5 | 18634 | let r = _mm256_maskz_alignr_epi8::<14>(0, a, b); |
cdc7bbd5 | 18635 | assert_eq_m256i(r, _mm256_setzero_si256()); |
17df50a5 | 18636 | let r = _mm256_maskz_alignr_epi8::<14>(0b11111111_11111111_11111111_11111111, a, b); |
cdc7bbd5 XL |
18637 | #[rustfmt::skip] |
18638 | let e = _mm256_set_epi8( | |
18639 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18640 | 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, | |
18641 | ); | |
18642 | assert_eq_m256i(r, e); | |
18643 | } | |
18644 | ||
18645 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18646 | unsafe fn test_mm_mask_alignr_epi8() { | |
18647 | let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); | |
18648 | let b = _mm_set1_epi8(1); | |
17df50a5 | 18649 | let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b); |
cdc7bbd5 | 18650 | assert_eq_m128i(r, a); |
17df50a5 | 18651 | let r = _mm_mask_alignr_epi8::<14>(a, 0b11111111_11111111, a, b); |
cdc7bbd5 XL |
18652 | let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); |
18653 | assert_eq_m128i(r, e); | |
18654 | } | |
18655 | ||
18656 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18657 | unsafe fn test_mm_maskz_alignr_epi8() { | |
18658 | let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0); | |
18659 | let b = _mm_set1_epi8(1); | |
17df50a5 | 18660 | let r = _mm_maskz_alignr_epi8::<14>(0, a, b); |
cdc7bbd5 | 18661 | assert_eq_m128i(r, _mm_setzero_si128()); |
17df50a5 | 18662 | let r = _mm_maskz_alignr_epi8::<14>(0b11111111_11111111, a, b); |
cdc7bbd5 XL |
18663 | let e = _mm_set_epi8(0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1); |
18664 | assert_eq_m128i(r, e); | |
18665 | } | |
18666 | ||
18667 | #[simd_test(enable = "avx512bw")] | |
18668 | unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() { | |
18669 | let a = _mm512_set1_epi16(i16::MAX); | |
18670 | let mut r = _mm256_undefined_si256(); | |
18671 | _mm512_mask_cvtsepi16_storeu_epi8( | |
18672 | &mut r as *mut _ as *mut i8, | |
18673 | 0b11111111_11111111_11111111_11111111, | |
18674 | a, | |
18675 | ); | |
18676 | let e = _mm256_set1_epi8(i8::MAX); | |
18677 | assert_eq_m256i(r, e); | |
18678 | } | |
18679 | ||
18680 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18681 | unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() { | |
18682 | let a = _mm256_set1_epi16(i16::MAX); | |
18683 | let mut r = _mm_undefined_si128(); | |
18684 | _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); | |
18685 | let e = _mm_set1_epi8(i8::MAX); | |
18686 | assert_eq_m128i(r, e); | |
18687 | } | |
18688 | ||
18689 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18690 | unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() { | |
18691 | let a = _mm_set1_epi16(i16::MAX); | |
18692 | let mut r = _mm_set1_epi8(0); | |
18693 | _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); | |
18694 | #[rustfmt::skip] | |
18695 | let e = _mm_set_epi8( | |
18696 | 0, 0, 0, 0, 0, 0, 0, 0, | |
18697 | i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, i8::MAX, | |
18698 | ); | |
18699 | assert_eq_m128i(r, e); | |
18700 | } | |
18701 | ||
18702 | #[simd_test(enable = "avx512bw")] | |
18703 | unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() { | |
18704 | let a = _mm512_set1_epi16(8); | |
18705 | let mut r = _mm256_undefined_si256(); | |
18706 | _mm512_mask_cvtepi16_storeu_epi8( | |
18707 | &mut r as *mut _ as *mut i8, | |
18708 | 0b11111111_11111111_11111111_11111111, | |
18709 | a, | |
18710 | ); | |
18711 | let e = _mm256_set1_epi8(8); | |
18712 | assert_eq_m256i(r, e); | |
18713 | } | |
18714 | ||
18715 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18716 | unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() { | |
18717 | let a = _mm256_set1_epi16(8); | |
18718 | let mut r = _mm_undefined_si128(); | |
18719 | _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); | |
18720 | let e = _mm_set1_epi8(8); | |
18721 | assert_eq_m128i(r, e); | |
18722 | } | |
18723 | ||
18724 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18725 | unsafe fn test_mm_mask_cvtepi16_storeu_epi8() { | |
18726 | let a = _mm_set1_epi16(8); | |
18727 | let mut r = _mm_set1_epi8(0); | |
18728 | _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); | |
18729 | let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8); | |
18730 | assert_eq_m128i(r, e); | |
18731 | } | |
18732 | ||
18733 | #[simd_test(enable = "avx512bw")] | |
18734 | unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() { | |
18735 | let a = _mm512_set1_epi16(i16::MAX); | |
18736 | let mut r = _mm256_undefined_si256(); | |
18737 | _mm512_mask_cvtusepi16_storeu_epi8( | |
18738 | &mut r as *mut _ as *mut i8, | |
18739 | 0b11111111_11111111_11111111_11111111, | |
18740 | a, | |
18741 | ); | |
18742 | let e = _mm256_set1_epi8(u8::MAX as i8); | |
18743 | assert_eq_m256i(r, e); | |
18744 | } | |
18745 | ||
18746 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18747 | unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() { | |
18748 | let a = _mm256_set1_epi16(i16::MAX); | |
18749 | let mut r = _mm_undefined_si128(); | |
18750 | _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); | |
18751 | let e = _mm_set1_epi8(u8::MAX as i8); | |
18752 | assert_eq_m128i(r, e); | |
18753 | } | |
18754 | ||
18755 | #[simd_test(enable = "avx512bw,avx512vl")] | |
18756 | unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() { | |
18757 | let a = _mm_set1_epi16(i16::MAX); | |
18758 | let mut r = _mm_set1_epi8(0); | |
18759 | _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); | |
18760 | #[rustfmt::skip] | |
18761 | let e = _mm_set_epi8( | |
18762 | 0, 0, 0, 0, | |
18763 | 0, 0, 0, 0, | |
18764 | u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, | |
18765 | u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, | |
18766 | ); | |
18767 | assert_eq_m128i(r, e); | |
18768 | } | |
fc512014 | 18769 | } |