1 //! Streaming SIMD Extensions 2 (SSE2)
4 use stdarch_test
::assert_instr
;
7 core_arch
::{simd::*, simd_llvm::*, x86::*}
,
9 mem
::{self, transmute}
,
13 /// Provides a hint to the processor that the code sequence is a spin-wait loop.
15 /// This can help improve the performance and power consumption of spin-wait
18 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
20 #[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21 #[stable(feature = "simd_x86", since = "1.27.0")]
22 pub unsafe fn _mm_pause() {
23 // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
24 // the SSE2 target-feature - therefore it does not require any target features
28 /// Invalidates and flushes the cache line that contains `p` from all levels of
29 /// the cache hierarchy.
31 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
33 #[target_feature(enable = "sse2")]
34 #[cfg_attr(test, assert_instr(clflush))]
35 #[stable(feature = "simd_x86", since = "1.27.0")]
36 pub unsafe fn _mm_clflush(p
: *const u8) {
40 /// Performs a serializing operation on all load-from-memory instructions
41 /// that were issued prior to this instruction.
43 /// Guarantees that every load instruction that precedes, in program order, is
44 /// globally visible before any load instruction which follows the fence in
47 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
49 #[target_feature(enable = "sse2")]
50 #[cfg_attr(test, assert_instr(lfence))]
51 #[stable(feature = "simd_x86", since = "1.27.0")]
52 pub unsafe fn _mm_lfence() {
56 /// Performs a serializing operation on all load-from-memory and store-to-memory
57 /// instructions that were issued prior to this instruction.
59 /// Guarantees that every memory access that precedes, in program order, the
60 /// memory fence instruction is globally visible before any memory instruction
61 /// which follows the fence in program order.
63 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
65 #[target_feature(enable = "sse2")]
66 #[cfg_attr(test, assert_instr(mfence))]
67 #[stable(feature = "simd_x86", since = "1.27.0")]
68 pub unsafe fn _mm_mfence() {
72 /// Adds packed 8-bit integers in `a` and `b`.
74 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
76 #[target_feature(enable = "sse2")]
77 #[cfg_attr(test, assert_instr(paddb))]
78 #[stable(feature = "simd_x86", since = "1.27.0")]
79 pub unsafe fn _mm_add_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
80 transmute(simd_add(a
.as_i8x16(), b
.as_i8x16()))
83 /// Adds packed 16-bit integers in `a` and `b`.
85 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
87 #[target_feature(enable = "sse2")]
88 #[cfg_attr(test, assert_instr(paddw))]
89 #[stable(feature = "simd_x86", since = "1.27.0")]
90 pub unsafe fn _mm_add_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
91 transmute(simd_add(a
.as_i16x8(), b
.as_i16x8()))
94 /// Adds packed 32-bit integers in `a` and `b`.
96 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
98 #[target_feature(enable = "sse2")]
99 #[cfg_attr(test, assert_instr(paddd))]
100 #[stable(feature = "simd_x86", since = "1.27.0")]
101 pub unsafe fn _mm_add_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
102 transmute(simd_add(a
.as_i32x4(), b
.as_i32x4()))
105 /// Adds packed 64-bit integers in `a` and `b`.
107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
109 #[target_feature(enable = "sse2")]
110 #[cfg_attr(test, assert_instr(paddq))]
111 #[stable(feature = "simd_x86", since = "1.27.0")]
112 pub unsafe fn _mm_add_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
113 transmute(simd_add(a
.as_i64x2(), b
.as_i64x2()))
116 /// Adds packed 8-bit integers in `a` and `b` using saturation.
118 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
120 #[target_feature(enable = "sse2")]
121 #[cfg_attr(test, assert_instr(paddsb))]
122 #[stable(feature = "simd_x86", since = "1.27.0")]
123 pub unsafe fn _mm_adds_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
124 transmute(simd_saturating_add(a
.as_i8x16(), b
.as_i8x16()))
127 /// Adds packed 16-bit integers in `a` and `b` using saturation.
129 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
131 #[target_feature(enable = "sse2")]
132 #[cfg_attr(test, assert_instr(paddsw))]
133 #[stable(feature = "simd_x86", since = "1.27.0")]
134 pub unsafe fn _mm_adds_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
135 transmute(simd_saturating_add(a
.as_i16x8(), b
.as_i16x8()))
138 /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
140 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
142 #[target_feature(enable = "sse2")]
143 #[cfg_attr(test, assert_instr(paddusb))]
144 #[stable(feature = "simd_x86", since = "1.27.0")]
145 pub unsafe fn _mm_adds_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
146 transmute(simd_saturating_add(a
.as_u8x16(), b
.as_u8x16()))
149 /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
151 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
153 #[target_feature(enable = "sse2")]
154 #[cfg_attr(test, assert_instr(paddusw))]
155 #[stable(feature = "simd_x86", since = "1.27.0")]
156 pub unsafe fn _mm_adds_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
157 transmute(simd_saturating_add(a
.as_u16x8(), b
.as_u16x8()))
160 /// Averages packed unsigned 8-bit integers in `a` and `b`.
162 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
164 #[target_feature(enable = "sse2")]
165 #[cfg_attr(test, assert_instr(pavgb))]
166 #[stable(feature = "simd_x86", since = "1.27.0")]
167 pub unsafe fn _mm_avg_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
168 transmute(pavgb(a
.as_u8x16(), b
.as_u8x16()))
171 /// Averages packed unsigned 16-bit integers in `a` and `b`.
173 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
175 #[target_feature(enable = "sse2")]
176 #[cfg_attr(test, assert_instr(pavgw))]
177 #[stable(feature = "simd_x86", since = "1.27.0")]
178 pub unsafe fn _mm_avg_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
179 transmute(pavgw(a
.as_u16x8(), b
.as_u16x8()))
182 /// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`.
184 /// Multiplies packed signed 16-bit integers in `a` and `b`, producing
185 /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
186 /// intermediate 32-bit integers.
188 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
190 #[target_feature(enable = "sse2")]
191 #[cfg_attr(test, assert_instr(pmaddwd))]
192 #[stable(feature = "simd_x86", since = "1.27.0")]
193 pub unsafe fn _mm_madd_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
194 transmute(pmaddwd(a
.as_i16x8(), b
.as_i16x8()))
197 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
200 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
202 #[target_feature(enable = "sse2")]
203 #[cfg_attr(test, assert_instr(pmaxsw))]
204 #[stable(feature = "simd_x86", since = "1.27.0")]
205 pub unsafe fn _mm_max_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
206 transmute(pmaxsw(a
.as_i16x8(), b
.as_i16x8()))
209 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
210 /// packed maximum values.
212 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
214 #[target_feature(enable = "sse2")]
215 #[cfg_attr(test, assert_instr(pmaxub))]
216 #[stable(feature = "simd_x86", since = "1.27.0")]
217 pub unsafe fn _mm_max_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
218 transmute(pmaxub(a
.as_u8x16(), b
.as_u8x16()))
221 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
224 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
226 #[target_feature(enable = "sse2")]
227 #[cfg_attr(test, assert_instr(pminsw))]
228 #[stable(feature = "simd_x86", since = "1.27.0")]
229 pub unsafe fn _mm_min_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
230 transmute(pminsw(a
.as_i16x8(), b
.as_i16x8()))
233 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
234 /// packed minimum values.
236 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
238 #[target_feature(enable = "sse2")]
239 #[cfg_attr(test, assert_instr(pminub))]
240 #[stable(feature = "simd_x86", since = "1.27.0")]
241 pub unsafe fn _mm_min_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
242 transmute(pminub(a
.as_u8x16(), b
.as_u8x16()))
245 /// Multiplies the packed 16-bit integers in `a` and `b`.
247 /// The multiplication produces intermediate 32-bit integers, and returns the
248 /// high 16 bits of the intermediate integers.
250 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
252 #[target_feature(enable = "sse2")]
253 #[cfg_attr(test, assert_instr(pmulhw))]
254 #[stable(feature = "simd_x86", since = "1.27.0")]
255 pub unsafe fn _mm_mulhi_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
256 transmute(pmulhw(a
.as_i16x8(), b
.as_i16x8()))
259 /// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
261 /// The multiplication produces intermediate 32-bit integers, and returns the
262 /// high 16 bits of the intermediate integers.
264 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
266 #[target_feature(enable = "sse2")]
267 #[cfg_attr(test, assert_instr(pmulhuw))]
268 #[stable(feature = "simd_x86", since = "1.27.0")]
269 pub unsafe fn _mm_mulhi_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
270 transmute(pmulhuw(a
.as_u16x8(), b
.as_u16x8()))
273 /// Multiplies the packed 16-bit integers in `a` and `b`.
275 /// The multiplication produces intermediate 32-bit integers, and returns the
276 /// low 16 bits of the intermediate integers.
278 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
280 #[target_feature(enable = "sse2")]
281 #[cfg_attr(test, assert_instr(pmullw))]
282 #[stable(feature = "simd_x86", since = "1.27.0")]
283 pub unsafe fn _mm_mullo_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
284 transmute(simd_mul(a
.as_i16x8(), b
.as_i16x8()))
287 /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
290 /// Returns the unsigned 64-bit results.
292 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
294 #[target_feature(enable = "sse2")]
295 #[cfg_attr(test, assert_instr(pmuludq))]
296 #[stable(feature = "simd_x86", since = "1.27.0")]
297 pub unsafe fn _mm_mul_epu32(a
: __m128i
, b
: __m128i
) -> __m128i
{
298 transmute(pmuludq(a
.as_u32x4(), b
.as_u32x4()))
301 /// Sum the absolute differences of packed unsigned 8-bit integers.
303 /// Computes the absolute differences of packed unsigned 8-bit integers in `a`
304 /// and `b`, then horizontally sum each consecutive 8 differences to produce
305 /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
306 /// the low 16 bits of 64-bit elements returned.
308 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
310 #[target_feature(enable = "sse2")]
311 #[cfg_attr(test, assert_instr(psadbw))]
312 #[stable(feature = "simd_x86", since = "1.27.0")]
313 pub unsafe fn _mm_sad_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
314 transmute(psadbw(a
.as_u8x16(), b
.as_u8x16()))
317 /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
319 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
321 #[target_feature(enable = "sse2")]
322 #[cfg_attr(test, assert_instr(psubb))]
323 #[stable(feature = "simd_x86", since = "1.27.0")]
324 pub unsafe fn _mm_sub_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
325 transmute(simd_sub(a
.as_i8x16(), b
.as_i8x16()))
328 /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
330 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
332 #[target_feature(enable = "sse2")]
333 #[cfg_attr(test, assert_instr(psubw))]
334 #[stable(feature = "simd_x86", since = "1.27.0")]
335 pub unsafe fn _mm_sub_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
336 transmute(simd_sub(a
.as_i16x8(), b
.as_i16x8()))
339 /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
343 #[target_feature(enable = "sse2")]
344 #[cfg_attr(test, assert_instr(psubd))]
345 #[stable(feature = "simd_x86", since = "1.27.0")]
346 pub unsafe fn _mm_sub_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
347 transmute(simd_sub(a
.as_i32x4(), b
.as_i32x4()))
350 /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
352 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
354 #[target_feature(enable = "sse2")]
355 #[cfg_attr(test, assert_instr(psubq))]
356 #[stable(feature = "simd_x86", since = "1.27.0")]
357 pub unsafe fn _mm_sub_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
358 transmute(simd_sub(a
.as_i64x2(), b
.as_i64x2()))
361 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
362 /// using saturation.
364 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
366 #[target_feature(enable = "sse2")]
367 #[cfg_attr(test, assert_instr(psubsb))]
368 #[stable(feature = "simd_x86", since = "1.27.0")]
369 pub unsafe fn _mm_subs_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
370 transmute(simd_saturating_sub(a
.as_i8x16(), b
.as_i8x16()))
373 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
374 /// using saturation.
376 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
378 #[target_feature(enable = "sse2")]
379 #[cfg_attr(test, assert_instr(psubsw))]
380 #[stable(feature = "simd_x86", since = "1.27.0")]
381 pub unsafe fn _mm_subs_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
382 transmute(simd_saturating_sub(a
.as_i16x8(), b
.as_i16x8()))
385 /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
386 /// integers in `a` using saturation.
388 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
390 #[target_feature(enable = "sse2")]
391 #[cfg_attr(test, assert_instr(psubusb))]
392 #[stable(feature = "simd_x86", since = "1.27.0")]
393 pub unsafe fn _mm_subs_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
394 transmute(simd_saturating_sub(a
.as_u8x16(), b
.as_u8x16()))
397 /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
398 /// integers in `a` using saturation.
400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
402 #[target_feature(enable = "sse2")]
403 #[cfg_attr(test, assert_instr(psubusw))]
404 #[stable(feature = "simd_x86", since = "1.27.0")]
405 pub unsafe fn _mm_subs_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
406 transmute(simd_saturating_sub(a
.as_u16x8(), b
.as_u16x8()))
409 /// Shifts `a` left by `imm8` bytes while shifting in zeros.
411 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
413 #[target_feature(enable = "sse2")]
414 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
415 #[rustc_args_required_const(1)]
416 #[stable(feature = "simd_x86", since = "1.27.0")]
417 pub unsafe fn _mm_slli_si128(a
: __m128i
, imm8
: i32) -> __m128i
{
418 _mm_slli_si128_impl(a
, imm8
)
// NOTE(review): this block is truncated in the provided source — the index list
// of the `shuffle!` macro and the `match imm8` dispatch selecting a shuffle per
// byte-shift amount (original lines ~431-475) are missing. Left byte-identical;
// restore the missing lines from the canonical implementation before building.
421 /// Implementation detail: converts the immediate argument of the
422 /// `_mm_slli_si128` intrinsic into a compile-time constant.
424 #[target_feature(enable = "sse2")]
425 unsafe fn _mm_slli_si128_impl(a
: __m128i
, imm8
: i32) -> __m128i
{
426 let (zero
, imm8
) = (_mm_set1_epi8(0).as_i8x16(), imm8
as u32);
427 let a
= a
.as_i8x16();
428 macro_rules
! shuffle
{
430 simd_shuffle16
::<i8x16
, i8x16
>(
476 /// Shifts `a` left by `imm8` bytes while shifting in zeros.
478 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
480 #[target_feature(enable = "sse2")]
481 #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
482 #[rustc_args_required_const(1)]
483 #[stable(feature = "simd_x86", since = "1.27.0")]
484 pub unsafe fn _mm_bslli_si128(a
: __m128i
, imm8
: i32) -> __m128i
{
485 _mm_slli_si128_impl(a
, imm8
)
488 /// Shifts `a` right by `imm8` bytes while shifting in zeros.
490 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
492 #[target_feature(enable = "sse2")]
493 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
494 #[rustc_args_required_const(1)]
495 #[stable(feature = "simd_x86", since = "1.27.0")]
496 pub unsafe fn _mm_bsrli_si128(a
: __m128i
, imm8
: i32) -> __m128i
{
497 _mm_srli_si128_impl(a
, imm8
)
500 /// Shifts packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
502 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
504 #[target_feature(enable = "sse2")]
505 #[cfg_attr(test, assert_instr(psllw, imm8 = 7))]
506 #[rustc_args_required_const(1)]
507 #[stable(feature = "simd_x86", since = "1.27.0")]
508 pub unsafe fn _mm_slli_epi16(a
: __m128i
, imm8
: i32) -> __m128i
{
509 let a
= a
.as_i16x8();
512 transmute(pslliw(a
, $imm8
))
515 constify_imm8
!(imm8
, call
)
518 /// Shifts packed 16-bit integers in `a` left by `count` while shifting in
521 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
523 #[target_feature(enable = "sse2")]
524 #[cfg_attr(test, assert_instr(psllw))]
525 #[stable(feature = "simd_x86", since = "1.27.0")]
526 pub unsafe fn _mm_sll_epi16(a
: __m128i
, count
: __m128i
) -> __m128i
{
527 transmute(psllw(a
.as_i16x8(), count
.as_i16x8()))
530 /// Shifts packed 32-bit integers in `a` left by `imm8` while shifting in zeros.
532 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
534 #[target_feature(enable = "sse2")]
535 #[cfg_attr(test, assert_instr(pslld, imm8 = 7))]
536 #[rustc_args_required_const(1)]
537 #[stable(feature = "simd_x86", since = "1.27.0")]
538 pub unsafe fn _mm_slli_epi32(a
: __m128i
, imm8
: i32) -> __m128i
{
539 let a
= a
.as_i32x4();
542 transmute(psllid(a
, $imm8
))
545 constify_imm8
!(imm8
, call
)
548 /// Shifts packed 32-bit integers in `a` left by `count` while shifting in
551 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
553 #[target_feature(enable = "sse2")]
554 #[cfg_attr(test, assert_instr(pslld))]
555 #[stable(feature = "simd_x86", since = "1.27.0")]
556 pub unsafe fn _mm_sll_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
557 transmute(pslld(a
.as_i32x4(), count
.as_i32x4()))
560 /// Shifts packed 64-bit integers in `a` left by `imm8` while shifting in zeros.
562 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
564 #[target_feature(enable = "sse2")]
565 #[cfg_attr(test, assert_instr(psllq, imm8 = 7))]
566 #[rustc_args_required_const(1)]
567 #[stable(feature = "simd_x86", since = "1.27.0")]
568 pub unsafe fn _mm_slli_epi64(a
: __m128i
, imm8
: i32) -> __m128i
{
569 let a
= a
.as_i64x2();
572 transmute(pslliq(a
, $imm8
))
575 constify_imm8
!(imm8
, call
)
578 /// Shifts packed 64-bit integers in `a` left by `count` while shifting in
581 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
583 #[target_feature(enable = "sse2")]
584 #[cfg_attr(test, assert_instr(psllq))]
585 #[stable(feature = "simd_x86", since = "1.27.0")]
586 pub unsafe fn _mm_sll_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
587 transmute(psllq(a
.as_i64x2(), count
.as_i64x2()))
590 /// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in sign
593 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
595 #[target_feature(enable = "sse2")]
596 #[cfg_attr(test, assert_instr(psraw, imm8 = 1))]
597 #[rustc_args_required_const(1)]
598 #[stable(feature = "simd_x86", since = "1.27.0")]
599 pub unsafe fn _mm_srai_epi16(a
: __m128i
, imm8
: i32) -> __m128i
{
600 let a
= a
.as_i16x8();
603 transmute(psraiw(a
, $imm8
))
606 constify_imm8
!(imm8
, call
)
609 /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
612 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
614 #[target_feature(enable = "sse2")]
615 #[cfg_attr(test, assert_instr(psraw))]
616 #[stable(feature = "simd_x86", since = "1.27.0")]
617 pub unsafe fn _mm_sra_epi16(a
: __m128i
, count
: __m128i
) -> __m128i
{
618 transmute(psraw(a
.as_i16x8(), count
.as_i16x8()))
621 /// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in sign
624 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
626 #[target_feature(enable = "sse2")]
627 #[cfg_attr(test, assert_instr(psrad, imm8 = 1))]
628 #[rustc_args_required_const(1)]
629 #[stable(feature = "simd_x86", since = "1.27.0")]
630 pub unsafe fn _mm_srai_epi32(a
: __m128i
, imm8
: i32) -> __m128i
{
631 let a
= a
.as_i32x4();
634 transmute(psraid(a
, $imm8
))
637 constify_imm8
!(imm8
, call
)
640 /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
643 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
645 #[target_feature(enable = "sse2")]
646 #[cfg_attr(test, assert_instr(psrad))]
647 #[stable(feature = "simd_x86", since = "1.27.0")]
648 pub unsafe fn _mm_sra_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
649 transmute(psrad(a
.as_i32x4(), count
.as_i32x4()))
652 /// Shifts `a` right by `imm8` bytes while shifting in zeros.
654 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
656 #[target_feature(enable = "sse2")]
657 #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
658 #[rustc_args_required_const(1)]
659 #[stable(feature = "simd_x86", since = "1.27.0")]
660 pub unsafe fn _mm_srli_si128(a
: __m128i
, imm8
: i32) -> __m128i
{
661 _mm_srli_si128_impl(a
, imm8
)
// NOTE(review): this block is truncated in the provided source — the index list
// of the `shuffle!` macro (original lines ~672-696) and the arms of the
// `let x: i8x16 = match imm8 { ... }` dispatch are missing. Left byte-identical;
// restore the missing lines from the canonical implementation before building.
664 /// Implementation detail: converts the immediate argument of the
665 /// `_mm_srli_si128` intrinsic into a compile-time constant.
667 #[target_feature(enable = "sse2")]
668 unsafe fn _mm_srli_si128_impl(a
: __m128i
, imm8
: i32) -> __m128i
{
669 let (zero
, imm8
) = (_mm_set1_epi8(0).as_i8x16(), imm8
as u32);
670 let a
= a
.as_i8x16();
671 macro_rules
! shuffle
{
697 let x
: i8x16
= match imm8
{
719 /// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in
722 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
724 #[target_feature(enable = "sse2")]
725 #[cfg_attr(test, assert_instr(psrlw, imm8 = 1))]
726 #[rustc_args_required_const(1)]
727 #[stable(feature = "simd_x86", since = "1.27.0")]
728 pub unsafe fn _mm_srli_epi16(a
: __m128i
, imm8
: i32) -> __m128i
{
729 let a
= a
.as_i16x8();
732 transmute(psrliw(a
, $imm8
))
735 constify_imm8
!(imm8
, call
)
738 /// Shifts packed 16-bit integers in `a` right by `count` while shifting in
741 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
743 #[target_feature(enable = "sse2")]
744 #[cfg_attr(test, assert_instr(psrlw))]
745 #[stable(feature = "simd_x86", since = "1.27.0")]
746 pub unsafe fn _mm_srl_epi16(a
: __m128i
, count
: __m128i
) -> __m128i
{
747 transmute(psrlw(a
.as_i16x8(), count
.as_i16x8()))
750 /// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in
753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
755 #[target_feature(enable = "sse2")]
756 #[cfg_attr(test, assert_instr(psrld, imm8 = 8))]
757 #[rustc_args_required_const(1)]
758 #[stable(feature = "simd_x86", since = "1.27.0")]
759 pub unsafe fn _mm_srli_epi32(a
: __m128i
, imm8
: i32) -> __m128i
{
760 let a
= a
.as_i32x4();
763 transmute(psrlid(a
, $imm8
))
766 constify_imm8
!(imm8
, call
)
769 /// Shifts packed 32-bit integers in `a` right by `count` while shifting in
772 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
774 #[target_feature(enable = "sse2")]
775 #[cfg_attr(test, assert_instr(psrld))]
776 #[stable(feature = "simd_x86", since = "1.27.0")]
777 pub unsafe fn _mm_srl_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
778 transmute(psrld(a
.as_i32x4(), count
.as_i32x4()))
781 /// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in
784 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
786 #[target_feature(enable = "sse2")]
787 #[cfg_attr(test, assert_instr(psrlq, imm8 = 1))]
788 #[rustc_args_required_const(1)]
789 #[stable(feature = "simd_x86", since = "1.27.0")]
790 pub unsafe fn _mm_srli_epi64(a
: __m128i
, imm8
: i32) -> __m128i
{
791 transmute(psrliq(a
.as_i64x2(), imm8
))
794 /// Shifts packed 64-bit integers in `a` right by `count` while shifting in
797 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
799 #[target_feature(enable = "sse2")]
800 #[cfg_attr(test, assert_instr(psrlq))]
801 #[stable(feature = "simd_x86", since = "1.27.0")]
802 pub unsafe fn _mm_srl_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
803 transmute(psrlq(a
.as_i64x2(), count
.as_i64x2()))
806 /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
809 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
811 #[target_feature(enable = "sse2")]
812 #[cfg_attr(test, assert_instr(andps))]
813 #[stable(feature = "simd_x86", since = "1.27.0")]
814 pub unsafe fn _mm_and_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
818 /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
819 /// then AND with `b`.
821 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
823 #[target_feature(enable = "sse2")]
824 #[cfg_attr(test, assert_instr(andnps))]
825 #[stable(feature = "simd_x86", since = "1.27.0")]
826 pub unsafe fn _mm_andnot_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
827 simd_and(simd_xor(_mm_set1_epi8(-1), a
), b
)
830 /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
833 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
835 #[target_feature(enable = "sse2")]
836 #[cfg_attr(test, assert_instr(orps))]
837 #[stable(feature = "simd_x86", since = "1.27.0")]
838 pub unsafe fn _mm_or_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
842 /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
845 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
847 #[target_feature(enable = "sse2")]
848 #[cfg_attr(test, assert_instr(xorps))]
849 #[stable(feature = "simd_x86", since = "1.27.0")]
850 pub unsafe fn _mm_xor_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
854 /// Compares packed 8-bit integers in `a` and `b` for equality.
856 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
858 #[target_feature(enable = "sse2")]
859 #[cfg_attr(test, assert_instr(pcmpeqb))]
860 #[stable(feature = "simd_x86", since = "1.27.0")]
861 pub unsafe fn _mm_cmpeq_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
862 transmute
::<i8x16
, _
>(simd_eq(a
.as_i8x16(), b
.as_i8x16()))
865 /// Compares packed 16-bit integers in `a` and `b` for equality.
867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
869 #[target_feature(enable = "sse2")]
870 #[cfg_attr(test, assert_instr(pcmpeqw))]
871 #[stable(feature = "simd_x86", since = "1.27.0")]
872 pub unsafe fn _mm_cmpeq_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
873 transmute
::<i16x8
, _
>(simd_eq(a
.as_i16x8(), b
.as_i16x8()))
876 /// Compares packed 32-bit integers in `a` and `b` for equality.
878 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
880 #[target_feature(enable = "sse2")]
881 #[cfg_attr(test, assert_instr(pcmpeqd))]
882 #[stable(feature = "simd_x86", since = "1.27.0")]
883 pub unsafe fn _mm_cmpeq_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
884 transmute
::<i32x4
, _
>(simd_eq(a
.as_i32x4(), b
.as_i32x4()))
887 /// Compares packed 8-bit integers in `a` and `b` for greater-than.
889 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
891 #[target_feature(enable = "sse2")]
892 #[cfg_attr(test, assert_instr(pcmpgtb))]
893 #[stable(feature = "simd_x86", since = "1.27.0")]
894 pub unsafe fn _mm_cmpgt_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
895 transmute
::<i8x16
, _
>(simd_gt(a
.as_i8x16(), b
.as_i8x16()))
898 /// Compares packed 16-bit integers in `a` and `b` for greater-than.
900 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
902 #[target_feature(enable = "sse2")]
903 #[cfg_attr(test, assert_instr(pcmpgtw))]
904 #[stable(feature = "simd_x86", since = "1.27.0")]
905 pub unsafe fn _mm_cmpgt_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
906 transmute
::<i16x8
, _
>(simd_gt(a
.as_i16x8(), b
.as_i16x8()))
909 /// Compares packed 32-bit integers in `a` and `b` for greater-than.
911 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
913 #[target_feature(enable = "sse2")]
914 #[cfg_attr(test, assert_instr(pcmpgtd))]
915 #[stable(feature = "simd_x86", since = "1.27.0")]
916 pub unsafe fn _mm_cmpgt_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
917 transmute
::<i32x4
, _
>(simd_gt(a
.as_i32x4(), b
.as_i32x4()))
920 /// Compares packed 8-bit integers in `a` and `b` for less-than.
922 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
924 #[target_feature(enable = "sse2")]
925 #[cfg_attr(test, assert_instr(pcmpgtb))]
926 #[stable(feature = "simd_x86", since = "1.27.0")]
927 pub unsafe fn _mm_cmplt_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
928 transmute
::<i8x16
, _
>(simd_lt(a
.as_i8x16(), b
.as_i8x16()))
931 /// Compares packed 16-bit integers in `a` and `b` for less-than.
933 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
935 #[target_feature(enable = "sse2")]
936 #[cfg_attr(test, assert_instr(pcmpgtw))]
937 #[stable(feature = "simd_x86", since = "1.27.0")]
938 pub unsafe fn _mm_cmplt_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
939 transmute
::<i16x8
, _
>(simd_lt(a
.as_i16x8(), b
.as_i16x8()))
942 /// Compares packed 32-bit integers in `a` and `b` for less-than.
944 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
946 #[target_feature(enable = "sse2")]
947 #[cfg_attr(test, assert_instr(pcmpgtd))]
948 #[stable(feature = "simd_x86", since = "1.27.0")]
949 pub unsafe fn _mm_cmplt_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
950 transmute
::<i32x4
, _
>(simd_lt(a
.as_i32x4(), b
.as_i32x4()))
953 /// Converts the lower two packed 32-bit integers in `a` to packed
954 /// double-precision (64-bit) floating-point elements.
956 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
958 #[target_feature(enable = "sse2")]
959 #[cfg_attr(test, assert_instr(cvtdq2pd))]
960 #[stable(feature = "simd_x86", since = "1.27.0")]
961 pub unsafe fn _mm_cvtepi32_pd(a
: __m128i
) -> __m128d
{
962 let a
= a
.as_i32x4();
963 simd_cast
::<i32x2
, __m128d
>(simd_shuffle2(a
, a
, [0, 1]))
966 /// Returns `a` with its lower element replaced by `b` after converting it to
969 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
971 #[target_feature(enable = "sse2")]
972 #[cfg_attr(test, assert_instr(cvtsi2sd))]
973 #[stable(feature = "simd_x86", since = "1.27.0")]
974 pub unsafe fn _mm_cvtsi32_sd(a
: __m128d
, b
: i32) -> __m128d
{
975 simd_insert(a
, 0, b
as f64)
978 /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
979 /// floating-point elements.
981 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
983 #[target_feature(enable = "sse2")]
984 #[cfg_attr(test, assert_instr(cvtdq2ps))]
985 #[stable(feature = "simd_x86", since = "1.27.0")]
986 pub unsafe fn _mm_cvtepi32_ps(a
: __m128i
) -> __m128
{
987 cvtdq2ps(a
.as_i32x4())
990 /// Converts packed single-precision (32-bit) floating-point elements in `a`
991 /// to packed 32-bit integers.
993 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
995 #[target_feature(enable = "sse2")]
996 #[cfg_attr(test, assert_instr(cvtps2dq))]
997 #[stable(feature = "simd_x86", since = "1.27.0")]
998 pub unsafe fn _mm_cvtps_epi32(a
: __m128
) -> __m128i
{
999 transmute(cvtps2dq(a
))
1002 /// Returns a vector whose lowest element is `a` and all higher elements are
1005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
1007 #[target_feature(enable = "sse2")]
1008 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
1009 #[stable(feature = "simd_x86", since = "1.27.0")]
1010 pub unsafe fn _mm_cvtsi32_si128(a
: i32) -> __m128i
{
1011 transmute(i32x4
::new(a
, 0, 0, 0))
1014 /// Returns the lowest element of `a`.
1016 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
1018 #[target_feature(enable = "sse2")]
1019 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
1020 #[stable(feature = "simd_x86", since = "1.27.0")]
1021 pub unsafe fn _mm_cvtsi128_si32(a
: __m128i
) -> i32 {
1022 simd_extract(a
.as_i32x4(), 0)
1025 /// Sets packed 64-bit integers with the supplied values, from highest to
1028 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
1030 #[target_feature(enable = "sse2")]
1031 // no particular instruction to test
1032 #[stable(feature = "simd_x86", since = "1.27.0")]
1033 pub unsafe fn _mm_set_epi64x(e1
: i64, e0
: i64) -> __m128i
{
1034 transmute(i64x2
::new(e0
, e1
))
1037 /// Sets packed 32-bit integers with the supplied values.
1039 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
1041 #[target_feature(enable = "sse2")]
1042 // no particular instruction to test
1043 #[stable(feature = "simd_x86", since = "1.27.0")]
1044 pub unsafe fn _mm_set_epi32(e3
: i32, e2
: i32, e1
: i32, e0
: i32) -> __m128i
{
1045 transmute(i32x4
::new(e0
, e1
, e2
, e3
))
1048 /// Sets packed 16-bit integers with the supplied values.
1050 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
1052 #[target_feature(enable = "sse2")]
1053 // no particular instruction to test
1054 #[stable(feature = "simd_x86", since = "1.27.0")]
1055 pub unsafe fn _mm_set_epi16(
1065 transmute(i16x8
::new(e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
))
1068 /// Sets packed 8-bit integers with the supplied values.
1070 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
1072 #[target_feature(enable = "sse2")]
1073 // no particular instruction to test
1074 #[stable(feature = "simd_x86", since = "1.27.0")]
1075 pub unsafe fn _mm_set_epi8(
1094 transmute(i8x16
::new(
1095 e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, e11
, e12
, e13
, e14
, e15
,
/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    // Delegate to `_mm_set_epi64x`, supplying `a` for both lanes.
    _mm_set_epi64x(a, a)
}
/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    // Delegate to `_mm_set_epi32`, supplying `a` for all four lanes.
    _mm_set_epi32(a, a, a, a)
}
/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    // Delegate to `_mm_set_epi16`, supplying `a` for all eight lanes.
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}
/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    // Delegate to `_mm_set_epi8`, supplying `a` for all sixteen lanes.
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    // Same as `_mm_set_epi32` but with the arguments passed in the opposite
    // order, so the first argument lands in the lowest lane.
    _mm_set_epi32(e0, e1, e2, e3)
}
1154 /// Sets packed 16-bit integers with the supplied values in reverse order.
1156 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
1158 #[target_feature(enable = "sse2")]
1159 // no particular instruction to test
1160 #[stable(feature = "simd_x86", since = "1.27.0")]
1161 pub unsafe fn _mm_setr_epi16(
1171 _mm_set_epi16(e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
)
1174 /// Sets packed 8-bit integers with the supplied values in reverse order.
1176 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
1178 #[target_feature(enable = "sse2")]
1179 // no particular instruction to test
1180 #[stable(feature = "simd_x86", since = "1.27.0")]
1181 pub unsafe fn _mm_setr_epi8(
1201 e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, e11
, e12
, e13
, e14
, e15
,
1205 /// Returns a vector with all elements set to zero.
1207 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
1209 #[target_feature(enable = "sse2")]
1210 #[cfg_attr(test, assert_instr(xorps))]
1211 #[stable(feature = "simd_x86", since = "1.27.0")]
1212 pub unsafe fn _mm_setzero_si128() -> __m128i
{
1216 /// Loads 64-bit integer from memory into first element of returned vector.
1218 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
1220 #[target_feature(enable = "sse2")]
1221 // FIXME movsd on windows
1226 not(all(target_os
= "linux", target_arch
= "x86_64")),
1227 target_arch
= "x86_64"
1231 #[stable(feature = "simd_x86", since = "1.27.0")]
1232 pub unsafe fn _mm_loadl_epi64(mem_addr
: *const __m128i
) -> __m128i
{
1233 _mm_set_epi64x(0, ptr
::read_unaligned(mem_addr
as *const i64))
1236 /// Loads 128-bits of integer data from memory into a new vector.
1238 /// `mem_addr` must be aligned on a 16-byte boundary.
1240 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
1242 #[target_feature(enable = "sse2")]
1243 #[cfg_attr(test, assert_instr(movaps))]
1244 #[stable(feature = "simd_x86", since = "1.27.0")]
1245 pub unsafe fn _mm_load_si128(mem_addr
: *const __m128i
) -> __m128i
{
1249 /// Loads 128-bits of integer data from memory into a new vector.
1251 /// `mem_addr` does not need to be aligned on any particular boundary.
1253 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
1255 #[target_feature(enable = "sse2")]
1256 #[cfg_attr(test, assert_instr(movups))]
1257 #[stable(feature = "simd_x86", since = "1.27.0")]
1258 pub unsafe fn _mm_loadu_si128(mem_addr
: *const __m128i
) -> __m128i
{
1259 let mut dst
: __m128i
= _mm_undefined_si128();
1260 ptr
::copy_nonoverlapping(
1261 mem_addr
as *const u8,
1262 &mut dst
as *mut __m128i
as *mut u8,
1263 mem
::size_of
::<__m128i
>(),
/// Conditionally store 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    // Delegate to the `maskmovdqu` intrinsic over the 16 byte lanes of both
    // the data and the mask.
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
1286 /// Stores 128-bits of integer data from `a` into memory.
1288 /// `mem_addr` must be aligned on a 16-byte boundary.
1290 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
1292 #[target_feature(enable = "sse2")]
1293 #[cfg_attr(test, assert_instr(movaps))]
1294 #[stable(feature = "simd_x86", since = "1.27.0")]
1295 pub unsafe fn _mm_store_si128(mem_addr
: *mut __m128i
, a
: __m128i
) {
/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    // Delegate to the unaligned-store intrinsic; the byte-pointer cast
    // drops the 16-byte alignment requirement of `__m128i`.
    storeudq(mem_addr as *mut i8, a);
}
1312 /// Stores the lower 64-bit integer `a` to a memory location.
1314 /// `mem_addr` does not need to be aligned on any particular boundary.
1316 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
1318 #[target_feature(enable = "sse2")]
1319 // FIXME mov on windows, movlps on i686
1324 not(all(target_os
= "linux", target_arch
= "x86_64")),
1325 target_arch
= "x86_64"
1329 #[stable(feature = "simd_x86", since = "1.27.0")]
1330 pub unsafe fn _mm_storel_epi64(mem_addr
: *mut __m128i
, a
: __m128i
) {
1331 ptr
::copy_nonoverlapping(&a
as *const _
as *const u8, mem_addr
as *mut u8, 8);
/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    // Compiler non-temporal store hint; the CPU may bypass the cache
    // hierarchy for this write.
    intrinsics::nontemporal_store(mem_addr, a);
}
/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    // Compiler non-temporal store hint; the CPU may bypass the cache
    // hierarchy for this write.
    intrinsics::nontemporal_store(mem_addr, a);
}
1360 /// Returns a vector where the low element is extracted from `a` and its upper
1361 /// element is zero.
1363 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
1365 #[target_feature(enable = "sse2")]
1366 // FIXME movd on windows, movd on i686
1367 #[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
1368 #[stable(feature = "simd_x86", since = "1.27.0")]
1369 pub unsafe fn _mm_move_epi64(a
: __m128i
) -> __m128i
{
1370 let zero
= _mm_setzero_si128();
1371 let r
: i64x2
= simd_shuffle2(a
.as_i64x2(), zero
.as_i64x2(), [0, 2]);
1375 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1376 /// using signed saturation.
1378 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
1380 #[target_feature(enable = "sse2")]
1381 #[cfg_attr(test, assert_instr(packsswb))]
1382 #[stable(feature = "simd_x86", since = "1.27.0")]
1383 pub unsafe fn _mm_packs_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1384 transmute(packsswb(a
.as_i16x8(), b
.as_i16x8()))
1387 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
1388 /// using signed saturation.
1390 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
1392 #[target_feature(enable = "sse2")]
1393 #[cfg_attr(test, assert_instr(packssdw))]
1394 #[stable(feature = "simd_x86", since = "1.27.0")]
1395 pub unsafe fn _mm_packs_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
1396 transmute(packssdw(a
.as_i32x4(), b
.as_i32x4()))
1399 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1400 /// using unsigned saturation.
1402 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
1404 #[target_feature(enable = "sse2")]
1405 #[cfg_attr(test, assert_instr(packuswb))]
1406 #[stable(feature = "simd_x86", since = "1.27.0")]
1407 pub unsafe fn _mm_packus_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1408 transmute(packuswb(a
.as_i16x8(), b
.as_i16x8()))
1411 /// Returns the `imm8` element of `a`.
1413 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
1415 #[target_feature(enable = "sse2")]
1416 #[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
1417 #[rustc_args_required_const(1)]
1418 #[stable(feature = "simd_x86", since = "1.27.0")]
1419 pub unsafe fn _mm_extract_epi16(a
: __m128i
, imm8
: i32) -> i32 {
1420 let a
= a
.as_u16x8();
1423 simd_extract
::<_
, u16>(a
, $imm3
) as i32
1426 constify_imm3
!(imm8
, call
)
1429 /// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
1431 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
1433 #[target_feature(enable = "sse2")]
1434 #[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
1435 #[rustc_args_required_const(2)]
1436 #[stable(feature = "simd_x86", since = "1.27.0")]
1437 pub unsafe fn _mm_insert_epi16(a
: __m128i
, i
: i32, imm8
: i32) -> __m128i
{
1438 let a
= a
.as_i16x8();
1441 transmute(simd_insert(a
, $imm3
, i
as i16))
1444 constify_imm3
!(imm8
, call
)
/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    // Delegate to the `pmovmskb` intrinsic over the 16 byte lanes.
    pmovmskb(a.as_i8x16())
}
1458 /// Shuffles 32-bit integers in `a` using the control in `imm8`.
1460 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
1462 #[target_feature(enable = "sse2")]
1463 #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
1464 #[rustc_args_required_const(1)]
1465 #[stable(feature = "simd_x86", since = "1.27.0")]
1466 pub unsafe fn _mm_shuffle_epi32(a
: __m128i
, imm8
: i32) -> __m128i
{
1467 // simd_shuffleX requires that its selector parameter be made up of
1468 // constant values, but we can't enforce that here. In spirit, we need
1469 // to write a `match` on all possible values of a byte, and for each value,
1470 // hard-code the correct `simd_shuffleX` call using only constants. We
1471 // then hope for LLVM to do the rest.
1473 // Of course, that's... awful. So we try to use macros to do it for us.
1474 let imm8
= (imm8
& 0xFF) as u8;
1475 let a
= a
.as_i32x4();
1477 macro_rules
! shuffle_done
{
1478 ($x01
:expr
, $x23
:expr
, $x45
:expr
, $x67
:expr
) => {
1479 simd_shuffle4(a
, a
, [$x01
, $x23
, $x45
, $x67
])
1482 macro_rules
! shuffle_x67
{
1483 ($x01
:expr
, $x23
:expr
, $x45
:expr
) => {
1484 match (imm8
>> 6) & 0b11 {
1485 0b00 => shuffle_done
!($x01
, $x23
, $x45
, 0),
1486 0b01 => shuffle_done
!($x01
, $x23
, $x45
, 1),
1487 0b10 => shuffle_done
!($x01
, $x23
, $x45
, 2),
1488 _
=> shuffle_done
!($x01
, $x23
, $x45
, 3),
1492 macro_rules
! shuffle_x45
{
1493 ($x01
:expr
, $x23
:expr
) => {
1494 match (imm8
>> 4) & 0b11 {
1495 0b00 => shuffle_x67
!($x01
, $x23
, 0),
1496 0b01 => shuffle_x67
!($x01
, $x23
, 1),
1497 0b10 => shuffle_x67
!($x01
, $x23
, 2),
1498 _
=> shuffle_x67
!($x01
, $x23
, 3),
1502 macro_rules
! shuffle_x23
{
1504 match (imm8
>> 2) & 0b11 {
1505 0b00 => shuffle_x45
!($x01
, 0),
1506 0b01 => shuffle_x45
!($x01
, 1),
1507 0b10 => shuffle_x45
!($x01
, 2),
1508 _
=> shuffle_x45
!($x01
, 3),
1512 let x
: i32x4
= match imm8
& 0b11 {
1513 0b00 => shuffle_x23
!(0),
1514 0b01 => shuffle_x23
!(1),
1515 0b10 => shuffle_x23
!(2),
1516 _
=> shuffle_x23
!(3),
1521 /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
1524 /// Put the results in the high 64 bits of the returned vector, with the low 64
1525 /// bits being copied from `a`.
1527 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
1529 #[target_feature(enable = "sse2")]
1530 #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
1531 #[rustc_args_required_const(1)]
1532 #[stable(feature = "simd_x86", since = "1.27.0")]
1533 pub unsafe fn _mm_shufflehi_epi16(a
: __m128i
, imm8
: i32) -> __m128i
{
1534 // See _mm_shuffle_epi32.
1535 let imm8
= (imm8
& 0xFF) as u8;
1536 let a
= a
.as_i16x8();
1537 macro_rules
! shuffle_done
{
1538 ($x01
:expr
, $x23
:expr
, $x45
:expr
, $x67
:expr
) => {
1539 simd_shuffle8(a
, a
, [0, 1, 2, 3, $x01
+ 4, $x23
+ 4, $x45
+ 4, $x67
+ 4])
1542 macro_rules
! shuffle_x67
{
1543 ($x01
:expr
, $x23
:expr
, $x45
:expr
) => {
1544 match (imm8
>> 6) & 0b11 {
1545 0b00 => shuffle_done
!($x01
, $x23
, $x45
, 0),
1546 0b01 => shuffle_done
!($x01
, $x23
, $x45
, 1),
1547 0b10 => shuffle_done
!($x01
, $x23
, $x45
, 2),
1548 _
=> shuffle_done
!($x01
, $x23
, $x45
, 3),
1552 macro_rules
! shuffle_x45
{
1553 ($x01
:expr
, $x23
:expr
) => {
1554 match (imm8
>> 4) & 0b11 {
1555 0b00 => shuffle_x67
!($x01
, $x23
, 0),
1556 0b01 => shuffle_x67
!($x01
, $x23
, 1),
1557 0b10 => shuffle_x67
!($x01
, $x23
, 2),
1558 _
=> shuffle_x67
!($x01
, $x23
, 3),
1562 macro_rules
! shuffle_x23
{
1564 match (imm8
>> 2) & 0b11 {
1565 0b00 => shuffle_x45
!($x01
, 0),
1566 0b01 => shuffle_x45
!($x01
, 1),
1567 0b10 => shuffle_x45
!($x01
, 2),
1568 _
=> shuffle_x45
!($x01
, 3),
1572 let x
: i16x8
= match imm8
& 0b11 {
1573 0b00 => shuffle_x23
!(0),
1574 0b01 => shuffle_x23
!(1),
1575 0b10 => shuffle_x23
!(2),
1576 _
=> shuffle_x23
!(3),
1581 /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
1584 /// Put the results in the low 64 bits of the returned vector, with the high 64
1585 /// bits being copied from `a`.
1587 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
1589 #[target_feature(enable = "sse2")]
1590 #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
1591 #[rustc_args_required_const(1)]
1592 #[stable(feature = "simd_x86", since = "1.27.0")]
1593 pub unsafe fn _mm_shufflelo_epi16(a
: __m128i
, imm8
: i32) -> __m128i
{
1594 // See _mm_shuffle_epi32.
1595 let imm8
= (imm8
& 0xFF) as u8;
1596 let a
= a
.as_i16x8();
1598 macro_rules
! shuffle_done
{
1599 ($x01
:expr
, $x23
:expr
, $x45
:expr
, $x67
:expr
) => {
1600 simd_shuffle8(a
, a
, [$x01
, $x23
, $x45
, $x67
, 4, 5, 6, 7])
1603 macro_rules
! shuffle_x67
{
1604 ($x01
:expr
, $x23
:expr
, $x45
:expr
) => {
1605 match (imm8
>> 6) & 0b11 {
1606 0b00 => shuffle_done
!($x01
, $x23
, $x45
, 0),
1607 0b01 => shuffle_done
!($x01
, $x23
, $x45
, 1),
1608 0b10 => shuffle_done
!($x01
, $x23
, $x45
, 2),
1609 _
=> shuffle_done
!($x01
, $x23
, $x45
, 3),
1613 macro_rules
! shuffle_x45
{
1614 ($x01
:expr
, $x23
:expr
) => {
1615 match (imm8
>> 4) & 0b11 {
1616 0b00 => shuffle_x67
!($x01
, $x23
, 0),
1617 0b01 => shuffle_x67
!($x01
, $x23
, 1),
1618 0b10 => shuffle_x67
!($x01
, $x23
, 2),
1619 _
=> shuffle_x67
!($x01
, $x23
, 3),
1623 macro_rules
! shuffle_x23
{
1625 match (imm8
>> 2) & 0b11 {
1626 0b00 => shuffle_x45
!($x01
, 0),
1627 0b01 => shuffle_x45
!($x01
, 1),
1628 0b10 => shuffle_x45
!($x01
, 2),
1629 _
=> shuffle_x45
!($x01
, 3),
1633 let x
: i16x8
= match imm8
& 0b11 {
1634 0b00 => shuffle_x23
!(0),
1635 0b01 => shuffle_x23
!(1),
1636 0b10 => shuffle_x23
!(2),
1637 _
=> shuffle_x23
!(3),
1642 /// Unpacks and interleave 8-bit integers from the high half of `a` and `b`.
1644 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
1646 #[target_feature(enable = "sse2")]
1647 #[cfg_attr(test, assert_instr(punpckhbw))]
1648 #[stable(feature = "simd_x86", since = "1.27.0")]
1649 pub unsafe fn _mm_unpackhi_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
1650 transmute
::<i8x16
, _
>(simd_shuffle16(
1653 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1657 /// Unpacks and interleave 16-bit integers from the high half of `a` and `b`.
1659 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
1661 #[target_feature(enable = "sse2")]
1662 #[cfg_attr(test, assert_instr(punpckhwd))]
1663 #[stable(feature = "simd_x86", since = "1.27.0")]
1664 pub unsafe fn _mm_unpackhi_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1665 let x
= simd_shuffle8(a
.as_i16x8(), b
.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1666 transmute
::<i16x8
, _
>(x
)
1669 /// Unpacks and interleave 32-bit integers from the high half of `a` and `b`.
1671 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
1673 #[target_feature(enable = "sse2")]
1674 #[cfg_attr(test, assert_instr(unpckhps))]
1675 #[stable(feature = "simd_x86", since = "1.27.0")]
1676 pub unsafe fn _mm_unpackhi_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
1677 transmute
::<i32x4
, _
>(simd_shuffle4(a
.as_i32x4(), b
.as_i32x4(), [2, 6, 3, 7]))
1680 /// Unpacks and interleave 64-bit integers from the high half of `a` and `b`.
1682 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
1684 #[target_feature(enable = "sse2")]
1685 #[cfg_attr(test, assert_instr(unpckhpd))]
1686 #[stable(feature = "simd_x86", since = "1.27.0")]
1687 pub unsafe fn _mm_unpackhi_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
1688 transmute
::<i64x2
, _
>(simd_shuffle2(a
.as_i64x2(), b
.as_i64x2(), [1, 3]))
1691 /// Unpacks and interleave 8-bit integers from the low half of `a` and `b`.
1693 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
1695 #[target_feature(enable = "sse2")]
1696 #[cfg_attr(test, assert_instr(punpcklbw))]
1697 #[stable(feature = "simd_x86", since = "1.27.0")]
1698 pub unsafe fn _mm_unpacklo_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
1699 transmute
::<i8x16
, _
>(simd_shuffle16(
1702 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1706 /// Unpacks and interleave 16-bit integers from the low half of `a` and `b`.
1708 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
1710 #[target_feature(enable = "sse2")]
1711 #[cfg_attr(test, assert_instr(punpcklwd))]
1712 #[stable(feature = "simd_x86", since = "1.27.0")]
1713 pub unsafe fn _mm_unpacklo_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1714 let x
= simd_shuffle8(a
.as_i16x8(), b
.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1715 transmute
::<i16x8
, _
>(x
)
1718 /// Unpacks and interleave 32-bit integers from the low half of `a` and `b`.
1720 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
1722 #[target_feature(enable = "sse2")]
1723 #[cfg_attr(test, assert_instr(unpcklps))]
1724 #[stable(feature = "simd_x86", since = "1.27.0")]
1725 pub unsafe fn _mm_unpacklo_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
1726 transmute
::<i32x4
, _
>(simd_shuffle4(a
.as_i32x4(), b
.as_i32x4(), [0, 4, 1, 5]))
1729 /// Unpacks and interleave 64-bit integers from the low half of `a` and `b`.
1731 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
1733 #[target_feature(enable = "sse2")]
1734 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
1735 #[stable(feature = "simd_x86", since = "1.27.0")]
1736 pub unsafe fn _mm_unpacklo_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
1737 transmute
::<i64x2
, _
>(simd_shuffle2(a
.as_i64x2(), b
.as_i64x2(), [0, 2]))
1740 /// Returns a new vector with the low element of `a` replaced by the sum of the
1741 /// low elements of `a` and `b`.
1743 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
1745 #[target_feature(enable = "sse2")]
1746 #[cfg_attr(test, assert_instr(addsd))]
1747 #[stable(feature = "simd_x86", since = "1.27.0")]
1748 pub unsafe fn _mm_add_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1749 simd_insert(a
, 0, _mm_cvtsd_f64(a
) + _mm_cvtsd_f64(b
))
1752 /// Adds packed double-precision (64-bit) floating-point elements in `a` and
1755 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
1757 #[target_feature(enable = "sse2")]
1758 #[cfg_attr(test, assert_instr(addpd))]
1759 #[stable(feature = "simd_x86", since = "1.27.0")]
1760 pub unsafe fn _mm_add_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1764 /// Returns a new vector with the low element of `a` replaced by the result of
1765 /// diving the lower element of `a` by the lower element of `b`.
1767 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
1769 #[target_feature(enable = "sse2")]
1770 #[cfg_attr(test, assert_instr(divsd))]
1771 #[stable(feature = "simd_x86", since = "1.27.0")]
1772 pub unsafe fn _mm_div_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1773 simd_insert(a
, 0, _mm_cvtsd_f64(a
) / _mm_cvtsd_f64(b
))
1776 /// Divide packed double-precision (64-bit) floating-point elements in `a` by
1777 /// packed elements in `b`.
1779 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
1781 #[target_feature(enable = "sse2")]
1782 #[cfg_attr(test, assert_instr(divpd))]
1783 #[stable(feature = "simd_x86", since = "1.27.0")]
1784 pub unsafe fn _mm_div_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1788 /// Returns a new vector with the low element of `a` replaced by the maximum
1789 /// of the lower elements of `a` and `b`.
1791 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
1793 #[target_feature(enable = "sse2")]
1794 #[cfg_attr(test, assert_instr(maxsd))]
1795 #[stable(feature = "simd_x86", since = "1.27.0")]
1796 pub unsafe fn _mm_max_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1800 /// Returns a new vector with the maximum values from corresponding elements in
1803 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
1805 #[target_feature(enable = "sse2")]
1806 #[cfg_attr(test, assert_instr(maxpd))]
1807 #[stable(feature = "simd_x86", since = "1.27.0")]
1808 pub unsafe fn _mm_max_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1812 /// Returns a new vector with the low element of `a` replaced by the minimum
1813 /// of the lower elements of `a` and `b`.
1815 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
1817 #[target_feature(enable = "sse2")]
1818 #[cfg_attr(test, assert_instr(minsd))]
1819 #[stable(feature = "simd_x86", since = "1.27.0")]
1820 pub unsafe fn _mm_min_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1824 /// Returns a new vector with the minimum values from corresponding elements in
1827 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
1829 #[target_feature(enable = "sse2")]
1830 #[cfg_attr(test, assert_instr(minpd))]
1831 #[stable(feature = "simd_x86", since = "1.27.0")]
1832 pub unsafe fn _mm_min_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1836 /// Returns a new vector with the low element of `a` replaced by multiplying the
1837 /// low elements of `a` and `b`.
1839 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
1841 #[target_feature(enable = "sse2")]
1842 #[cfg_attr(test, assert_instr(mulsd))]
1843 #[stable(feature = "simd_x86", since = "1.27.0")]
1844 pub unsafe fn _mm_mul_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1845 simd_insert(a
, 0, _mm_cvtsd_f64(a
) * _mm_cvtsd_f64(b
))
1848 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
1851 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
1853 #[target_feature(enable = "sse2")]
1854 #[cfg_attr(test, assert_instr(mulpd))]
1855 #[stable(feature = "simd_x86", since = "1.27.0")]
1856 pub unsafe fn _mm_mul_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1860 /// Returns a new vector with the low element of `a` replaced by the square
1861 /// root of the lower element `b`.
1863 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
1865 #[target_feature(enable = "sse2")]
1866 #[cfg_attr(test, assert_instr(sqrtsd))]
1867 #[stable(feature = "simd_x86", since = "1.27.0")]
1868 pub unsafe fn _mm_sqrt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1869 simd_insert(a
, 0, _mm_cvtsd_f64(sqrtsd(b
)))
1872 /// Returns a new vector with the square root of each of the values in `a`.
1874 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
1876 #[target_feature(enable = "sse2")]
1877 #[cfg_attr(test, assert_instr(sqrtpd))]
1878 #[stable(feature = "simd_x86", since = "1.27.0")]
1879 pub unsafe fn _mm_sqrt_pd(a
: __m128d
) -> __m128d
{
1883 /// Returns a new vector with the low element of `a` replaced by subtracting the
1884 /// low element by `b` from the low element of `a`.
1886 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
1888 #[target_feature(enable = "sse2")]
1889 #[cfg_attr(test, assert_instr(subsd))]
1890 #[stable(feature = "simd_x86", since = "1.27.0")]
1891 pub unsafe fn _mm_sub_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1892 simd_insert(a
, 0, _mm_cvtsd_f64(a
) - _mm_cvtsd_f64(b
))
1895 /// Subtract packed double-precision (64-bit) floating-point elements in `b`
1898 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
1900 #[target_feature(enable = "sse2")]
1901 #[cfg_attr(test, assert_instr(subpd))]
1902 #[stable(feature = "simd_x86", since = "1.27.0")]
1903 pub unsafe fn _mm_sub_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1907 /// Computes the bitwise AND of packed double-precision (64-bit) floating-point
1908 /// elements in `a` and `b`.
1910 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
1912 #[target_feature(enable = "sse2")]
1913 #[cfg_attr(test, assert_instr(andps))]
1914 #[stable(feature = "simd_x86", since = "1.27.0")]
1915 pub unsafe fn _mm_and_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1916 let a
: __m128i
= transmute(a
);
1917 let b
: __m128i
= transmute(b
);
1918 transmute(_mm_and_si128(a
, b
))
1921 /// Computes the bitwise NOT of `a` and then AND with `b`.
1923 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
1925 #[target_feature(enable = "sse2")]
1926 #[cfg_attr(test, assert_instr(andnps))]
1927 #[stable(feature = "simd_x86", since = "1.27.0")]
1928 pub unsafe fn _mm_andnot_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1929 let a
: __m128i
= transmute(a
);
1930 let b
: __m128i
= transmute(b
);
1931 transmute(_mm_andnot_si128(a
, b
))
1934 /// Computes the bitwise OR of `a` and `b`.
1936 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
1938 #[target_feature(enable = "sse2")]
1939 #[cfg_attr(test, assert_instr(orps))]
1940 #[stable(feature = "simd_x86", since = "1.27.0")]
1941 pub unsafe fn _mm_or_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1942 let a
: __m128i
= transmute(a
);
1943 let b
: __m128i
= transmute(b
);
1944 transmute(_mm_or_si128(a
, b
))
1947 /// Computes the bitwise OR of `a` and `b`.
1949 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
1951 #[target_feature(enable = "sse2")]
1952 #[cfg_attr(test, assert_instr(xorps))]
1953 #[stable(feature = "simd_x86", since = "1.27.0")]
1954 pub unsafe fn _mm_xor_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1955 let a
: __m128i
= transmute(a
);
1956 let b
: __m128i
= transmute(b
);
1957 transmute(_mm_xor_si128(a
, b
))
1960 /// Returns a new vector with the low element of `a` replaced by the equality
1961 /// comparison of the lower elements of `a` and `b`.
1963 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
1965 #[target_feature(enable = "sse2")]
1966 #[cfg_attr(test, assert_instr(cmpeqsd))]
1967 #[stable(feature = "simd_x86", since = "1.27.0")]
1968 pub unsafe fn _mm_cmpeq_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1972 /// Returns a new vector with the low element of `a` replaced by the less-than
1973 /// comparison of the lower elements of `a` and `b`.
1975 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
1977 #[target_feature(enable = "sse2")]
1978 #[cfg_attr(test, assert_instr(cmpltsd))]
1979 #[stable(feature = "simd_x86", since = "1.27.0")]
1980 pub unsafe fn _mm_cmplt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1984 /// Returns a new vector with the low element of `a` replaced by the
1985 /// less-than-or-equal comparison of the lower elements of `a` and `b`.
1987 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
1989 #[target_feature(enable = "sse2")]
1990 #[cfg_attr(test, assert_instr(cmplesd))]
1991 #[stable(feature = "simd_x86", since = "1.27.0")]
1992 pub unsafe fn _mm_cmple_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1996 /// Returns a new vector with the low element of `a` replaced by the
1997 /// greater-than comparison of the lower elements of `a` and `b`.
1999 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
2001 #[target_feature(enable = "sse2")]
2002 #[cfg_attr(test, assert_instr(cmpltsd))]
2003 #[stable(feature = "simd_x86", since = "1.27.0")]
2004 pub unsafe fn _mm_cmpgt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2005 simd_insert(_mm_cmplt_sd(b
, a
), 1, simd_extract
::<_
, f64>(a
, 1))
2008 /// Returns a new vector with the low element of `a` replaced by the
2009 /// greater-than-or-equal comparison of the lower elements of `a` and `b`.
2011 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
2013 #[target_feature(enable = "sse2")]
2014 #[cfg_attr(test, assert_instr(cmplesd))]
2015 #[stable(feature = "simd_x86", since = "1.27.0")]
2016 pub unsafe fn _mm_cmpge_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2017 simd_insert(_mm_cmple_sd(b
, a
), 1, simd_extract
::<_
, f64>(a
, 1))
2020 /// Returns a new vector with the low element of `a` replaced by the result
2021 /// of comparing both of the lower elements of `a` and `b` to `NaN`. If
2022 /// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
2025 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
2027 #[target_feature(enable = "sse2")]
2028 #[cfg_attr(test, assert_instr(cmpordsd))]
2029 #[stable(feature = "simd_x86", since = "1.27.0")]
2030 pub unsafe fn _mm_cmpord_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2034 /// Returns a new vector with the low element of `a` replaced by the result of
2035 /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
2036 /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
2038 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd)
2040 #[target_feature(enable = "sse2")]
2041 #[cfg_attr(test, assert_instr(cmpunordsd))]
2042 #[stable(feature = "simd_x86", since = "1.27.0")]
2043 pub unsafe fn _mm_cmpunord_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2047 /// Returns a new vector with the low element of `a` replaced by the not-equal
2048 /// comparison of the lower elements of `a` and `b`.
2050 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd)
2052 #[target_feature(enable = "sse2")]
2053 #[cfg_attr(test, assert_instr(cmpneqsd))]
2054 #[stable(feature = "simd_x86", since = "1.27.0")]
2055 pub unsafe fn _mm_cmpneq_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2059 /// Returns a new vector with the low element of `a` replaced by the
2060 /// not-less-than comparison of the lower elements of `a` and `b`.
2062 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd)
2064 #[target_feature(enable = "sse2")]
2065 #[cfg_attr(test, assert_instr(cmpnltsd))]
2066 #[stable(feature = "simd_x86", since = "1.27.0")]
2067 pub unsafe fn _mm_cmpnlt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2071 /// Returns a new vector with the low element of `a` replaced by the
2072 /// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
2074 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd)
2076 #[target_feature(enable = "sse2")]
2077 #[cfg_attr(test, assert_instr(cmpnlesd))]
2078 #[stable(feature = "simd_x86", since = "1.27.0")]
2079 pub unsafe fn _mm_cmpnle_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2083 /// Returns a new vector with the low element of `a` replaced by the
2084 /// not-greater-than comparison of the lower elements of `a` and `b`.
2086 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd)
2088 #[target_feature(enable = "sse2")]
2089 #[cfg_attr(test, assert_instr(cmpnltsd))]
2090 #[stable(feature = "simd_x86", since = "1.27.0")]
2091 pub unsafe fn _mm_cmpngt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2092 simd_insert(_mm_cmpnlt_sd(b
, a
), 1, simd_extract
::<_
, f64>(a
, 1))
2095 /// Returns a new vector with the low element of `a` replaced by the
2096 /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
2098 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd)
2100 #[target_feature(enable = "sse2")]
2101 #[cfg_attr(test, assert_instr(cmpnlesd))]
2102 #[stable(feature = "simd_x86", since = "1.27.0")]
2103 pub unsafe fn _mm_cmpnge_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2104 simd_insert(_mm_cmpnle_sd(b
, a
), 1, simd_extract
::<_
, f64>(a
, 1))
2107 /// Compares corresponding elements in `a` and `b` for equality.
2109 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
2111 #[target_feature(enable = "sse2")]
2112 #[cfg_attr(test, assert_instr(cmpeqpd))]
2113 #[stable(feature = "simd_x86", since = "1.27.0")]
2114 pub unsafe fn _mm_cmpeq_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2118 /// Compares corresponding elements in `a` and `b` for less-than.
2120 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
2122 #[target_feature(enable = "sse2")]
2123 #[cfg_attr(test, assert_instr(cmpltpd))]
2124 #[stable(feature = "simd_x86", since = "1.27.0")]
2125 pub unsafe fn _mm_cmplt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2129 /// Compares corresponding elements in `a` and `b` for less-than-or-equal
2131 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
2133 #[target_feature(enable = "sse2")]
2134 #[cfg_attr(test, assert_instr(cmplepd))]
2135 #[stable(feature = "simd_x86", since = "1.27.0")]
2136 pub unsafe fn _mm_cmple_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2140 /// Compares corresponding elements in `a` and `b` for greater-than.
2142 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
2144 #[target_feature(enable = "sse2")]
2145 #[cfg_attr(test, assert_instr(cmpltpd))]
2146 #[stable(feature = "simd_x86", since = "1.27.0")]
2147 pub unsafe fn _mm_cmpgt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2151 /// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
2153 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
2155 #[target_feature(enable = "sse2")]
2156 #[cfg_attr(test, assert_instr(cmplepd))]
2157 #[stable(feature = "simd_x86", since = "1.27.0")]
2158 pub unsafe fn _mm_cmpge_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2162 /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
2164 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
2166 #[target_feature(enable = "sse2")]
2167 #[cfg_attr(test, assert_instr(cmpordpd))]
2168 #[stable(feature = "simd_x86", since = "1.27.0")]
2169 pub unsafe fn _mm_cmpord_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2173 /// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
2175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
2177 #[target_feature(enable = "sse2")]
2178 #[cfg_attr(test, assert_instr(cmpunordpd))]
2179 #[stable(feature = "simd_x86", since = "1.27.0")]
2180 pub unsafe fn _mm_cmpunord_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2184 /// Compares corresponding elements in `a` and `b` for not-equal.
2186 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
2188 #[target_feature(enable = "sse2")]
2189 #[cfg_attr(test, assert_instr(cmpneqpd))]
2190 #[stable(feature = "simd_x86", since = "1.27.0")]
2191 pub unsafe fn _mm_cmpneq_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2195 /// Compares corresponding elements in `a` and `b` for not-less-than.
2197 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
2199 #[target_feature(enable = "sse2")]
2200 #[cfg_attr(test, assert_instr(cmpnltpd))]
2201 #[stable(feature = "simd_x86", since = "1.27.0")]
2202 pub unsafe fn _mm_cmpnlt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2206 /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
2208 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
2210 #[target_feature(enable = "sse2")]
2211 #[cfg_attr(test, assert_instr(cmpnlepd))]
2212 #[stable(feature = "simd_x86", since = "1.27.0")]
2213 pub unsafe fn _mm_cmpnle_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2217 /// Compares corresponding elements in `a` and `b` for not-greater-than.
2219 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
2221 #[target_feature(enable = "sse2")]
2222 #[cfg_attr(test, assert_instr(cmpnltpd))]
2223 #[stable(feature = "simd_x86", since = "1.27.0")]
2224 pub unsafe fn _mm_cmpngt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2228 /// Compares corresponding elements in `a` and `b` for
2229 /// not-greater-than-or-equal.
2231 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
2233 #[target_feature(enable = "sse2")]
2234 #[cfg_attr(test, assert_instr(cmpnlepd))]
2235 #[stable(feature = "simd_x86", since = "1.27.0")]
2236 pub unsafe fn _mm_cmpnge_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2240 /// Compares the lower element of `a` and `b` for equality.
2242 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
2244 #[target_feature(enable = "sse2")]
2245 #[cfg_attr(test, assert_instr(comisd))]
2246 #[stable(feature = "simd_x86", since = "1.27.0")]
2247 pub unsafe fn _mm_comieq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2251 /// Compares the lower element of `a` and `b` for less-than.
2253 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
2255 #[target_feature(enable = "sse2")]
2256 #[cfg_attr(test, assert_instr(comisd))]
2257 #[stable(feature = "simd_x86", since = "1.27.0")]
2258 pub unsafe fn _mm_comilt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2262 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2264 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
2266 #[target_feature(enable = "sse2")]
2267 #[cfg_attr(test, assert_instr(comisd))]
2268 #[stable(feature = "simd_x86", since = "1.27.0")]
2269 pub unsafe fn _mm_comile_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2273 /// Compares the lower element of `a` and `b` for greater-than.
2275 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
2277 #[target_feature(enable = "sse2")]
2278 #[cfg_attr(test, assert_instr(comisd))]
2279 #[stable(feature = "simd_x86", since = "1.27.0")]
2280 pub unsafe fn _mm_comigt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2284 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2286 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
2288 #[target_feature(enable = "sse2")]
2289 #[cfg_attr(test, assert_instr(comisd))]
2290 #[stable(feature = "simd_x86", since = "1.27.0")]
2291 pub unsafe fn _mm_comige_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2295 /// Compares the lower element of `a` and `b` for not-equal.
2297 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
2299 #[target_feature(enable = "sse2")]
2300 #[cfg_attr(test, assert_instr(comisd))]
2301 #[stable(feature = "simd_x86", since = "1.27.0")]
2302 pub unsafe fn _mm_comineq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2306 /// Compares the lower element of `a` and `b` for equality.
2308 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
2310 #[target_feature(enable = "sse2")]
2311 #[cfg_attr(test, assert_instr(ucomisd))]
2312 #[stable(feature = "simd_x86", since = "1.27.0")]
2313 pub unsafe fn _mm_ucomieq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2317 /// Compares the lower element of `a` and `b` for less-than.
2319 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
2321 #[target_feature(enable = "sse2")]
2322 #[cfg_attr(test, assert_instr(ucomisd))]
2323 #[stable(feature = "simd_x86", since = "1.27.0")]
2324 pub unsafe fn _mm_ucomilt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2328 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2330 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
2332 #[target_feature(enable = "sse2")]
2333 #[cfg_attr(test, assert_instr(ucomisd))]
2334 #[stable(feature = "simd_x86", since = "1.27.0")]
2335 pub unsafe fn _mm_ucomile_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2339 /// Compares the lower element of `a` and `b` for greater-than.
2341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
2343 #[target_feature(enable = "sse2")]
2344 #[cfg_attr(test, assert_instr(ucomisd))]
2345 #[stable(feature = "simd_x86", since = "1.27.0")]
2346 pub unsafe fn _mm_ucomigt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2350 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2352 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
2354 #[target_feature(enable = "sse2")]
2355 #[cfg_attr(test, assert_instr(ucomisd))]
2356 #[stable(feature = "simd_x86", since = "1.27.0")]
2357 pub unsafe fn _mm_ucomige_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2361 /// Compares the lower element of `a` and `b` for not-equal.
2363 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
2365 #[target_feature(enable = "sse2")]
2366 #[cfg_attr(test, assert_instr(ucomisd))]
2367 #[stable(feature = "simd_x86", since = "1.27.0")]
2368 pub unsafe fn _mm_ucomineq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2372 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2373 /// packed single-precision (32-bit) floating-point elements
2375 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
2377 #[target_feature(enable = "sse2")]
2378 #[cfg_attr(test, assert_instr(cvtpd2ps))]
2379 #[stable(feature = "simd_x86", since = "1.27.0")]
2380 pub unsafe fn _mm_cvtpd_ps(a
: __m128d
) -> __m128
{
2384 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2386 /// double-precision (64-bit) floating-point elements.
2388 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
2390 #[target_feature(enable = "sse2")]
2391 #[cfg_attr(test, assert_instr(cvtps2pd))]
2392 #[stable(feature = "simd_x86", since = "1.27.0")]
2393 pub unsafe fn _mm_cvtps_pd(a
: __m128
) -> __m128d
{
2397 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2398 /// packed 32-bit integers.
2400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
2402 #[target_feature(enable = "sse2")]
2403 #[cfg_attr(test, assert_instr(cvtpd2dq))]
2404 #[stable(feature = "simd_x86", since = "1.27.0")]
2405 pub unsafe fn _mm_cvtpd_epi32(a
: __m128d
) -> __m128i
{
2406 transmute(cvtpd2dq(a
))
2409 /// Converts the lower double-precision (64-bit) floating-point element in a to
2410 /// a 32-bit integer.
2412 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
2414 #[target_feature(enable = "sse2")]
2415 #[cfg_attr(test, assert_instr(cvtsd2si))]
2416 #[stable(feature = "simd_x86", since = "1.27.0")]
2417 pub unsafe fn _mm_cvtsd_si32(a
: __m128d
) -> i32 {
2421 /// Converts the lower double-precision (64-bit) floating-point element in `b`
2422 /// to a single-precision (32-bit) floating-point element, store the result in
2423 /// the lower element of the return value, and copies the upper element from `a`
2424 /// to the upper element the return value.
2426 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
2428 #[target_feature(enable = "sse2")]
2429 #[cfg_attr(test, assert_instr(cvtsd2ss))]
2430 #[stable(feature = "simd_x86", since = "1.27.0")]
2431 pub unsafe fn _mm_cvtsd_ss(a
: __m128
, b
: __m128d
) -> __m128
{
2435 /// Returns the lower double-precision (64-bit) floating-point element of `a`.
2437 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
2439 #[target_feature(enable = "sse2")]
2440 #[stable(feature = "simd_x86", since = "1.27.0")]
2441 pub unsafe fn _mm_cvtsd_f64(a
: __m128d
) -> f64 {
2445 /// Converts the lower single-precision (32-bit) floating-point element in `b`
2446 /// to a double-precision (64-bit) floating-point element, store the result in
2447 /// the lower element of the return value, and copies the upper element from `a`
2448 /// to the upper element the return value.
2450 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
2452 #[target_feature(enable = "sse2")]
2453 #[cfg_attr(test, assert_instr(cvtss2sd))]
2454 #[stable(feature = "simd_x86", since = "1.27.0")]
2455 pub unsafe fn _mm_cvtss_sd(a
: __m128d
, b
: __m128
) -> __m128d
{
2459 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2460 /// packed 32-bit integers with truncation.
2462 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
2464 #[target_feature(enable = "sse2")]
2465 #[cfg_attr(test, assert_instr(cvttpd2dq))]
2466 #[stable(feature = "simd_x86", since = "1.27.0")]
2467 pub unsafe fn _mm_cvttpd_epi32(a
: __m128d
) -> __m128i
{
2468 transmute(cvttpd2dq(a
))
2471 /// Converts the lower double-precision (64-bit) floating-point element in `a`
2472 /// to a 32-bit integer with truncation.
2474 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
2476 #[target_feature(enable = "sse2")]
2477 #[cfg_attr(test, assert_instr(cvttsd2si))]
2478 #[stable(feature = "simd_x86", since = "1.27.0")]
2479 pub unsafe fn _mm_cvttsd_si32(a
: __m128d
) -> i32 {
2483 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2484 /// packed 32-bit integers with truncation.
2486 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
2488 #[target_feature(enable = "sse2")]
2489 #[cfg_attr(test, assert_instr(cvttps2dq))]
2490 #[stable(feature = "simd_x86", since = "1.27.0")]
2491 pub unsafe fn _mm_cvttps_epi32(a
: __m128
) -> __m128i
{
2492 transmute(cvttps2dq(a
))
2495 /// Copies double-precision (64-bit) floating-point element `a` to the lower
2496 /// element of the packed 64-bit return value.
2498 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
2500 #[target_feature(enable = "sse2")]
2501 #[stable(feature = "simd_x86", since = "1.27.0")]
2502 pub unsafe fn _mm_set_sd(a
: f64) -> __m128d
{
2506 /// Broadcasts double-precision (64-bit) floating-point value a to all elements
2507 /// of the return value.
2509 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
2511 #[target_feature(enable = "sse2")]
2512 #[stable(feature = "simd_x86", since = "1.27.0")]
2513 pub unsafe fn _mm_set1_pd(a
: f64) -> __m128d
{
2517 /// Broadcasts double-precision (64-bit) floating-point value a to all elements
2518 /// of the return value.
2520 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
2522 #[target_feature(enable = "sse2")]
2523 #[stable(feature = "simd_x86", since = "1.27.0")]
2524 pub unsafe fn _mm_set_pd1(a
: f64) -> __m128d
{
2528 /// Sets packed double-precision (64-bit) floating-point elements in the return
2529 /// value with the supplied values.
2531 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
2533 #[target_feature(enable = "sse2")]
2534 #[stable(feature = "simd_x86", since = "1.27.0")]
2535 pub unsafe fn _mm_set_pd(a
: f64, b
: f64) -> __m128d
{
2539 /// Sets packed double-precision (64-bit) floating-point elements in the return
2540 /// value with the supplied values in reverse order.
2542 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
2544 #[target_feature(enable = "sse2")]
2545 #[stable(feature = "simd_x86", since = "1.27.0")]
2546 pub unsafe fn _mm_setr_pd(a
: f64, b
: f64) -> __m128d
{
2550 /// Returns packed double-precision (64-bit) floating-point elements with all
2553 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
2555 #[target_feature(enable = "sse2")]
2556 #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
2557 #[stable(feature = "simd_x86", since = "1.27.0")]
2558 pub unsafe fn _mm_setzero_pd() -> __m128d
{
2559 _mm_set_pd(0.0, 0.0)
2562 /// Returns a mask of the most significant bit of each element in `a`.
2564 /// The mask is stored in the 2 least significant bits of the return value.
2565 /// All other bits are set to `0`.
2567 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
2569 #[target_feature(enable = "sse2")]
2570 #[cfg_attr(test, assert_instr(movmskpd))]
2571 #[stable(feature = "simd_x86", since = "1.27.0")]
2572 pub unsafe fn _mm_movemask_pd(a
: __m128d
) -> i32 {
2576 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2577 /// floating-point elements) from memory into the returned vector.
2578 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2579 /// exception may be generated.
2581 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
2583 #[target_feature(enable = "sse2")]
2584 #[cfg_attr(test, assert_instr(movaps))]
2585 #[stable(feature = "simd_x86", since = "1.27.0")]
2586 #[allow(clippy::cast_ptr_alignment)]
2587 pub unsafe fn _mm_load_pd(mem_addr
: *const f64) -> __m128d
{
2588 *(mem_addr
as *const __m128d
)
2591 /// Loads a 64-bit double-precision value to the low element of a
2592 /// 128-bit integer vector and clears the upper element.
2594 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
2596 #[target_feature(enable = "sse2")]
2597 #[cfg_attr(test, assert_instr(movsd))]
2598 #[stable(feature = "simd_x86", since = "1.27.0")]
2599 pub unsafe fn _mm_load_sd(mem_addr
: *const f64) -> __m128d
{
2600 _mm_setr_pd(*mem_addr
, 0.)
2603 /// Loads a double-precision value into the high-order bits of a 128-bit
2604 /// vector of `[2 x double]`. The low-order bits are copied from the low-order
2605 /// bits of the first operand.
2607 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
2609 #[target_feature(enable = "sse2")]
2610 #[cfg_attr(test, assert_instr(movhps))]
2611 #[stable(feature = "simd_x86", since = "1.27.0")]
2612 pub unsafe fn _mm_loadh_pd(a
: __m128d
, mem_addr
: *const f64) -> __m128d
{
2613 _mm_setr_pd(simd_extract(a
, 0), *mem_addr
)
2616 /// Loads a double-precision value into the low-order bits of a 128-bit
2617 /// vector of `[2 x double]`. The high-order bits are copied from the
2618 /// high-order bits of the first operand.
2620 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
2622 #[target_feature(enable = "sse2")]
2623 #[cfg_attr(test, assert_instr(movlps))]
2624 #[stable(feature = "simd_x86", since = "1.27.0")]
2625 pub unsafe fn _mm_loadl_pd(a
: __m128d
, mem_addr
: *const f64) -> __m128d
{
2626 _mm_setr_pd(*mem_addr
, simd_extract(a
, 1))
2629 /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
2630 /// aligned memory location.
2631 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
2632 /// used again soon).
2634 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd)
2636 #[target_feature(enable = "sse2")]
2637 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
2638 #[stable(feature = "simd_x86", since = "1.27.0")]
2639 #[allow(clippy::cast_ptr_alignment)]
2640 pub unsafe fn _mm_stream_pd(mem_addr
: *mut f64, a
: __m128d
) {
2641 intrinsics
::nontemporal_store(mem_addr
as *mut __m128d
, a
);
2644 /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2645 /// memory location.
2647 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd)
2649 #[target_feature(enable = "sse2")]
2650 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2651 #[stable(feature = "simd_x86", since = "1.27.0")]
2652 pub unsafe fn _mm_store_sd(mem_addr
: *mut f64, a
: __m128d
) {
2653 *mem_addr
= simd_extract(a
, 0)
2656 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2657 /// floating-point elements) from `a` into memory. `mem_addr` must be aligned
2658 /// on a 16-byte boundary or a general-protection exception may be generated.
2660 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
2662 #[target_feature(enable = "sse2")]
2663 #[cfg_attr(test, assert_instr(movaps))]
2664 #[stable(feature = "simd_x86", since = "1.27.0")]
2665 #[allow(clippy::cast_ptr_alignment)]
2666 pub unsafe fn _mm_store_pd(mem_addr
: *mut f64, a
: __m128d
) {
2667 *(mem_addr
as *mut __m128d
) = a
;
2670 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2671 /// floating-point elements) from `a` into memory.
2672 /// `mem_addr` does not need to be aligned on any particular boundary.
2674 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
2676 #[target_feature(enable = "sse2")]
2677 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
2678 #[stable(feature = "simd_x86", since = "1.27.0")]
2679 pub unsafe fn _mm_storeu_pd(mem_addr
: *mut f64, a
: __m128d
) {
2680 storeupd(mem_addr
as *mut i8, a
);
2683 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2684 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2685 /// 16-byte boundary or a general-protection exception may be generated.
2687 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
2689 #[target_feature(enable = "sse2")]
2690 #[stable(feature = "simd_x86", since = "1.27.0")]
2691 #[allow(clippy::cast_ptr_alignment)]
2692 pub unsafe fn _mm_store1_pd(mem_addr
: *mut f64, a
: __m128d
) {
2693 let b
: __m128d
= simd_shuffle2(a
, a
, [0, 0]);
2694 *(mem_addr
as *mut __m128d
) = b
;
2697 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2698 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2699 /// 16-byte boundary or a general-protection exception may be generated.
2701 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
2703 #[target_feature(enable = "sse2")]
2704 #[stable(feature = "simd_x86", since = "1.27.0")]
2705 #[allow(clippy::cast_ptr_alignment)]
2706 pub unsafe fn _mm_store_pd1(mem_addr
: *mut f64, a
: __m128d
) {
2707 let b
: __m128d
= simd_shuffle2(a
, a
, [0, 0]);
2708 *(mem_addr
as *mut __m128d
) = b
;
2711 /// Stores 2 double-precision (64-bit) floating-point elements from `a` into
2712 /// memory in reverse order.
2713 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2714 /// exception may be generated.
2716 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
2718 #[target_feature(enable = "sse2")]
2719 #[stable(feature = "simd_x86", since = "1.27.0")]
2720 #[allow(clippy::cast_ptr_alignment)]
2721 pub unsafe fn _mm_storer_pd(mem_addr
: *mut f64, a
: __m128d
) {
2722 let b
: __m128d
= simd_shuffle2(a
, a
, [1, 0]);
2723 *(mem_addr
as *mut __m128d
) = b
;
2726 /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
2727 /// memory location.
2729 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd)
2731 #[target_feature(enable = "sse2")]
2732 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
2733 #[stable(feature = "simd_x86", since = "1.27.0")]
2734 pub unsafe fn _mm_storeh_pd(mem_addr
: *mut f64, a
: __m128d
) {
2735 *mem_addr
= simd_extract(a
, 1);
2738 /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2739 /// memory location.
2741 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd)
2743 #[target_feature(enable = "sse2")]
2744 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2745 #[stable(feature = "simd_x86", since = "1.27.0")]
2746 pub unsafe fn _mm_storel_pd(mem_addr
: *mut f64, a
: __m128d
) {
2747 *mem_addr
= simd_extract(a
, 0);
2750 /// Loads a double-precision (64-bit) floating-point element from memory
2751 /// into both elements of returned vector.
2753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
2755 #[target_feature(enable = "sse2")]
2756 // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2757 #[stable(feature = "simd_x86", since = "1.27.0")]
2758 pub unsafe fn _mm_load1_pd(mem_addr
: *const f64) -> __m128d
{
2763 /// Loads a double-precision (64-bit) floating-point element from memory
2764 /// into both elements of returned vector.
2766 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
2768 #[target_feature(enable = "sse2")]
2769 // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
2770 #[stable(feature = "simd_x86", since = "1.27.0")]
2771 pub unsafe fn _mm_load_pd1(mem_addr
: *const f64) -> __m128d
{
2772 _mm_load1_pd(mem_addr
)
2775 /// Loads 2 double-precision (64-bit) floating-point elements from memory into
2776 /// the returned vector in reverse order. `mem_addr` must be aligned on a
2777 /// 16-byte boundary or a general-protection exception may be generated.
2779 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
2781 #[target_feature(enable = "sse2")]
2782 #[cfg_attr(test, assert_instr(movaps))]
2783 #[stable(feature = "simd_x86", since = "1.27.0")]
2784 pub unsafe fn _mm_loadr_pd(mem_addr
: *const f64) -> __m128d
{
2785 let a
= _mm_load_pd(mem_addr
);
2786 simd_shuffle2(a
, a
, [1, 0])
2789 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2790 /// floating-point elements) from memory into the returned vector.
2791 /// `mem_addr` does not need to be aligned on any particular boundary.
2793 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
2795 #[target_feature(enable = "sse2")]
2796 #[cfg_attr(test, assert_instr(movups))]
2797 #[stable(feature = "simd_x86", since = "1.27.0")]
2798 pub unsafe fn _mm_loadu_pd(mem_addr
: *const f64) -> __m128d
{
2799 let mut dst
= _mm_undefined_pd();
2800 ptr
::copy_nonoverlapping(
2801 mem_addr
as *const u8,
2802 &mut dst
as *mut __m128d
as *mut u8,
2803 mem
::size_of
::<__m128d
>(),
2808 /// Constructs a 128-bit floating-point vector of `[2 x double]` from two
2809 /// 128-bit vector parameters of `[2 x double]`, using the immediate-value
2810 /// parameter as a specifier.
2812 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd)
2814 #[target_feature(enable = "sse2")]
2815 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(shufps, imm8 = 1))]
2816 #[cfg_attr(all(test, target_os = "windows"), assert_instr(shufpd, imm8 = 1))]
2817 #[rustc_args_required_const(2)]
2818 #[stable(feature = "simd_x86", since = "1.27.0")]
2819 pub unsafe fn _mm_shuffle_pd(a
: __m128d
, b
: __m128d
, imm8
: i32) -> __m128d
{
2821 0b00 => simd_shuffle2(a
, b
, [0, 2]),
2822 0b01 => simd_shuffle2(a
, b
, [1, 2]),
2823 0b10 => simd_shuffle2(a
, b
, [0, 3]),
2824 _
=> simd_shuffle2(a
, b
, [1, 3]),
2828 /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
2829 /// 64 bits are set to the lower 64 bits of the second parameter. The upper
2830 /// 64 bits are set to the upper 64 bits of the first parameter.
2832 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd)
2834 #[target_feature(enable = "sse2")]
2835 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movsd))]
2836 #[cfg_attr(all(test, target_os = "windows"), assert_instr(movlps))]
2837 #[stable(feature = "simd_x86", since = "1.27.0")]
2838 pub unsafe fn _mm_move_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2839 _mm_setr_pd(simd_extract(b
, 0), simd_extract(a
, 1))
2842 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2843 /// floating-point vector of `[4 x float]`.
2845 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps)
2847 #[target_feature(enable = "sse2")]
2848 #[stable(feature = "simd_x86", since = "1.27.0")]
2849 pub unsafe fn _mm_castpd_ps(a
: __m128d
) -> __m128
{
2853 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2856 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128)
2858 #[target_feature(enable = "sse2")]
2859 #[stable(feature = "simd_x86", since = "1.27.0")]
2860 pub unsafe fn _mm_castpd_si128(a
: __m128d
) -> __m128i
{
2864 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2865 /// floating-point vector of `[2 x double]`.
2867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd)
2869 #[target_feature(enable = "sse2")]
2870 #[stable(feature = "simd_x86", since = "1.27.0")]
2871 pub unsafe fn _mm_castps_pd(a
: __m128
) -> __m128d
{
2875 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2878 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128)
2880 #[target_feature(enable = "sse2")]
2881 #[stable(feature = "simd_x86", since = "1.27.0")]
2882 pub unsafe fn _mm_castps_si128(a
: __m128
) -> __m128i
{
2886 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2887 /// of `[2 x double]`.
2889 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
2891 #[target_feature(enable = "sse2")]
2892 #[stable(feature = "simd_x86", since = "1.27.0")]
2893 pub unsafe fn _mm_castsi128_pd(a
: __m128i
) -> __m128d
{
2897 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2898 /// of `[4 x float]`.
2900 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
2902 #[target_feature(enable = "sse2")]
2903 #[stable(feature = "simd_x86", since = "1.27.0")]
2904 pub unsafe fn _mm_castsi128_ps(a
: __m128i
) -> __m128
{
2908 /// Returns vector of type __m128d with undefined elements.
2910 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
2912 #[target_feature(enable = "sse2")]
2913 #[stable(feature = "simd_x86", since = "1.27.0")]
2914 pub unsafe fn _mm_undefined_pd() -> __m128d
{
2915 // FIXME: this function should return MaybeUninit<__m128d>
2916 mem
::MaybeUninit
::<__m128d
>::uninit().assume_init()
2919 /// Returns vector of type __m128i with undefined elements.
2921 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
2923 #[target_feature(enable = "sse2")]
2924 #[stable(feature = "simd_x86", since = "1.27.0")]
2925 pub unsafe fn _mm_undefined_si128() -> __m128i
{
2926 // FIXME: this function should return MaybeUninit<__m128i>
2927 mem
::MaybeUninit
::<__m128i
>::uninit().assume_init()
2930 /// The resulting `__m128d` element is composed by the low-order values of
2931 /// the two `__m128d` interleaved input elements, i.e.:
2933 /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
2934 /// input * The `[63:0]` bits are copied from the `[127:64]` bits of the first
2937 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
2939 #[target_feature(enable = "sse2")]
2940 #[cfg_attr(test, assert_instr(unpckhpd))]
2941 #[stable(feature = "simd_x86", since = "1.27.0")]
2942 pub unsafe fn _mm_unpackhi_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2943 simd_shuffle2(a
, b
, [1, 3])
2946 /// The resulting `__m128d` element is composed by the high-order values of
2947 /// the two `__m128d` interleaved input elements, i.e.:
2949 /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
2950 /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
2952 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
2954 #[target_feature(enable = "sse2")]
2955 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
2956 #[stable(feature = "simd_x86", since = "1.27.0")]
2957 pub unsafe fn _mm_unpacklo_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2958 simd_shuffle2(a
, b
, [0, 2])
2961 #[allow(improper_ctypes)]
2963 #[link_name = "llvm.x86.sse2.pause"]
2965 #[link_name = "llvm.x86.sse2.clflush"]
2966 fn clflush(p
: *const u8);
2967 #[link_name = "llvm.x86.sse2.lfence"]
2969 #[link_name = "llvm.x86.sse2.mfence"]
2971 #[link_name = "llvm.x86.sse2.pavg.b"]
2972 fn pavgb(a
: u8x16
, b
: u8x16
) -> u8x16
;
2973 #[link_name = "llvm.x86.sse2.pavg.w"]
2974 fn pavgw(a
: u16x8
, b
: u16x8
) -> u16x8
;
2975 #[link_name = "llvm.x86.sse2.pmadd.wd"]
2976 fn pmaddwd(a
: i16x8
, b
: i16x8
) -> i32x4
;
2977 #[link_name = "llvm.x86.sse2.pmaxs.w"]
2978 fn pmaxsw(a
: i16x8
, b
: i16x8
) -> i16x8
;
2979 #[link_name = "llvm.x86.sse2.pmaxu.b"]
2980 fn pmaxub(a
: u8x16
, b
: u8x16
) -> u8x16
;
2981 #[link_name = "llvm.x86.sse2.pmins.w"]
2982 fn pminsw(a
: i16x8
, b
: i16x8
) -> i16x8
;
2983 #[link_name = "llvm.x86.sse2.pminu.b"]
2984 fn pminub(a
: u8x16
, b
: u8x16
) -> u8x16
;
2985 #[link_name = "llvm.x86.sse2.pmulh.w"]
2986 fn pmulhw(a
: i16x8
, b
: i16x8
) -> i16x8
;
2987 #[link_name = "llvm.x86.sse2.pmulhu.w"]
2988 fn pmulhuw(a
: u16x8
, b
: u16x8
) -> u16x8
;
2989 #[link_name = "llvm.x86.sse2.pmulu.dq"]
2990 fn pmuludq(a
: u32x4
, b
: u32x4
) -> u64x2
;
2991 #[link_name = "llvm.x86.sse2.psad.bw"]
2992 fn psadbw(a
: u8x16
, b
: u8x16
) -> u64x2
;
2993 #[link_name = "llvm.x86.sse2.pslli.w"]
2994 fn pslliw(a
: i16x8
, imm8
: i32) -> i16x8
;
2995 #[link_name = "llvm.x86.sse2.psll.w"]
2996 fn psllw(a
: i16x8
, count
: i16x8
) -> i16x8
;
2997 #[link_name = "llvm.x86.sse2.pslli.d"]
2998 fn psllid(a
: i32x4
, imm8
: i32) -> i32x4
;
2999 #[link_name = "llvm.x86.sse2.psll.d"]
3000 fn pslld(a
: i32x4
, count
: i32x4
) -> i32x4
;
3001 #[link_name = "llvm.x86.sse2.pslli.q"]
3002 fn pslliq(a
: i64x2
, imm8
: i32) -> i64x2
;
3003 #[link_name = "llvm.x86.sse2.psll.q"]
3004 fn psllq(a
: i64x2
, count
: i64x2
) -> i64x2
;
3005 #[link_name = "llvm.x86.sse2.psrai.w"]
3006 fn psraiw(a
: i16x8
, imm8
: i32) -> i16x8
;
3007 #[link_name = "llvm.x86.sse2.psra.w"]
3008 fn psraw(a
: i16x8
, count
: i16x8
) -> i16x8
;
3009 #[link_name = "llvm.x86.sse2.psrai.d"]
3010 fn psraid(a
: i32x4
, imm8
: i32) -> i32x4
;
3011 #[link_name = "llvm.x86.sse2.psra.d"]
3012 fn psrad(a
: i32x4
, count
: i32x4
) -> i32x4
;
3013 #[link_name = "llvm.x86.sse2.psrli.w"]
3014 fn psrliw(a
: i16x8
, imm8
: i32) -> i16x8
;
3015 #[link_name = "llvm.x86.sse2.psrl.w"]
3016 fn psrlw(a
: i16x8
, count
: i16x8
) -> i16x8
;
3017 #[link_name = "llvm.x86.sse2.psrli.d"]
3018 fn psrlid(a
: i32x4
, imm8
: i32) -> i32x4
;
3019 #[link_name = "llvm.x86.sse2.psrl.d"]
3020 fn psrld(a
: i32x4
, count
: i32x4
) -> i32x4
;
3021 #[link_name = "llvm.x86.sse2.psrli.q"]
3022 fn psrliq(a
: i64x2
, imm8
: i32) -> i64x2
;
3023 #[link_name = "llvm.x86.sse2.psrl.q"]
3024 fn psrlq(a
: i64x2
, count
: i64x2
) -> i64x2
;
3025 #[link_name = "llvm.x86.sse2.cvtdq2ps"]
3026 fn cvtdq2ps(a
: i32x4
) -> __m128
;
3027 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3028 fn cvtps2dq(a
: __m128
) -> i32x4
;
3029 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3030 fn maskmovdqu(a
: i8x16
, mask
: i8x16
, mem_addr
: *mut i8);
3031 #[link_name = "llvm.x86.sse2.packsswb.128"]
3032 fn packsswb(a
: i16x8
, b
: i16x8
) -> i8x16
;
3033 #[link_name = "llvm.x86.sse2.packssdw.128"]
3034 fn packssdw(a
: i32x4
, b
: i32x4
) -> i16x8
;
3035 #[link_name = "llvm.x86.sse2.packuswb.128"]
3036 fn packuswb(a
: i16x8
, b
: i16x8
) -> u8x16
;
3037 #[link_name = "llvm.x86.sse2.pmovmskb.128"]
3038 fn pmovmskb(a
: i8x16
) -> i32;
3039 #[link_name = "llvm.x86.sse2.max.sd"]
3040 fn maxsd(a
: __m128d
, b
: __m128d
) -> __m128d
;
3041 #[link_name = "llvm.x86.sse2.max.pd"]
3042 fn maxpd(a
: __m128d
, b
: __m128d
) -> __m128d
;
3043 #[link_name = "llvm.x86.sse2.min.sd"]
3044 fn minsd(a
: __m128d
, b
: __m128d
) -> __m128d
;
3045 #[link_name = "llvm.x86.sse2.min.pd"]
3046 fn minpd(a
: __m128d
, b
: __m128d
) -> __m128d
;
3047 #[link_name = "llvm.x86.sse2.sqrt.sd"]
3048 fn sqrtsd(a
: __m128d
) -> __m128d
;
3049 #[link_name = "llvm.x86.sse2.sqrt.pd"]
3050 fn sqrtpd(a
: __m128d
) -> __m128d
;
3051 #[link_name = "llvm.x86.sse2.cmp.sd"]
3052 fn cmpsd(a
: __m128d
, b
: __m128d
, imm8
: i8) -> __m128d
;
3053 #[link_name = "llvm.x86.sse2.cmp.pd"]
3054 fn cmppd(a
: __m128d
, b
: __m128d
, imm8
: i8) -> __m128d
;
3055 #[link_name = "llvm.x86.sse2.comieq.sd"]
3056 fn comieqsd(a
: __m128d
, b
: __m128d
) -> i32;
3057 #[link_name = "llvm.x86.sse2.comilt.sd"]
3058 fn comiltsd(a
: __m128d
, b
: __m128d
) -> i32;
3059 #[link_name = "llvm.x86.sse2.comile.sd"]
3060 fn comilesd(a
: __m128d
, b
: __m128d
) -> i32;
3061 #[link_name = "llvm.x86.sse2.comigt.sd"]
3062 fn comigtsd(a
: __m128d
, b
: __m128d
) -> i32;
3063 #[link_name = "llvm.x86.sse2.comige.sd"]
3064 fn comigesd(a
: __m128d
, b
: __m128d
) -> i32;
3065 #[link_name = "llvm.x86.sse2.comineq.sd"]
3066 fn comineqsd(a
: __m128d
, b
: __m128d
) -> i32;
3067 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3068 fn ucomieqsd(a
: __m128d
, b
: __m128d
) -> i32;
3069 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3070 fn ucomiltsd(a
: __m128d
, b
: __m128d
) -> i32;
3071 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3072 fn ucomilesd(a
: __m128d
, b
: __m128d
) -> i32;
3073 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3074 fn ucomigtsd(a
: __m128d
, b
: __m128d
) -> i32;
3075 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3076 fn ucomigesd(a
: __m128d
, b
: __m128d
) -> i32;
3077 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3078 fn ucomineqsd(a
: __m128d
, b
: __m128d
) -> i32;
3079 #[link_name = "llvm.x86.sse2.movmsk.pd"]
3080 fn movmskpd(a
: __m128d
) -> i32;
3081 #[link_name = "llvm.x86.sse2.cvtpd2ps"]
3082 fn cvtpd2ps(a
: __m128d
) -> __m128
;
3083 #[link_name = "llvm.x86.sse2.cvtps2pd"]
3084 fn cvtps2pd(a
: __m128
) -> __m128d
;
3085 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3086 fn cvtpd2dq(a
: __m128d
) -> i32x4
;
3087 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3088 fn cvtsd2si(a
: __m128d
) -> i32;
3089 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3090 fn cvtsd2ss(a
: __m128
, b
: __m128d
) -> __m128
;
3091 #[link_name = "llvm.x86.sse2.cvtss2sd"]
3092 fn cvtss2sd(a
: __m128d
, b
: __m128
) -> __m128d
;
3093 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3094 fn cvttpd2dq(a
: __m128d
) -> i32x4
;
3095 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3096 fn cvttsd2si(a
: __m128d
) -> i32;
3097 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3098 fn cvttps2dq(a
: __m128
) -> i32x4
;
3099 #[link_name = "llvm.x86.sse2.storeu.dq"]
3100 fn storeudq(mem_addr
: *mut i8, a
: __m128i
);
3101 #[link_name = "llvm.x86.sse2.storeu.pd"]
3102 fn storeupd(mem_addr
: *mut i8, a
: __m128d
);
3108 core_arch
::{simd::*, x86::*}
,
3115 mem
::{self, transmute}
,
3117 use stdarch_test
::simd_test
;
3120 fn test_mm_pause() {
3121 unsafe { _mm_pause() }
3124 #[simd_test(enable = "sse2")]
3125 unsafe fn test_mm_clflush() {
3127 _mm_clflush(&x
as *const _
);
3130 #[simd_test(enable = "sse2")]
3131 unsafe fn test_mm_lfence() {
3135 #[simd_test(enable = "sse2")]
3136 unsafe fn test_mm_mfence() {
3140 #[simd_test(enable = "sse2")]
3141 unsafe fn test_mm_add_epi8() {
3142 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3144 let b
= _mm_setr_epi8(
3145 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3147 let r
= _mm_add_epi8(a
, b
);
3149 let e
= _mm_setr_epi8(
3150 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3152 assert_eq_m128i(r
, e
);
3155 #[simd_test(enable = "sse2")]
3156 unsafe fn test_mm_add_epi8_overflow() {
3157 let a
= _mm_set1_epi8(0x7F);
3158 let b
= _mm_set1_epi8(1);
3159 let r
= _mm_add_epi8(a
, b
);
3160 assert_eq_m128i(r
, _mm_set1_epi8(-128));
3163 #[simd_test(enable = "sse2")]
3164 unsafe fn test_mm_add_epi16() {
3165 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3166 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3167 let r
= _mm_add_epi16(a
, b
);
3168 let e
= _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3169 assert_eq_m128i(r
, e
);
3172 #[simd_test(enable = "sse2")]
3173 unsafe fn test_mm_add_epi32() {
3174 let a
= _mm_setr_epi32(0, 1, 2, 3);
3175 let b
= _mm_setr_epi32(4, 5, 6, 7);
3176 let r
= _mm_add_epi32(a
, b
);
3177 let e
= _mm_setr_epi32(4, 6, 8, 10);
3178 assert_eq_m128i(r
, e
);
3181 #[simd_test(enable = "sse2")]
3182 unsafe fn test_mm_add_epi64() {
3183 let a
= _mm_setr_epi64x(0, 1);
3184 let b
= _mm_setr_epi64x(2, 3);
3185 let r
= _mm_add_epi64(a
, b
);
3186 let e
= _mm_setr_epi64x(2, 4);
3187 assert_eq_m128i(r
, e
);
3190 #[simd_test(enable = "sse2")]
3191 unsafe fn test_mm_adds_epi8() {
3192 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3194 let b
= _mm_setr_epi8(
3195 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3197 let r
= _mm_adds_epi8(a
, b
);
3199 let e
= _mm_setr_epi8(
3200 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3202 assert_eq_m128i(r
, e
);
3205 #[simd_test(enable = "sse2")]
3206 unsafe fn test_mm_adds_epi8_saturate_positive() {
3207 let a
= _mm_set1_epi8(0x7F);
3208 let b
= _mm_set1_epi8(1);
3209 let r
= _mm_adds_epi8(a
, b
);
3210 assert_eq_m128i(r
, a
);
3213 #[simd_test(enable = "sse2")]
3214 unsafe fn test_mm_adds_epi8_saturate_negative() {
3215 let a
= _mm_set1_epi8(-0x80);
3216 let b
= _mm_set1_epi8(-1);
3217 let r
= _mm_adds_epi8(a
, b
);
3218 assert_eq_m128i(r
, a
);
3221 #[simd_test(enable = "sse2")]
3222 unsafe fn test_mm_adds_epi16() {
3223 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3224 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3225 let r
= _mm_adds_epi16(a
, b
);
3226 let e
= _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3227 assert_eq_m128i(r
, e
);
3230 #[simd_test(enable = "sse2")]
3231 unsafe fn test_mm_adds_epi16_saturate_positive() {
3232 let a
= _mm_set1_epi16(0x7FFF);
3233 let b
= _mm_set1_epi16(1);
3234 let r
= _mm_adds_epi16(a
, b
);
3235 assert_eq_m128i(r
, a
);
3238 #[simd_test(enable = "sse2")]
3239 unsafe fn test_mm_adds_epi16_saturate_negative() {
3240 let a
= _mm_set1_epi16(-0x8000);
3241 let b
= _mm_set1_epi16(-1);
3242 let r
= _mm_adds_epi16(a
, b
);
3243 assert_eq_m128i(r
, a
);
3246 #[simd_test(enable = "sse2")]
3247 unsafe fn test_mm_adds_epu8() {
3248 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3250 let b
= _mm_setr_epi8(
3251 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3253 let r
= _mm_adds_epu8(a
, b
);
3255 let e
= _mm_setr_epi8(
3256 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3258 assert_eq_m128i(r
, e
);
3261 #[simd_test(enable = "sse2")]
3262 unsafe fn test_mm_adds_epu8_saturate() {
3263 let a
= _mm_set1_epi8(!0);
3264 let b
= _mm_set1_epi8(1);
3265 let r
= _mm_adds_epu8(a
, b
);
3266 assert_eq_m128i(r
, a
);
3269 #[simd_test(enable = "sse2")]
3270 unsafe fn test_mm_adds_epu16() {
3271 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3272 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3273 let r
= _mm_adds_epu16(a
, b
);
3274 let e
= _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3275 assert_eq_m128i(r
, e
);
3278 #[simd_test(enable = "sse2")]
3279 unsafe fn test_mm_adds_epu16_saturate() {
3280 let a
= _mm_set1_epi16(!0);
3281 let b
= _mm_set1_epi16(1);
3282 let r
= _mm_adds_epu16(a
, b
);
3283 assert_eq_m128i(r
, a
);
3286 #[simd_test(enable = "sse2")]
3287 unsafe fn test_mm_avg_epu8() {
3288 let (a
, b
) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3289 let r
= _mm_avg_epu8(a
, b
);
3290 assert_eq_m128i(r
, _mm_set1_epi8(6));
3293 #[simd_test(enable = "sse2")]
3294 unsafe fn test_mm_avg_epu16() {
3295 let (a
, b
) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3296 let r
= _mm_avg_epu16(a
, b
);
3297 assert_eq_m128i(r
, _mm_set1_epi16(6));
3300 #[simd_test(enable = "sse2")]
3301 unsafe fn test_mm_madd_epi16() {
3302 let a
= _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3303 let b
= _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3304 let r
= _mm_madd_epi16(a
, b
);
3305 let e
= _mm_setr_epi32(29, 81, 149, 233);
3306 assert_eq_m128i(r
, e
);
3309 #[simd_test(enable = "sse2")]
3310 unsafe fn test_mm_max_epi16() {
3311 let a
= _mm_set1_epi16(1);
3312 let b
= _mm_set1_epi16(-1);
3313 let r
= _mm_max_epi16(a
, b
);
3314 assert_eq_m128i(r
, a
);
3317 #[simd_test(enable = "sse2")]
3318 unsafe fn test_mm_max_epu8() {
3319 let a
= _mm_set1_epi8(1);
3320 let b
= _mm_set1_epi8(!0);
3321 let r
= _mm_max_epu8(a
, b
);
3322 assert_eq_m128i(r
, b
);
3325 #[simd_test(enable = "sse2")]
3326 unsafe fn test_mm_min_epi16() {
3327 let a
= _mm_set1_epi16(1);
3328 let b
= _mm_set1_epi16(-1);
3329 let r
= _mm_min_epi16(a
, b
);
3330 assert_eq_m128i(r
, b
);
3333 #[simd_test(enable = "sse2")]
3334 unsafe fn test_mm_min_epu8() {
3335 let a
= _mm_set1_epi8(1);
3336 let b
= _mm_set1_epi8(!0);
3337 let r
= _mm_min_epu8(a
, b
);
3338 assert_eq_m128i(r
, a
);
3341 #[simd_test(enable = "sse2")]
3342 unsafe fn test_mm_mulhi_epi16() {
3343 let (a
, b
) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3344 let r
= _mm_mulhi_epi16(a
, b
);
3345 assert_eq_m128i(r
, _mm_set1_epi16(-16));
3348 #[simd_test(enable = "sse2")]
3349 unsafe fn test_mm_mulhi_epu16() {
3350 let (a
, b
) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3351 let r
= _mm_mulhi_epu16(a
, b
);
3352 assert_eq_m128i(r
, _mm_set1_epi16(15));
3355 #[simd_test(enable = "sse2")]
3356 unsafe fn test_mm_mullo_epi16() {
3357 let (a
, b
) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3358 let r
= _mm_mullo_epi16(a
, b
);
3359 assert_eq_m128i(r
, _mm_set1_epi16(-17960));
3362 #[simd_test(enable = "sse2")]
3363 unsafe fn test_mm_mul_epu32() {
3364 let a
= _mm_setr_epi64x(1_000_000_000, 1 << 34);
3365 let b
= _mm_setr_epi64x(1_000_000_000, 1 << 35);
3366 let r
= _mm_mul_epu32(a
, b
);
3367 let e
= _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3368 assert_eq_m128i(r
, e
);
3371 #[simd_test(enable = "sse2")]
3372 unsafe fn test_mm_sad_epu8() {
3374 let a
= _mm_setr_epi8(
3375 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3377 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3380 let b
= _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3381 let r
= _mm_sad_epu8(a
, b
);
3382 let e
= _mm_setr_epi64x(1020, 614);
3383 assert_eq_m128i(r
, e
);
3386 #[simd_test(enable = "sse2")]
3387 unsafe fn test_mm_sub_epi8() {
3388 let (a
, b
) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3389 let r
= _mm_sub_epi8(a
, b
);
3390 assert_eq_m128i(r
, _mm_set1_epi8(-1));
3393 #[simd_test(enable = "sse2")]
3394 unsafe fn test_mm_sub_epi16() {
3395 let (a
, b
) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3396 let r
= _mm_sub_epi16(a
, b
);
3397 assert_eq_m128i(r
, _mm_set1_epi16(-1));
3400 #[simd_test(enable = "sse2")]
3401 unsafe fn test_mm_sub_epi32() {
3402 let (a
, b
) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3403 let r
= _mm_sub_epi32(a
, b
);
3404 assert_eq_m128i(r
, _mm_set1_epi32(-1));
3407 #[simd_test(enable = "sse2")]
3408 unsafe fn test_mm_sub_epi64() {
3409 let (a
, b
) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3410 let r
= _mm_sub_epi64(a
, b
);
3411 assert_eq_m128i(r
, _mm_set1_epi64x(-1));
3414 #[simd_test(enable = "sse2")]
3415 unsafe fn test_mm_subs_epi8() {
3416 let (a
, b
) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3417 let r
= _mm_subs_epi8(a
, b
);
3418 assert_eq_m128i(r
, _mm_set1_epi8(3));
3421 #[simd_test(enable = "sse2")]
3422 unsafe fn test_mm_subs_epi8_saturate_positive() {
3423 let a
= _mm_set1_epi8(0x7F);
3424 let b
= _mm_set1_epi8(-1);
3425 let r
= _mm_subs_epi8(a
, b
);
3426 assert_eq_m128i(r
, a
);
3429 #[simd_test(enable = "sse2")]
3430 unsafe fn test_mm_subs_epi8_saturate_negative() {
3431 let a
= _mm_set1_epi8(-0x80);
3432 let b
= _mm_set1_epi8(1);
3433 let r
= _mm_subs_epi8(a
, b
);
3434 assert_eq_m128i(r
, a
);
3437 #[simd_test(enable = "sse2")]
3438 unsafe fn test_mm_subs_epi16() {
3439 let (a
, b
) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3440 let r
= _mm_subs_epi16(a
, b
);
3441 assert_eq_m128i(r
, _mm_set1_epi16(3));
3444 #[simd_test(enable = "sse2")]
3445 unsafe fn test_mm_subs_epi16_saturate_positive() {
3446 let a
= _mm_set1_epi16(0x7FFF);
3447 let b
= _mm_set1_epi16(-1);
3448 let r
= _mm_subs_epi16(a
, b
);
3449 assert_eq_m128i(r
, a
);
3452 #[simd_test(enable = "sse2")]
3453 unsafe fn test_mm_subs_epi16_saturate_negative() {
3454 let a
= _mm_set1_epi16(-0x8000);
3455 let b
= _mm_set1_epi16(1);
3456 let r
= _mm_subs_epi16(a
, b
);
3457 assert_eq_m128i(r
, a
);
3460 #[simd_test(enable = "sse2")]
3461 unsafe fn test_mm_subs_epu8() {
3462 let (a
, b
) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3463 let r
= _mm_subs_epu8(a
, b
);
3464 assert_eq_m128i(r
, _mm_set1_epi8(3));
3467 #[simd_test(enable = "sse2")]
3468 unsafe fn test_mm_subs_epu8_saturate() {
3469 let a
= _mm_set1_epi8(0);
3470 let b
= _mm_set1_epi8(1);
3471 let r
= _mm_subs_epu8(a
, b
);
3472 assert_eq_m128i(r
, a
);
3475 #[simd_test(enable = "sse2")]
3476 unsafe fn test_mm_subs_epu16() {
3477 let (a
, b
) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3478 let r
= _mm_subs_epu16(a
, b
);
3479 assert_eq_m128i(r
, _mm_set1_epi16(3));
3482 #[simd_test(enable = "sse2")]
3483 unsafe fn test_mm_subs_epu16_saturate() {
3484 let a
= _mm_set1_epi16(0);
3485 let b
= _mm_set1_epi16(1);
3486 let r
= _mm_subs_epu16(a
, b
);
3487 assert_eq_m128i(r
, a
);
3490 #[simd_test(enable = "sse2")]
3491 unsafe fn test_mm_slli_si128() {
3493 let a
= _mm_setr_epi8(
3494 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3496 let r
= _mm_slli_si128(a
, 1);
3497 let e
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3498 assert_eq_m128i(r
, e
);
3501 let a
= _mm_setr_epi8(
3502 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3504 let r
= _mm_slli_si128(a
, 15);
3505 let e
= _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3506 assert_eq_m128i(r
, e
);
3509 let a
= _mm_setr_epi8(
3510 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3512 let r
= _mm_slli_si128(a
, 16);
3513 assert_eq_m128i(r
, _mm_set1_epi8(0));
3516 let a
= _mm_setr_epi8(
3517 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3519 let r
= _mm_slli_si128(a
, -1);
3520 assert_eq_m128i(_mm_set1_epi8(0), r
);
3523 let a
= _mm_setr_epi8(
3524 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3526 let r
= _mm_slli_si128(a
, -0x80000000);
3527 assert_eq_m128i(r
, _mm_set1_epi8(0));
3530 #[simd_test(enable = "sse2")]
3531 unsafe fn test_mm_slli_epi16() {
3533 let a
= _mm_setr_epi16(
3534 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3536 let r
= _mm_slli_epi16(a
, 4);
3539 let e
= _mm_setr_epi16(
3540 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
3543 assert_eq_m128i(r
, e
);
3546 #[simd_test(enable = "sse2")]
3547 unsafe fn test_mm_sll_epi16() {
3548 let a
= _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
3549 let r
= _mm_sll_epi16(a
, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
3550 assert_eq_m128i(r
, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0));
3551 let r
= _mm_sll_epi16(a
, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
3552 assert_eq_m128i(r
, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
3555 #[simd_test(enable = "sse2")]
3556 unsafe fn test_mm_slli_epi32() {
3557 let r
= _mm_slli_epi32(_mm_set1_epi32(0xFFFF), 4);
3558 assert_eq_m128i(r
, _mm_set1_epi32(0xFFFF0));
3561 #[simd_test(enable = "sse2")]
3562 unsafe fn test_mm_sll_epi32() {
3563 let a
= _mm_set1_epi32(0xFFFF);
3564 let b
= _mm_setr_epi32(4, 0, 0, 0);
3565 let r
= _mm_sll_epi32(a
, b
);
3566 assert_eq_m128i(r
, _mm_set1_epi32(0xFFFF0));
3569 #[simd_test(enable = "sse2")]
3570 unsafe fn test_mm_slli_epi64() {
3571 let r
= _mm_slli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4);
3572 assert_eq_m128i(r
, _mm_set1_epi64x(0xFFFFFFFF0));
3575 #[simd_test(enable = "sse2")]
3576 unsafe fn test_mm_sll_epi64() {
3577 let a
= _mm_set1_epi64x(0xFFFFFFFF);
3578 let b
= _mm_setr_epi64x(4, 0);
3579 let r
= _mm_sll_epi64(a
, b
);
3580 assert_eq_m128i(r
, _mm_set1_epi64x(0xFFFFFFFF0));
3583 #[simd_test(enable = "sse2")]
3584 unsafe fn test_mm_srai_epi16() {
3585 let r
= _mm_srai_epi16(_mm_set1_epi16(-1), 1);
3586 assert_eq_m128i(r
, _mm_set1_epi16(-1));
3589 #[simd_test(enable = "sse2")]
3590 unsafe fn test_mm_sra_epi16() {
3591 let a
= _mm_set1_epi16(-1);
3592 let b
= _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
3593 let r
= _mm_sra_epi16(a
, b
);
3594 assert_eq_m128i(r
, _mm_set1_epi16(-1));
3597 #[simd_test(enable = "sse2")]
3598 unsafe fn test_mm_srai_epi32() {
3599 let r
= _mm_srai_epi32(_mm_set1_epi32(-1), 1);
3600 assert_eq_m128i(r
, _mm_set1_epi32(-1));
3603 #[simd_test(enable = "sse2")]
3604 unsafe fn test_mm_sra_epi32() {
3605 let a
= _mm_set1_epi32(-1);
3606 let b
= _mm_setr_epi32(1, 0, 0, 0);
3607 let r
= _mm_sra_epi32(a
, b
);
3608 assert_eq_m128i(r
, _mm_set1_epi32(-1));
3611 #[simd_test(enable = "sse2")]
3612 unsafe fn test_mm_srli_si128() {
3614 let a
= _mm_setr_epi8(
3615 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3617 let r
= _mm_srli_si128(a
, 1);
3619 let e
= _mm_setr_epi8(
3620 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3622 assert_eq_m128i(r
, e
);
3625 let a
= _mm_setr_epi8(
3626 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3628 let r
= _mm_srli_si128(a
, 15);
3629 let e
= _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3630 assert_eq_m128i(r
, e
);
3633 let a
= _mm_setr_epi8(
3634 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3636 let r
= _mm_srli_si128(a
, 16);
3637 assert_eq_m128i(r
, _mm_set1_epi8(0));
3640 let a
= _mm_setr_epi8(
3641 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3643 let r
= _mm_srli_si128(a
, -1);
3644 assert_eq_m128i(r
, _mm_set1_epi8(0));
3647 let a
= _mm_setr_epi8(
3648 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3650 let r
= _mm_srli_si128(a
, -0x80000000);
3651 assert_eq_m128i(r
, _mm_set1_epi8(0));
3654 #[simd_test(enable = "sse2")]
3655 unsafe fn test_mm_srli_epi16() {
3657 let a
= _mm_setr_epi16(
3658 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3660 let r
= _mm_srli_epi16(a
, 4);
3662 let e
= _mm_setr_epi16(
3663 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
3665 assert_eq_m128i(r
, e
);
3668 #[simd_test(enable = "sse2")]
3669 unsafe fn test_mm_srl_epi16() {
3670 let a
= _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
3671 let r
= _mm_srl_epi16(a
, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
3672 assert_eq_m128i(r
, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0));
3673 let r
= _mm_srl_epi16(a
, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
3674 assert_eq_m128i(r
, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
3677 #[simd_test(enable = "sse2")]
3678 unsafe fn test_mm_srli_epi32() {
3679 let r
= _mm_srli_epi32(_mm_set1_epi32(0xFFFF), 4);
3680 assert_eq_m128i(r
, _mm_set1_epi32(0xFFF));
3683 #[simd_test(enable = "sse2")]
3684 unsafe fn test_mm_srl_epi32() {
3685 let a
= _mm_set1_epi32(0xFFFF);
3686 let b
= _mm_setr_epi32(4, 0, 0, 0);
3687 let r
= _mm_srl_epi32(a
, b
);
3688 assert_eq_m128i(r
, _mm_set1_epi32(0xFFF));
3691 #[simd_test(enable = "sse2")]
3692 unsafe fn test_mm_srli_epi64() {
3693 let r
= _mm_srli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4);
3694 assert_eq_m128i(r
, _mm_set1_epi64x(0xFFFFFFF));
3697 #[simd_test(enable = "sse2")]
3698 unsafe fn test_mm_srl_epi64() {
3699 let a
= _mm_set1_epi64x(0xFFFFFFFF);
3700 let b
= _mm_setr_epi64x(4, 0);
3701 let r
= _mm_srl_epi64(a
, b
);
3702 assert_eq_m128i(r
, _mm_set1_epi64x(0xFFFFFFF));
3705 #[simd_test(enable = "sse2")]
3706 unsafe fn test_mm_and_si128() {
3707 let a
= _mm_set1_epi8(5);
3708 let b
= _mm_set1_epi8(3);
3709 let r
= _mm_and_si128(a
, b
);
3710 assert_eq_m128i(r
, _mm_set1_epi8(1));
3713 #[simd_test(enable = "sse2")]
3714 unsafe fn test_mm_andnot_si128() {
3715 let a
= _mm_set1_epi8(5);
3716 let b
= _mm_set1_epi8(3);
3717 let r
= _mm_andnot_si128(a
, b
);
3718 assert_eq_m128i(r
, _mm_set1_epi8(2));
3721 #[simd_test(enable = "sse2")]
3722 unsafe fn test_mm_or_si128() {
3723 let a
= _mm_set1_epi8(5);
3724 let b
= _mm_set1_epi8(3);
3725 let r
= _mm_or_si128(a
, b
);
3726 assert_eq_m128i(r
, _mm_set1_epi8(7));
3729 #[simd_test(enable = "sse2")]
3730 unsafe fn test_mm_xor_si128() {
3731 let a
= _mm_set1_epi8(5);
3732 let b
= _mm_set1_epi8(3);
3733 let r
= _mm_xor_si128(a
, b
);
3734 assert_eq_m128i(r
, _mm_set1_epi8(6));
3737 #[simd_test(enable = "sse2")]
3738 unsafe fn test_mm_cmpeq_epi8() {
3739 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3740 let b
= _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3741 let r
= _mm_cmpeq_epi8(a
, b
);
3746 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3751 #[simd_test(enable = "sse2")]
3752 unsafe fn test_mm_cmpeq_epi16() {
3753 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3754 let b
= _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3755 let r
= _mm_cmpeq_epi16(a
, b
);
3756 assert_eq_m128i(r
, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3759 #[simd_test(enable = "sse2")]
3760 unsafe fn test_mm_cmpeq_epi32() {
3761 let a
= _mm_setr_epi32(0, 1, 2, 3);
3762 let b
= _mm_setr_epi32(3, 2, 2, 0);
3763 let r
= _mm_cmpeq_epi32(a
, b
);
3764 assert_eq_m128i(r
, _mm_setr_epi32(0, 0, !0, 0));
3767 #[simd_test(enable = "sse2")]
3768 unsafe fn test_mm_cmpgt_epi8() {
3769 let a
= _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3770 let b
= _mm_set1_epi8(0);
3771 let r
= _mm_cmpgt_epi8(a
, b
);
3772 let e
= _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3773 assert_eq_m128i(r
, e
);
3776 #[simd_test(enable = "sse2")]
3777 unsafe fn test_mm_cmpgt_epi16() {
3778 let a
= _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3779 let b
= _mm_set1_epi16(0);
3780 let r
= _mm_cmpgt_epi16(a
, b
);
3781 let e
= _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3782 assert_eq_m128i(r
, e
);
3785 #[simd_test(enable = "sse2")]
3786 unsafe fn test_mm_cmpgt_epi32() {
3787 let a
= _mm_set_epi32(5, 0, 0, 0);
3788 let b
= _mm_set1_epi32(0);
3789 let r
= _mm_cmpgt_epi32(a
, b
);
3790 assert_eq_m128i(r
, _mm_set_epi32(!0, 0, 0, 0));
3793 #[simd_test(enable = "sse2")]
3794 unsafe fn test_mm_cmplt_epi8() {
3795 let a
= _mm_set1_epi8(0);
3796 let b
= _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3797 let r
= _mm_cmplt_epi8(a
, b
);
3798 let e
= _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3799 assert_eq_m128i(r
, e
);
3802 #[simd_test(enable = "sse2")]
3803 unsafe fn test_mm_cmplt_epi16() {
3804 let a
= _mm_set1_epi16(0);
3805 let b
= _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3806 let r
= _mm_cmplt_epi16(a
, b
);
3807 let e
= _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3808 assert_eq_m128i(r
, e
);
3811 #[simd_test(enable = "sse2")]
3812 unsafe fn test_mm_cmplt_epi32() {
3813 let a
= _mm_set1_epi32(0);
3814 let b
= _mm_set_epi32(5, 0, 0, 0);
3815 let r
= _mm_cmplt_epi32(a
, b
);
3816 assert_eq_m128i(r
, _mm_set_epi32(!0, 0, 0, 0));
3819 #[simd_test(enable = "sse2")]
3820 unsafe fn test_mm_cvtepi32_pd() {
3821 let a
= _mm_set_epi32(35, 25, 15, 5);
3822 let r
= _mm_cvtepi32_pd(a
);
3823 assert_eq_m128d(r
, _mm_setr_pd(5.0, 15.0));
3826 #[simd_test(enable = "sse2")]
3827 unsafe fn test_mm_cvtsi32_sd() {
3828 let a
= _mm_set1_pd(3.5);
3829 let r
= _mm_cvtsi32_sd(a
, 5);
3830 assert_eq_m128d(r
, _mm_setr_pd(5.0, 3.5));
3833 #[simd_test(enable = "sse2")]
3834 unsafe fn test_mm_cvtepi32_ps() {
3835 let a
= _mm_setr_epi32(1, 2, 3, 4);
3836 let r
= _mm_cvtepi32_ps(a
);
3837 assert_eq_m128(r
, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3840 #[simd_test(enable = "sse2")]
3841 unsafe fn test_mm_cvtps_epi32() {
3842 let a
= _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3843 let r
= _mm_cvtps_epi32(a
);
3844 assert_eq_m128i(r
, _mm_setr_epi32(1, 2, 3, 4));
3847 #[simd_test(enable = "sse2")]
3848 unsafe fn test_mm_cvtsi32_si128() {
3849 let r
= _mm_cvtsi32_si128(5);
3850 assert_eq_m128i(r
, _mm_setr_epi32(5, 0, 0, 0));
3853 #[simd_test(enable = "sse2")]
3854 unsafe fn test_mm_cvtsi128_si32() {
3855 let r
= _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3859 #[simd_test(enable = "sse2")]
3860 unsafe fn test_mm_set_epi64x() {
3861 let r
= _mm_set_epi64x(0, 1);
3862 assert_eq_m128i(r
, _mm_setr_epi64x(1, 0));
3865 #[simd_test(enable = "sse2")]
3866 unsafe fn test_mm_set_epi32() {
3867 let r
= _mm_set_epi32(0, 1, 2, 3);
3868 assert_eq_m128i(r
, _mm_setr_epi32(3, 2, 1, 0));
3871 #[simd_test(enable = "sse2")]
3872 unsafe fn test_mm_set_epi16() {
3873 let r
= _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3874 assert_eq_m128i(r
, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3877 #[simd_test(enable = "sse2")]
3878 unsafe fn test_mm_set_epi8() {
3880 let r
= _mm_set_epi8(
3881 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3884 let e
= _mm_setr_epi8(
3885 15, 14, 13, 12, 11, 10, 9, 8,
3886 7, 6, 5, 4, 3, 2, 1, 0,
3888 assert_eq_m128i(r
, e
);
3891 #[simd_test(enable = "sse2")]
3892 unsafe fn test_mm_set1_epi64x() {
3893 let r
= _mm_set1_epi64x(1);
3894 assert_eq_m128i(r
, _mm_set1_epi64x(1));
3897 #[simd_test(enable = "sse2")]
3898 unsafe fn test_mm_set1_epi32() {
3899 let r
= _mm_set1_epi32(1);
3900 assert_eq_m128i(r
, _mm_set1_epi32(1));
3903 #[simd_test(enable = "sse2")]
3904 unsafe fn test_mm_set1_epi16() {
3905 let r
= _mm_set1_epi16(1);
3906 assert_eq_m128i(r
, _mm_set1_epi16(1));
3909 #[simd_test(enable = "sse2")]
3910 unsafe fn test_mm_set1_epi8() {
3911 let r
= _mm_set1_epi8(1);
3912 assert_eq_m128i(r
, _mm_set1_epi8(1));
3915 #[simd_test(enable = "sse2")]
3916 unsafe fn test_mm_setr_epi32() {
3917 let r
= _mm_setr_epi32(0, 1, 2, 3);
3918 assert_eq_m128i(r
, _mm_setr_epi32(0, 1, 2, 3));
3921 #[simd_test(enable = "sse2")]
3922 unsafe fn test_mm_setr_epi16() {
3923 let r
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3924 assert_eq_m128i(r
, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3927 #[simd_test(enable = "sse2")]
3928 unsafe fn test_mm_setr_epi8() {
3930 let r
= _mm_setr_epi8(
3931 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3934 let e
= _mm_setr_epi8(
3935 0, 1, 2, 3, 4, 5, 6, 7,
3936 8, 9, 10, 11, 12, 13, 14, 15,
3938 assert_eq_m128i(r
, e
);
3941 #[simd_test(enable = "sse2")]
3942 unsafe fn test_mm_setzero_si128() {
3943 let r
= _mm_setzero_si128();
3944 assert_eq_m128i(r
, _mm_set1_epi64x(0));
3947 #[simd_test(enable = "sse2")]
3948 unsafe fn test_mm_loadl_epi64() {
3949 let a
= _mm_setr_epi64x(6, 5);
3950 let r
= _mm_loadl_epi64(&a
as *const _
);
3951 assert_eq_m128i(r
, _mm_setr_epi64x(6, 0));
3954 #[simd_test(enable = "sse2")]
3955 unsafe fn test_mm_load_si128() {
3956 let a
= _mm_set_epi64x(5, 6);
3957 let r
= _mm_load_si128(&a
as *const _
as *const _
);
3958 assert_eq_m128i(a
, r
);
3961 #[simd_test(enable = "sse2")]
3962 unsafe fn test_mm_loadu_si128() {
3963 let a
= _mm_set_epi64x(5, 6);
3964 let r
= _mm_loadu_si128(&a
as *const _
as *const _
);
3965 assert_eq_m128i(a
, r
);
3968 #[simd_test(enable = "sse2")]
3969 unsafe fn test_mm_maskmoveu_si128() {
3970 let a
= _mm_set1_epi8(9);
3972 let mask
= _mm_set_epi8(
3973 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3974 0, 0, 0, 0, 0, 0, 0, 0,
3976 let mut r
= _mm_set1_epi8(0);
3977 _mm_maskmoveu_si128(a
, mask
, &mut r
as *mut _
as *mut i8);
3978 let e
= _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3979 assert_eq_m128i(r
, e
);
3982 #[simd_test(enable = "sse2")]
3983 unsafe fn test_mm_store_si128() {
3984 let a
= _mm_set1_epi8(9);
3985 let mut r
= _mm_set1_epi8(0);
3986 _mm_store_si128(&mut r
as *mut _
as *mut __m128i
, a
);
3987 assert_eq_m128i(r
, a
);
3990 #[simd_test(enable = "sse2")]
3991 unsafe fn test_mm_storeu_si128() {
3992 let a
= _mm_set1_epi8(9);
3993 let mut r
= _mm_set1_epi8(0);
3994 _mm_storeu_si128(&mut r
as *mut _
as *mut __m128i
, a
);
3995 assert_eq_m128i(r
, a
);
3998 #[simd_test(enable = "sse2")]
3999 unsafe fn test_mm_storel_epi64() {
4000 let a
= _mm_setr_epi64x(2, 9);
4001 let mut r
= _mm_set1_epi8(0);
4002 _mm_storel_epi64(&mut r
as *mut _
as *mut __m128i
, a
);
4003 assert_eq_m128i(r
, _mm_setr_epi64x(2, 0));
4006 #[simd_test(enable = "sse2")]
4007 unsafe fn test_mm_stream_si128() {
4008 let a
= _mm_setr_epi32(1, 2, 3, 4);
4009 let mut r
= _mm_undefined_si128();
4010 _mm_stream_si128(&mut r
as *mut _
, a
);
4011 assert_eq_m128i(r
, a
);
4014 #[simd_test(enable = "sse2")]
4015 unsafe fn test_mm_stream_si32() {
4017 let mut mem
= boxed
::Box
::<i32>::new(-1);
4018 _mm_stream_si32(&mut *mem
as *mut i32, a
);
4019 assert_eq
!(a
, *mem
);
4022 #[simd_test(enable = "sse2")]
4023 unsafe fn test_mm_move_epi64() {
4024 let a
= _mm_setr_epi64x(5, 6);
4025 let r
= _mm_move_epi64(a
);
4026 assert_eq_m128i(r
, _mm_setr_epi64x(5, 0));
4029 #[simd_test(enable = "sse2")]
4030 unsafe fn test_mm_packs_epi16() {
4031 let a
= _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4032 let b
= _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4033 let r
= _mm_packs_epi16(a
, b
);
4038 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4043 #[simd_test(enable = "sse2")]
4044 unsafe fn test_mm_packs_epi32() {
4045 let a
= _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4046 let b
= _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4047 let r
= _mm_packs_epi32(a
, b
);
4050 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4054 #[simd_test(enable = "sse2")]
4055 unsafe fn test_mm_packus_epi16() {
4056 let a
= _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4057 let b
= _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4058 let r
= _mm_packus_epi16(a
, b
);
4061 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4065 #[simd_test(enable = "sse2")]
4066 unsafe fn test_mm_extract_epi16() {
4067 let a
= _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4068 let r1
= _mm_extract_epi16(a
, 0);
4069 let r2
= _mm_extract_epi16(a
, 11);
4070 assert_eq
!(r1
, 0xFFFF);
4074 #[simd_test(enable = "sse2")]
4075 unsafe fn test_mm_insert_epi16() {
4076 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4077 let r
= _mm_insert_epi16(a
, 9, 0);
4078 let e
= _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4079 assert_eq_m128i(r
, e
);
4082 #[simd_test(enable = "sse2")]
4083 unsafe fn test_mm_movemask_epi8() {
4085 let a
= _mm_setr_epi8(
4086 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4087 0b0101, 0b1111_0000u8 as i8, 0, 0,
4088 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4089 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4091 let r
= _mm_movemask_epi8(a
);
4092 assert_eq
!(r
, 0b10100110_00100101);
4095 #[simd_test(enable = "sse2")]
4096 unsafe fn test_mm_shuffle_epi32() {
4097 let a
= _mm_setr_epi32(5, 10, 15, 20);
4098 let r
= _mm_shuffle_epi32(a
, 0b00_01_01_11);
4099 let e
= _mm_setr_epi32(20, 10, 10, 5);
4100 assert_eq_m128i(r
, e
);
4103 #[simd_test(enable = "sse2")]
4104 unsafe fn test_mm_shufflehi_epi16() {
4105 let a
= _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4106 let r
= _mm_shufflehi_epi16(a
, 0b00_01_01_11);
4107 let e
= _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4108 assert_eq_m128i(r
, e
);
4111 #[simd_test(enable = "sse2")]
4112 unsafe fn test_mm_shufflelo_epi16() {
4113 let a
= _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4114 let r
= _mm_shufflelo_epi16(a
, 0b00_01_01_11);
4115 let e
= _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4116 assert_eq_m128i(r
, e
);
4119 #[simd_test(enable = "sse2")]
4120 unsafe fn test_mm_unpackhi_epi8() {
4122 let a
= _mm_setr_epi8(
4123 0, 1, 2, 3, 4, 5, 6, 7,
4124 8, 9, 10, 11, 12, 13, 14, 15,
4127 let b
= _mm_setr_epi8(
4128 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4130 let r
= _mm_unpackhi_epi8(a
, b
);
4132 let e
= _mm_setr_epi8(
4133 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4135 assert_eq_m128i(r
, e
);
4138 #[simd_test(enable = "sse2")]
4139 unsafe fn test_mm_unpackhi_epi16() {
4140 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4141 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4142 let r
= _mm_unpackhi_epi16(a
, b
);
4143 let e
= _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4144 assert_eq_m128i(r
, e
);
4147 #[simd_test(enable = "sse2")]
4148 unsafe fn test_mm_unpackhi_epi32() {
4149 let a
= _mm_setr_epi32(0, 1, 2, 3);
4150 let b
= _mm_setr_epi32(4, 5, 6, 7);
4151 let r
= _mm_unpackhi_epi32(a
, b
);
4152 let e
= _mm_setr_epi32(2, 6, 3, 7);
4153 assert_eq_m128i(r
, e
);
4156 #[simd_test(enable = "sse2")]
4157 unsafe fn test_mm_unpackhi_epi64() {
4158 let a
= _mm_setr_epi64x(0, 1);
4159 let b
= _mm_setr_epi64x(2, 3);
4160 let r
= _mm_unpackhi_epi64(a
, b
);
4161 let e
= _mm_setr_epi64x(1, 3);
4162 assert_eq_m128i(r
, e
);
4165 #[simd_test(enable = "sse2")]
4166 unsafe fn test_mm_unpacklo_epi8() {
4168 let a
= _mm_setr_epi8(
4169 0, 1, 2, 3, 4, 5, 6, 7,
4170 8, 9, 10, 11, 12, 13, 14, 15,
4173 let b
= _mm_setr_epi8(
4174 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4176 let r
= _mm_unpacklo_epi8(a
, b
);
4178 let e
= _mm_setr_epi8(
4179 0, 16, 1, 17, 2, 18, 3, 19,
4180 4, 20, 5, 21, 6, 22, 7, 23,
4182 assert_eq_m128i(r
, e
);
4185 #[simd_test(enable = "sse2")]
4186 unsafe fn test_mm_unpacklo_epi16() {
4187 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4188 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4189 let r
= _mm_unpacklo_epi16(a
, b
);
4190 let e
= _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4191 assert_eq_m128i(r
, e
);
4194 #[simd_test(enable = "sse2")]
4195 unsafe fn test_mm_unpacklo_epi32() {
4196 let a
= _mm_setr_epi32(0, 1, 2, 3);
4197 let b
= _mm_setr_epi32(4, 5, 6, 7);
4198 let r
= _mm_unpacklo_epi32(a
, b
);
4199 let e
= _mm_setr_epi32(0, 4, 1, 5);
4200 assert_eq_m128i(r
, e
);
4203 #[simd_test(enable = "sse2")]
4204 unsafe fn test_mm_unpacklo_epi64() {
4205 let a
= _mm_setr_epi64x(0, 1);
4206 let b
= _mm_setr_epi64x(2, 3);
4207 let r
= _mm_unpacklo_epi64(a
, b
);
4208 let e
= _mm_setr_epi64x(0, 2);
4209 assert_eq_m128i(r
, e
);
4212 #[simd_test(enable = "sse2")]
4213 unsafe fn test_mm_add_sd() {
4214 let a
= _mm_setr_pd(1.0, 2.0);
4215 let b
= _mm_setr_pd(5.0, 10.0);
4216 let r
= _mm_add_sd(a
, b
);
4217 assert_eq_m128d(r
, _mm_setr_pd(6.0, 2.0));
4220 #[simd_test(enable = "sse2")]
4221 unsafe fn test_mm_add_pd() {
4222 let a
= _mm_setr_pd(1.0, 2.0);
4223 let b
= _mm_setr_pd(5.0, 10.0);
4224 let r
= _mm_add_pd(a
, b
);
4225 assert_eq_m128d(r
, _mm_setr_pd(6.0, 12.0));
4228 #[simd_test(enable = "sse2")]
4229 unsafe fn test_mm_div_sd() {
4230 let a
= _mm_setr_pd(1.0, 2.0);
4231 let b
= _mm_setr_pd(5.0, 10.0);
4232 let r
= _mm_div_sd(a
, b
);
4233 assert_eq_m128d(r
, _mm_setr_pd(0.2, 2.0));
4236 #[simd_test(enable = "sse2")]
4237 unsafe fn test_mm_div_pd() {
4238 let a
= _mm_setr_pd(1.0, 2.0);
4239 let b
= _mm_setr_pd(5.0, 10.0);
4240 let r
= _mm_div_pd(a
, b
);
4241 assert_eq_m128d(r
, _mm_setr_pd(0.2, 0.2));
4244 #[simd_test(enable = "sse2")]
4245 unsafe fn test_mm_max_sd() {
4246 let a
= _mm_setr_pd(1.0, 2.0);
4247 let b
= _mm_setr_pd(5.0, 10.0);
4248 let r
= _mm_max_sd(a
, b
);
4249 assert_eq_m128d(r
, _mm_setr_pd(5.0, 2.0));
4252 #[simd_test(enable = "sse2")]
4253 unsafe fn test_mm_max_pd() {
4254 let a
= _mm_setr_pd(1.0, 2.0);
4255 let b
= _mm_setr_pd(5.0, 10.0);
4256 let r
= _mm_max_pd(a
, b
);
4257 assert_eq_m128d(r
, _mm_setr_pd(5.0, 10.0));
4260 #[simd_test(enable = "sse2")]
4261 unsafe fn test_mm_min_sd() {
4262 let a
= _mm_setr_pd(1.0, 2.0);
4263 let b
= _mm_setr_pd(5.0, 10.0);
4264 let r
= _mm_min_sd(a
, b
);
4265 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.0));
4268 #[simd_test(enable = "sse2")]
4269 unsafe fn test_mm_min_pd() {
4270 let a
= _mm_setr_pd(1.0, 2.0);
4271 let b
= _mm_setr_pd(5.0, 10.0);
4272 let r
= _mm_min_pd(a
, b
);
4273 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.0));
4276 #[simd_test(enable = "sse2")]
4277 unsafe fn test_mm_mul_sd() {
4278 let a
= _mm_setr_pd(1.0, 2.0);
4279 let b
= _mm_setr_pd(5.0, 10.0);
4280 let r
= _mm_mul_sd(a
, b
);
4281 assert_eq_m128d(r
, _mm_setr_pd(5.0, 2.0));
4284 #[simd_test(enable = "sse2")]
4285 unsafe fn test_mm_mul_pd() {
4286 let a
= _mm_setr_pd(1.0, 2.0);
4287 let b
= _mm_setr_pd(5.0, 10.0);
4288 let r
= _mm_mul_pd(a
, b
);
4289 assert_eq_m128d(r
, _mm_setr_pd(5.0, 20.0));
4292 #[simd_test(enable = "sse2")]
4293 unsafe fn test_mm_sqrt_sd() {
4294 let a
= _mm_setr_pd(1.0, 2.0);
4295 let b
= _mm_setr_pd(5.0, 10.0);
4296 let r
= _mm_sqrt_sd(a
, b
);
4297 assert_eq_m128d(r
, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4300 #[simd_test(enable = "sse2")]
4301 unsafe fn test_mm_sqrt_pd() {
4302 let r
= _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4303 assert_eq_m128d(r
, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4306 #[simd_test(enable = "sse2")]
4307 unsafe fn test_mm_sub_sd() {
4308 let a
= _mm_setr_pd(1.0, 2.0);
4309 let b
= _mm_setr_pd(5.0, 10.0);
4310 let r
= _mm_sub_sd(a
, b
);
4311 assert_eq_m128d(r
, _mm_setr_pd(-4.0, 2.0));
4314 #[simd_test(enable = "sse2")]
4315 unsafe fn test_mm_sub_pd() {
4316 let a
= _mm_setr_pd(1.0, 2.0);
4317 let b
= _mm_setr_pd(5.0, 10.0);
4318 let r
= _mm_sub_pd(a
, b
);
4319 assert_eq_m128d(r
, _mm_setr_pd(-4.0, -8.0));
4322 #[simd_test(enable = "sse2")]
4323 unsafe fn test_mm_and_pd() {
4324 let a
= transmute(u64x2
::splat(5));
4325 let b
= transmute(u64x2
::splat(3));
4326 let r
= _mm_and_pd(a
, b
);
4327 let e
= transmute(u64x2
::splat(1));
4328 assert_eq_m128d(r
, e
);
4331 #[simd_test(enable = "sse2")]
4332 unsafe fn test_mm_andnot_pd() {
4333 let a
= transmute(u64x2
::splat(5));
4334 let b
= transmute(u64x2
::splat(3));
4335 let r
= _mm_andnot_pd(a
, b
);
4336 let e
= transmute(u64x2
::splat(2));
4337 assert_eq_m128d(r
, e
);
4340 #[simd_test(enable = "sse2")]
4341 unsafe fn test_mm_or_pd() {
4342 let a
= transmute(u64x2
::splat(5));
4343 let b
= transmute(u64x2
::splat(3));
4344 let r
= _mm_or_pd(a
, b
);
4345 let e
= transmute(u64x2
::splat(7));
4346 assert_eq_m128d(r
, e
);
4349 #[simd_test(enable = "sse2")]
4350 unsafe fn test_mm_xor_pd() {
4351 let a
= transmute(u64x2
::splat(5));
4352 let b
= transmute(u64x2
::splat(3));
4353 let r
= _mm_xor_pd(a
, b
);
4354 let e
= transmute(u64x2
::splat(6));
4355 assert_eq_m128d(r
, e
);
4358 #[simd_test(enable = "sse2")]
4359 unsafe fn test_mm_cmpeq_sd() {
4360 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4361 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4362 let r
= transmute
::<_
, __m128i
>(_mm_cmpeq_sd(a
, b
));
4363 assert_eq_m128i(r
, e
);
4366 #[simd_test(enable = "sse2")]
4367 unsafe fn test_mm_cmplt_sd() {
4368 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4369 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4370 let r
= transmute
::<_
, __m128i
>(_mm_cmplt_sd(a
, b
));
4371 assert_eq_m128i(r
, e
);
4374 #[simd_test(enable = "sse2")]
4375 unsafe fn test_mm_cmple_sd() {
4376 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4377 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4378 let r
= transmute
::<_
, __m128i
>(_mm_cmple_sd(a
, b
));
4379 assert_eq_m128i(r
, e
);
4382 #[simd_test(enable = "sse2")]
4383 unsafe fn test_mm_cmpgt_sd() {
4384 let (a
, b
) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4385 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4386 let r
= transmute
::<_
, __m128i
>(_mm_cmpgt_sd(a
, b
));
4387 assert_eq_m128i(r
, e
);
4390 #[simd_test(enable = "sse2")]
4391 unsafe fn test_mm_cmpge_sd() {
4392 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4393 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4394 let r
= transmute
::<_
, __m128i
>(_mm_cmpge_sd(a
, b
));
4395 assert_eq_m128i(r
, e
);
4398 #[simd_test(enable = "sse2")]
4399 unsafe fn test_mm_cmpord_sd() {
4400 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4401 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4402 let r
= transmute
::<_
, __m128i
>(_mm_cmpord_sd(a
, b
));
4403 assert_eq_m128i(r
, e
);
4406 #[simd_test(enable = "sse2")]
4407 unsafe fn test_mm_cmpunord_sd() {
4408 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4409 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4410 let r
= transmute
::<_
, __m128i
>(_mm_cmpunord_sd(a
, b
));
4411 assert_eq_m128i(r
, e
);
4414 #[simd_test(enable = "sse2")]
4415 unsafe fn test_mm_cmpneq_sd() {
4416 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4417 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4418 let r
= transmute
::<_
, __m128i
>(_mm_cmpneq_sd(a
, b
));
4419 assert_eq_m128i(r
, e
);
4422 #[simd_test(enable = "sse2")]
4423 unsafe fn test_mm_cmpnlt_sd() {
4424 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4425 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4426 let r
= transmute
::<_
, __m128i
>(_mm_cmpnlt_sd(a
, b
));
4427 assert_eq_m128i(r
, e
);
4430 #[simd_test(enable = "sse2")]
4431 unsafe fn test_mm_cmpnle_sd() {
4432 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4433 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4434 let r
= transmute
::<_
, __m128i
>(_mm_cmpnle_sd(a
, b
));
4435 assert_eq_m128i(r
, e
);
4438 #[simd_test(enable = "sse2")]
4439 unsafe fn test_mm_cmpngt_sd() {
4440 let (a
, b
) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4441 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4442 let r
= transmute
::<_
, __m128i
>(_mm_cmpngt_sd(a
, b
));
4443 assert_eq_m128i(r
, e
);
4446 #[simd_test(enable = "sse2")]
4447 unsafe fn test_mm_cmpnge_sd() {
4448 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4449 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4450 let r
= transmute
::<_
, __m128i
>(_mm_cmpnge_sd(a
, b
));
4451 assert_eq_m128i(r
, e
);
4454 #[simd_test(enable = "sse2")]
4455 unsafe fn test_mm_cmpeq_pd() {
4456 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4457 let e
= _mm_setr_epi64x(!0, 0);
4458 let r
= transmute
::<_
, __m128i
>(_mm_cmpeq_pd(a
, b
));
4459 assert_eq_m128i(r
, e
);
4462 #[simd_test(enable = "sse2")]
4463 unsafe fn test_mm_cmplt_pd() {
4464 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4465 let e
= _mm_setr_epi64x(0, !0);
4466 let r
= transmute
::<_
, __m128i
>(_mm_cmplt_pd(a
, b
));
4467 assert_eq_m128i(r
, e
);
4470 #[simd_test(enable = "sse2")]
4471 unsafe fn test_mm_cmple_pd() {
4472 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4473 let e
= _mm_setr_epi64x(!0, !0);
4474 let r
= transmute
::<_
, __m128i
>(_mm_cmple_pd(a
, b
));
4475 assert_eq_m128i(r
, e
);
4478 #[simd_test(enable = "sse2")]
4479 unsafe fn test_mm_cmpgt_pd() {
4480 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4481 let e
= _mm_setr_epi64x(0, 0);
4482 let r
= transmute
::<_
, __m128i
>(_mm_cmpgt_pd(a
, b
));
4483 assert_eq_m128i(r
, e
);
4486 #[simd_test(enable = "sse2")]
4487 unsafe fn test_mm_cmpge_pd() {
4488 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4489 let e
= _mm_setr_epi64x(!0, 0);
4490 let r
= transmute
::<_
, __m128i
>(_mm_cmpge_pd(a
, b
));
4491 assert_eq_m128i(r
, e
);
4494 #[simd_test(enable = "sse2")]
4495 unsafe fn test_mm_cmpord_pd() {
4496 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4497 let e
= _mm_setr_epi64x(0, !0);
4498 let r
= transmute
::<_
, __m128i
>(_mm_cmpord_pd(a
, b
));
4499 assert_eq_m128i(r
, e
);
4502 #[simd_test(enable = "sse2")]
4503 unsafe fn test_mm_cmpunord_pd() {
4504 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4505 let e
= _mm_setr_epi64x(!0, 0);
4506 let r
= transmute
::<_
, __m128i
>(_mm_cmpunord_pd(a
, b
));
4507 assert_eq_m128i(r
, e
);
4510 #[simd_test(enable = "sse2")]
4511 unsafe fn test_mm_cmpneq_pd() {
4512 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4513 let e
= _mm_setr_epi64x(!0, !0);
4514 let r
= transmute
::<_
, __m128i
>(_mm_cmpneq_pd(a
, b
));
4515 assert_eq_m128i(r
, e
);
4518 #[simd_test(enable = "sse2")]
4519 unsafe fn test_mm_cmpnlt_pd() {
4520 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4521 let e
= _mm_setr_epi64x(0, 0);
4522 let r
= transmute
::<_
, __m128i
>(_mm_cmpnlt_pd(a
, b
));
4523 assert_eq_m128i(r
, e
);
4526 #[simd_test(enable = "sse2")]
4527 unsafe fn test_mm_cmpnle_pd() {
4528 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4529 let e
= _mm_setr_epi64x(0, 0);
4530 let r
= transmute
::<_
, __m128i
>(_mm_cmpnle_pd(a
, b
));
4531 assert_eq_m128i(r
, e
);
4534 #[simd_test(enable = "sse2")]
4535 unsafe fn test_mm_cmpngt_pd() {
4536 let (a
, b
) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4537 let e
= _mm_setr_epi64x(0, !0);
4538 let r
= transmute
::<_
, __m128i
>(_mm_cmpngt_pd(a
, b
));
4539 assert_eq_m128i(r
, e
);
4542 #[simd_test(enable = "sse2")]
4543 unsafe fn test_mm_cmpnge_pd() {
4544 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4545 let e
= _mm_setr_epi64x(0, !0);
4546 let r
= transmute
::<_
, __m128i
>(_mm_cmpnge_pd(a
, b
));
4547 assert_eq_m128i(r
, e
);
4550 #[simd_test(enable = "sse2")]
4551 unsafe fn test_mm_comieq_sd() {
4552 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4553 assert
!(_mm_comieq_sd(a
, b
) != 0);
4555 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(1.0, 3.0));
4556 assert
!(_mm_comieq_sd(a
, b
) == 0);
4559 #[simd_test(enable = "sse2")]
4560 unsafe fn test_mm_comilt_sd() {
4561 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4562 assert
!(_mm_comilt_sd(a
, b
) == 0);
4565 #[simd_test(enable = "sse2")]
4566 unsafe fn test_mm_comile_sd() {
4567 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4568 assert
!(_mm_comile_sd(a
, b
) != 0);
4571 #[simd_test(enable = "sse2")]
4572 unsafe fn test_mm_comigt_sd() {
4573 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4574 assert
!(_mm_comigt_sd(a
, b
) == 0);
4577 #[simd_test(enable = "sse2")]
4578 unsafe fn test_mm_comige_sd() {
4579 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4580 assert
!(_mm_comige_sd(a
, b
) != 0);
4583 #[simd_test(enable = "sse2")]
4584 unsafe fn test_mm_comineq_sd() {
4585 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4586 assert
!(_mm_comineq_sd(a
, b
) == 0);
4589 #[simd_test(enable = "sse2")]
4590 unsafe fn test_mm_ucomieq_sd() {
4591 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4592 assert
!(_mm_ucomieq_sd(a
, b
) != 0);
4594 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(NAN
, 3.0));
4595 assert
!(_mm_ucomieq_sd(a
, b
) == 0);
4598 #[simd_test(enable = "sse2")]
4599 unsafe fn test_mm_ucomilt_sd() {
4600 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4601 assert
!(_mm_ucomilt_sd(a
, b
) == 0);
4604 #[simd_test(enable = "sse2")]
4605 unsafe fn test_mm_ucomile_sd() {
4606 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4607 assert
!(_mm_ucomile_sd(a
, b
) != 0);
4610 #[simd_test(enable = "sse2")]
4611 unsafe fn test_mm_ucomigt_sd() {
4612 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4613 assert
!(_mm_ucomigt_sd(a
, b
) == 0);
4616 #[simd_test(enable = "sse2")]
4617 unsafe fn test_mm_ucomige_sd() {
4618 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4619 assert
!(_mm_ucomige_sd(a
, b
) != 0);
4622 #[simd_test(enable = "sse2")]
4623 unsafe fn test_mm_ucomineq_sd() {
4624 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4625 assert
!(_mm_ucomineq_sd(a
, b
) == 0);
4628 #[simd_test(enable = "sse2")]
4629 unsafe fn test_mm_movemask_pd() {
4630 let r
= _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4631 assert_eq
!(r
, 0b01);
4633 let r
= _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4634 assert_eq
!(r
, 0b11);
4642 #[simd_test(enable = "sse2")]
4643 unsafe fn test_mm_load_pd() {
4645 data
: [1.0f64, 2.0, 3.0, 4.0],
4647 let vals
= &mem
.data
;
4648 let d
= vals
.as_ptr();
4650 let r
= _mm_load_pd(d
);
4651 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.0));
4654 #[simd_test(enable = "sse2")]
4655 unsafe fn test_mm_load_sd() {
4657 let expected
= _mm_setr_pd(a
, 0.);
4658 let r
= _mm_load_sd(&a
);
4659 assert_eq_m128d(r
, expected
);
4662 #[simd_test(enable = "sse2")]
4663 unsafe fn test_mm_loadh_pd() {
4664 let a
= _mm_setr_pd(1., 2.);
4666 let expected
= _mm_setr_pd(_mm_cvtsd_f64(a
), 3.);
4667 let r
= _mm_loadh_pd(a
, &b
);
4668 assert_eq_m128d(r
, expected
);
4671 #[simd_test(enable = "sse2")]
4672 unsafe fn test_mm_loadl_pd() {
4673 let a
= _mm_setr_pd(1., 2.);
4675 let expected
= _mm_setr_pd(3., get_m128d(a
, 1));
4676 let r
= _mm_loadl_pd(a
, &b
);
4677 assert_eq_m128d(r
, expected
);
4680 #[simd_test(enable = "sse2")]
4681 unsafe fn test_mm_stream_pd() {
4686 let a
= _mm_set1_pd(7.0);
4687 let mut mem
= Memory { data: [-1.0; 2] }
;
4689 _mm_stream_pd(&mut mem
.data
[0] as *mut f64, a
);
4691 assert_eq
!(mem
.data
[i
], get_m128d(a
, i
));
4695 #[simd_test(enable = "sse2")]
4696 unsafe fn test_mm_store_sd() {
4698 let a
= _mm_setr_pd(1., 2.);
4699 _mm_store_sd(&mut dest
, a
);
4700 assert_eq
!(dest
, _mm_cvtsd_f64(a
));
4703 #[simd_test(enable = "sse2")]
4704 unsafe fn test_mm_store_pd() {
4705 let mut mem
= Memory { data: [0.0f64; 4] }
;
4706 let vals
= &mut mem
.data
;
4707 let a
= _mm_setr_pd(1.0, 2.0);
4708 let d
= vals
.as_mut_ptr();
4710 _mm_store_pd(d
, *black_box(&a
));
4711 assert_eq
!(vals
[0], 1.0);
4712 assert_eq
!(vals
[1], 2.0);
4715 #[simd_test(enable = "sse")]
4716 unsafe fn test_mm_storeu_pd() {
4717 let mut mem
= Memory { data: [0.0f64; 4] }
;
4718 let vals
= &mut mem
.data
;
4719 let a
= _mm_setr_pd(1.0, 2.0);
4722 let mut p
= vals
.as_mut_ptr();
4724 // Make sure p is **not** aligned to 16-byte boundary
4725 if (p
as usize) & 0xf == 0 {
4730 _mm_storeu_pd(p
, *black_box(&a
));
4733 assert_eq
!(vals
[ofs
- 1], 0.0);
4735 assert_eq
!(vals
[ofs
+ 0], 1.0);
4736 assert_eq
!(vals
[ofs
+ 1], 2.0);
4739 #[simd_test(enable = "sse2")]
4740 unsafe fn test_mm_store1_pd() {
4741 let mut mem
= Memory { data: [0.0f64; 4] }
;
4742 let vals
= &mut mem
.data
;
4743 let a
= _mm_setr_pd(1.0, 2.0);
4744 let d
= vals
.as_mut_ptr();
4746 _mm_store1_pd(d
, *black_box(&a
));
4747 assert_eq
!(vals
[0], 1.0);
4748 assert_eq
!(vals
[1], 1.0);
4751 #[simd_test(enable = "sse2")]
4752 unsafe fn test_mm_store_pd1() {
4753 let mut mem
= Memory { data: [0.0f64; 4] }
;
4754 let vals
= &mut mem
.data
;
4755 let a
= _mm_setr_pd(1.0, 2.0);
4756 let d
= vals
.as_mut_ptr();
4758 _mm_store_pd1(d
, *black_box(&a
));
4759 assert_eq
!(vals
[0], 1.0);
4760 assert_eq
!(vals
[1], 1.0);
4763 #[simd_test(enable = "sse2")]
4764 unsafe fn test_mm_storer_pd() {
4765 let mut mem
= Memory { data: [0.0f64; 4] }
;
4766 let vals
= &mut mem
.data
;
4767 let a
= _mm_setr_pd(1.0, 2.0);
4768 let d
= vals
.as_mut_ptr();
4770 _mm_storer_pd(d
, *black_box(&a
));
4771 assert_eq
!(vals
[0], 2.0);
4772 assert_eq
!(vals
[1], 1.0);
4775 #[simd_test(enable = "sse2")]
4776 unsafe fn test_mm_storeh_pd() {
4778 let a
= _mm_setr_pd(1., 2.);
4779 _mm_storeh_pd(&mut dest
, a
);
4780 assert_eq
!(dest
, get_m128d(a
, 1));
4783 #[simd_test(enable = "sse2")]
4784 unsafe fn test_mm_storel_pd() {
4786 let a
= _mm_setr_pd(1., 2.);
4787 _mm_storel_pd(&mut dest
, a
);
4788 assert_eq
!(dest
, _mm_cvtsd_f64(a
));
4791 #[simd_test(enable = "sse2")]
4792 unsafe fn test_mm_loadr_pd() {
4793 let mut mem
= Memory
{
4794 data
: [1.0f64, 2.0, 3.0, 4.0],
4796 let vals
= &mut mem
.data
;
4797 let d
= vals
.as_ptr();
4799 let r
= _mm_loadr_pd(d
);
4800 assert_eq_m128d(r
, _mm_setr_pd(2.0, 1.0));
4803 #[simd_test(enable = "sse2")]
4804 unsafe fn test_mm_loadu_pd() {
4805 let mut mem
= Memory
{
4806 data
: [1.0f64, 2.0, 3.0, 4.0],
4808 let vals
= &mut mem
.data
;
4809 let mut d
= vals
.as_ptr();
4811 // make sure d is not aligned to 16-byte boundary
4813 if (d
as usize) & 0xf == 0 {
4815 d
= d
.offset(offset
as isize);
4818 let r
= _mm_loadu_pd(d
);
4819 let e
= _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset
as f64));
4820 assert_eq_m128d(r
, e
);
4823 #[simd_test(enable = "sse2")]
4824 unsafe fn test_mm_cvtpd_ps() {
4825 let r
= _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4826 assert_eq_m128(r
, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4828 let r
= _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4829 assert_eq_m128(r
, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4831 let r
= _mm_cvtpd_ps(_mm_setr_pd(f64::MAX
, f64::MIN
));
4832 assert_eq_m128(r
, _mm_setr_ps(f32::INFINITY
, f32::NEG_INFINITY
, 0.0, 0.0));
4834 let r
= _mm_cvtpd_ps(_mm_setr_pd(f32::MAX
as f64, f32::MIN
as f64));
4835 assert_eq_m128(r
, _mm_setr_ps(f32::MAX
, f32::MIN
, 0.0, 0.0));
4838 #[simd_test(enable = "sse2")]
4839 unsafe fn test_mm_cvtps_pd() {
4840 let r
= _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
4841 assert_eq_m128d(r
, _mm_setr_pd(-1.0, 2.0));
4843 let r
= _mm_cvtps_pd(_mm_setr_ps(
4849 assert_eq_m128d(r
, _mm_setr_pd(f32::MAX
as f64, f64::INFINITY
));
4852 #[simd_test(enable = "sse2")]
4853 unsafe fn test_mm_cvtpd_epi32() {
4854 let r
= _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
4855 assert_eq_m128i(r
, _mm_setr_epi32(-1, 5, 0, 0));
4857 let r
= _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
4858 assert_eq_m128i(r
, _mm_setr_epi32(-1, -5, 0, 0));
4860 let r
= _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX
, f64::MIN
));
4861 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4863 let r
= _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY
, f64::NEG_INFINITY
));
4864 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4866 let r
= _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN
, f64::NAN
));
4867 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4870 #[simd_test(enable = "sse2")]
4871 unsafe fn test_mm_cvtsd_si32() {
4872 let r
= _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
4875 let r
= _mm_cvtsd_si32(_mm_setr_pd(f64::MAX
, f64::MIN
));
4876 assert_eq
!(r
, i32::MIN
);
4878 let r
= _mm_cvtsd_si32(_mm_setr_pd(f64::NAN
, f64::NAN
));
4879 assert_eq
!(r
, i32::MIN
);
4882 #[simd_test(enable = "sse2")]
4883 unsafe fn test_mm_cvtsd_ss() {
4884 let a
= _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
4885 let b
= _mm_setr_pd(2.0, -5.0);
4887 let r
= _mm_cvtsd_ss(a
, b
);
4889 assert_eq_m128(r
, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
4891 let a
= _mm_setr_ps(-1.1, f32::NEG_INFINITY
, f32::MAX
, f32::NEG_INFINITY
);
4892 let b
= _mm_setr_pd(f64::INFINITY
, -5.0);
4894 let r
= _mm_cvtsd_ss(a
, b
);
4907 #[simd_test(enable = "sse2")]
4908 unsafe fn test_mm_cvtsd_f64() {
4909 let r
= _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
4910 assert_eq
!(r
, -1.1);
4913 #[simd_test(enable = "sse2")]
4914 unsafe fn test_mm_cvtss_sd() {
4915 let a
= _mm_setr_pd(-1.1, 2.2);
4916 let b
= _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4918 let r
= _mm_cvtss_sd(a
, b
);
4919 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.2));
4921 let a
= _mm_setr_pd(-1.1, f64::INFINITY
);
4922 let b
= _mm_setr_ps(f32::NEG_INFINITY
, 2.0, 3.0, 4.0);
4924 let r
= _mm_cvtss_sd(a
, b
);
4925 assert_eq_m128d(r
, _mm_setr_pd(f64::NEG_INFINITY
, f64::INFINITY
));
4928 #[simd_test(enable = "sse2")]
4929 unsafe fn test_mm_cvttpd_epi32() {
4930 let a
= _mm_setr_pd(-1.1, 2.2);
4931 let r
= _mm_cvttpd_epi32(a
);
4932 assert_eq_m128i(r
, _mm_setr_epi32(-1, 2, 0, 0));
4934 let a
= _mm_setr_pd(f64::NEG_INFINITY
, f64::NAN
);
4935 let r
= _mm_cvttpd_epi32(a
);
4936 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4939 #[simd_test(enable = "sse2")]
4940 unsafe fn test_mm_cvttsd_si32() {
4941 let a
= _mm_setr_pd(-1.1, 2.2);
4942 let r
= _mm_cvttsd_si32(a
);
4945 let a
= _mm_setr_pd(f64::NEG_INFINITY
, f64::NAN
);
4946 let r
= _mm_cvttsd_si32(a
);
4947 assert_eq
!(r
, i32::MIN
);
4950 #[simd_test(enable = "sse2")]
4951 unsafe fn test_mm_cvttps_epi32() {
4952 let a
= _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
4953 let r
= _mm_cvttps_epi32(a
);
4954 assert_eq_m128i(r
, _mm_setr_epi32(-1, 2, -3, 6));
4956 let a
= _mm_setr_ps(f32::NEG_INFINITY
, f32::INFINITY
, f32::MIN
, f32::MAX
);
4957 let r
= _mm_cvttps_epi32(a
);
4958 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, i32::MIN
, i32::MIN
));
4961 #[simd_test(enable = "sse2")]
4962 unsafe fn test_mm_set_sd() {
4963 let r
= _mm_set_sd(-1.0_f64);
4964 assert_eq_m128d(r
, _mm_setr_pd(-1.0_f64, 0_f64));
4967 #[simd_test(enable = "sse2")]
4968 unsafe fn test_mm_set1_pd() {
4969 let r
= _mm_set1_pd(-1.0_f64);
4970 assert_eq_m128d(r
, _mm_setr_pd(-1.0_f64, -1.0_f64));
4973 #[simd_test(enable = "sse2")]
4974 unsafe fn test_mm_set_pd1() {
4975 let r
= _mm_set_pd1(-2.0_f64);
4976 assert_eq_m128d(r
, _mm_setr_pd(-2.0_f64, -2.0_f64));
4979 #[simd_test(enable = "sse2")]
4980 unsafe fn test_mm_set_pd() {
4981 let r
= _mm_set_pd(1.0_f64, 5.0_f64);
4982 assert_eq_m128d(r
, _mm_setr_pd(5.0_f64, 1.0_f64));
4985 #[simd_test(enable = "sse2")]
4986 unsafe fn test_mm_setr_pd() {
4987 let r
= _mm_setr_pd(1.0_f64, -5.0_f64);
4988 assert_eq_m128d(r
, _mm_setr_pd(1.0_f64, -5.0_f64));
4991 #[simd_test(enable = "sse2")]
4992 unsafe fn test_mm_setzero_pd() {
4993 let r
= _mm_setzero_pd();
4994 assert_eq_m128d(r
, _mm_setr_pd(0_f64, 0_f64));
4997 #[simd_test(enable = "sse2")]
4998 unsafe fn test_mm_load1_pd() {
5000 let r
= _mm_load1_pd(&d
);
5001 assert_eq_m128d(r
, _mm_setr_pd(d
, d
));
5004 #[simd_test(enable = "sse2")]
5005 unsafe fn test_mm_load_pd1() {
5007 let r
= _mm_load_pd1(&d
);
5008 assert_eq_m128d(r
, _mm_setr_pd(d
, d
));
5011 #[simd_test(enable = "sse2")]
5012 unsafe fn test_mm_unpackhi_pd() {
5013 let a
= _mm_setr_pd(1.0, 2.0);
5014 let b
= _mm_setr_pd(3.0, 4.0);
5015 let r
= _mm_unpackhi_pd(a
, b
);
5016 assert_eq_m128d(r
, _mm_setr_pd(2.0, 4.0));
5019 #[simd_test(enable = "sse2")]
5020 unsafe fn test_mm_unpacklo_pd() {
5021 let a
= _mm_setr_pd(1.0, 2.0);
5022 let b
= _mm_setr_pd(3.0, 4.0);
5023 let r
= _mm_unpacklo_pd(a
, b
);
5024 assert_eq_m128d(r
, _mm_setr_pd(1.0, 3.0));
5027 #[simd_test(enable = "sse2")]
5028 unsafe fn test_mm_shuffle_pd() {
5029 let a
= _mm_setr_pd(1., 2.);
5030 let b
= _mm_setr_pd(3., 4.);
5031 let expected
= _mm_setr_pd(1., 3.);
5032 let r
= _mm_shuffle_pd(a
, b
, 0);
5033 assert_eq_m128d(r
, expected
);
5036 #[simd_test(enable = "sse2")]
5037 unsafe fn test_mm_move_sd() {
5038 let a
= _mm_setr_pd(1., 2.);
5039 let b
= _mm_setr_pd(3., 4.);
5040 let expected
= _mm_setr_pd(3., 2.);
5041 let r
= _mm_move_sd(a
, b
);
5042 assert_eq_m128d(r
, expected
);
5045 #[simd_test(enable = "sse2")]
5046 unsafe fn test_mm_castpd_ps() {
5047 let a
= _mm_set1_pd(0.);
5048 let expected
= _mm_set1_ps(0.);
5049 let r
= _mm_castpd_ps(a
);
5050 assert_eq_m128(r
, expected
);
5053 #[simd_test(enable = "sse2")]
5054 unsafe fn test_mm_castpd_si128() {
5055 let a
= _mm_set1_pd(0.);
5056 let expected
= _mm_set1_epi64x(0);
5057 let r
= _mm_castpd_si128(a
);
5058 assert_eq_m128i(r
, expected
);
5061 #[simd_test(enable = "sse2")]
5062 unsafe fn test_mm_castps_pd() {
5063 let a
= _mm_set1_ps(0.);
5064 let expected
= _mm_set1_pd(0.);
5065 let r
= _mm_castps_pd(a
);
5066 assert_eq_m128d(r
, expected
);
5069 #[simd_test(enable = "sse2")]
5070 unsafe fn test_mm_castps_si128() {
5071 let a
= _mm_set1_ps(0.);
5072 let expected
= _mm_set1_epi32(0);
5073 let r
= _mm_castps_si128(a
);
5074 assert_eq_m128i(r
, expected
);
5077 #[simd_test(enable = "sse2")]
5078 unsafe fn test_mm_castsi128_pd() {
5079 let a
= _mm_set1_epi64x(0);
5080 let expected
= _mm_set1_pd(0.);
5081 let r
= _mm_castsi128_pd(a
);
5082 assert_eq_m128d(r
, expected
);
5085 #[simd_test(enable = "sse2")]
5086 unsafe fn test_mm_castsi128_ps() {
5087 let a
= _mm_set1_epi32(0);
5088 let expected
= _mm_set1_ps(0.);
5089 let r
= _mm_castsi128_ps(a
);
5090 assert_eq_m128(r
, expected
);