1 //! Streaming SIMD Extensions 2 (SSE2)
4 use stdarch_test
::assert_instr
;
7 core_arch
::{simd::*, simd_llvm::*, x86::*}
,
9 mem
::{self, transmute}
,
13 /// Provides a hint to the processor that the code sequence is a spin-wait loop.
15 /// This can help improve the performance and power consumption of spin-wait
18 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
20 #[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
21 #[stable(feature = "simd_x86", since = "1.27.0")]
22 pub unsafe fn _mm_pause() {
23 // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
24 // the SSE2 target-feature - therefore it does not require any target features
28 /// Invalidates and flushes the cache line that contains `p` from all levels of
29 /// the cache hierarchy.
31 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
33 #[target_feature(enable = "sse2")]
34 #[cfg_attr(test, assert_instr(clflush))]
35 #[stable(feature = "simd_x86", since = "1.27.0")]
36 pub unsafe fn _mm_clflush(p
: *const u8) {
40 /// Performs a serializing operation on all load-from-memory instructions
41 /// that were issued prior to this instruction.
43 /// Guarantees that every load instruction that precedes, in program order, is
44 /// globally visible before any load instruction which follows the fence in
47 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
49 #[target_feature(enable = "sse2")]
50 #[cfg_attr(test, assert_instr(lfence))]
51 #[stable(feature = "simd_x86", since = "1.27.0")]
52 pub unsafe fn _mm_lfence() {
56 /// Performs a serializing operation on all load-from-memory and store-to-memory
57 /// instructions that were issued prior to this instruction.
59 /// Guarantees that every memory access that precedes, in program order, the
60 /// memory fence instruction is globally visible before any memory instruction
61 /// which follows the fence in program order.
63 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
65 #[target_feature(enable = "sse2")]
66 #[cfg_attr(test, assert_instr(mfence))]
67 #[stable(feature = "simd_x86", since = "1.27.0")]
68 pub unsafe fn _mm_mfence() {
72 /// Adds packed 8-bit integers in `a` and `b`.
74 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
76 #[target_feature(enable = "sse2")]
77 #[cfg_attr(test, assert_instr(paddb))]
78 #[stable(feature = "simd_x86", since = "1.27.0")]
79 pub unsafe fn _mm_add_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
80 transmute(simd_add(a
.as_i8x16(), b
.as_i8x16()))
83 /// Adds packed 16-bit integers in `a` and `b`.
85 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
87 #[target_feature(enable = "sse2")]
88 #[cfg_attr(test, assert_instr(paddw))]
89 #[stable(feature = "simd_x86", since = "1.27.0")]
90 pub unsafe fn _mm_add_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
91 transmute(simd_add(a
.as_i16x8(), b
.as_i16x8()))
94 /// Adds packed 32-bit integers in `a` and `b`.
96 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
98 #[target_feature(enable = "sse2")]
99 #[cfg_attr(test, assert_instr(paddd))]
100 #[stable(feature = "simd_x86", since = "1.27.0")]
101 pub unsafe fn _mm_add_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
102 transmute(simd_add(a
.as_i32x4(), b
.as_i32x4()))
105 /// Adds packed 64-bit integers in `a` and `b`.
107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
109 #[target_feature(enable = "sse2")]
110 #[cfg_attr(test, assert_instr(paddq))]
111 #[stable(feature = "simd_x86", since = "1.27.0")]
112 pub unsafe fn _mm_add_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
113 transmute(simd_add(a
.as_i64x2(), b
.as_i64x2()))
116 /// Adds packed 8-bit integers in `a` and `b` using saturation.
118 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
120 #[target_feature(enable = "sse2")]
121 #[cfg_attr(test, assert_instr(paddsb))]
122 #[stable(feature = "simd_x86", since = "1.27.0")]
123 pub unsafe fn _mm_adds_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
124 transmute(simd_saturating_add(a
.as_i8x16(), b
.as_i8x16()))
127 /// Adds packed 16-bit integers in `a` and `b` using saturation.
129 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
131 #[target_feature(enable = "sse2")]
132 #[cfg_attr(test, assert_instr(paddsw))]
133 #[stable(feature = "simd_x86", since = "1.27.0")]
134 pub unsafe fn _mm_adds_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
135 transmute(simd_saturating_add(a
.as_i16x8(), b
.as_i16x8()))
138 /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
140 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
142 #[target_feature(enable = "sse2")]
143 #[cfg_attr(test, assert_instr(paddusb))]
144 #[stable(feature = "simd_x86", since = "1.27.0")]
145 pub unsafe fn _mm_adds_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
146 transmute(simd_saturating_add(a
.as_u8x16(), b
.as_u8x16()))
149 /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
151 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
153 #[target_feature(enable = "sse2")]
154 #[cfg_attr(test, assert_instr(paddusw))]
155 #[stable(feature = "simd_x86", since = "1.27.0")]
156 pub unsafe fn _mm_adds_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
157 transmute(simd_saturating_add(a
.as_u16x8(), b
.as_u16x8()))
160 /// Averages packed unsigned 8-bit integers in `a` and `b`.
162 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
164 #[target_feature(enable = "sse2")]
165 #[cfg_attr(test, assert_instr(pavgb))]
166 #[stable(feature = "simd_x86", since = "1.27.0")]
167 pub unsafe fn _mm_avg_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
168 transmute(pavgb(a
.as_u8x16(), b
.as_u8x16()))
171 /// Averages packed unsigned 16-bit integers in `a` and `b`.
173 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
175 #[target_feature(enable = "sse2")]
176 #[cfg_attr(test, assert_instr(pavgw))]
177 #[stable(feature = "simd_x86", since = "1.27.0")]
178 pub unsafe fn _mm_avg_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
179 transmute(pavgw(a
.as_u16x8(), b
.as_u16x8()))
182 /// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`.
184 /// Multiplies packed signed 16-bit integers in `a` and `b`, producing
185 /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
186 /// intermediate 32-bit integers.
188 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
190 #[target_feature(enable = "sse2")]
191 #[cfg_attr(test, assert_instr(pmaddwd))]
192 #[stable(feature = "simd_x86", since = "1.27.0")]
193 pub unsafe fn _mm_madd_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
194 transmute(pmaddwd(a
.as_i16x8(), b
.as_i16x8()))
197 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
200 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
202 #[target_feature(enable = "sse2")]
203 #[cfg_attr(test, assert_instr(pmaxsw))]
204 #[stable(feature = "simd_x86", since = "1.27.0")]
205 pub unsafe fn _mm_max_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
206 transmute(pmaxsw(a
.as_i16x8(), b
.as_i16x8()))
209 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
210 /// packed maximum values.
212 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
214 #[target_feature(enable = "sse2")]
215 #[cfg_attr(test, assert_instr(pmaxub))]
216 #[stable(feature = "simd_x86", since = "1.27.0")]
217 pub unsafe fn _mm_max_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
218 transmute(pmaxub(a
.as_u8x16(), b
.as_u8x16()))
221 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
224 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
226 #[target_feature(enable = "sse2")]
227 #[cfg_attr(test, assert_instr(pminsw))]
228 #[stable(feature = "simd_x86", since = "1.27.0")]
229 pub unsafe fn _mm_min_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
230 transmute(pminsw(a
.as_i16x8(), b
.as_i16x8()))
233 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
234 /// packed minimum values.
236 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
238 #[target_feature(enable = "sse2")]
239 #[cfg_attr(test, assert_instr(pminub))]
240 #[stable(feature = "simd_x86", since = "1.27.0")]
241 pub unsafe fn _mm_min_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
242 transmute(pminub(a
.as_u8x16(), b
.as_u8x16()))
245 /// Multiplies the packed 16-bit integers in `a` and `b`.
247 /// The multiplication produces intermediate 32-bit integers, and returns the
248 /// high 16 bits of the intermediate integers.
250 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
252 #[target_feature(enable = "sse2")]
253 #[cfg_attr(test, assert_instr(pmulhw))]
254 #[stable(feature = "simd_x86", since = "1.27.0")]
255 pub unsafe fn _mm_mulhi_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
256 transmute(pmulhw(a
.as_i16x8(), b
.as_i16x8()))
259 /// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
261 /// The multiplication produces intermediate 32-bit integers, and returns the
262 /// high 16 bits of the intermediate integers.
264 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
266 #[target_feature(enable = "sse2")]
267 #[cfg_attr(test, assert_instr(pmulhuw))]
268 #[stable(feature = "simd_x86", since = "1.27.0")]
269 pub unsafe fn _mm_mulhi_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
270 transmute(pmulhuw(a
.as_u16x8(), b
.as_u16x8()))
273 /// Multiplies the packed 16-bit integers in `a` and `b`.
275 /// The multiplication produces intermediate 32-bit integers, and returns the
276 /// low 16 bits of the intermediate integers.
278 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
280 #[target_feature(enable = "sse2")]
281 #[cfg_attr(test, assert_instr(pmullw))]
282 #[stable(feature = "simd_x86", since = "1.27.0")]
283 pub unsafe fn _mm_mullo_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
284 transmute(simd_mul(a
.as_i16x8(), b
.as_i16x8()))
287 /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
290 /// Returns the unsigned 64-bit results.
292 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
294 #[target_feature(enable = "sse2")]
295 #[cfg_attr(test, assert_instr(pmuludq))]
296 #[stable(feature = "simd_x86", since = "1.27.0")]
297 pub unsafe fn _mm_mul_epu32(a
: __m128i
, b
: __m128i
) -> __m128i
{
298 transmute(pmuludq(a
.as_u32x4(), b
.as_u32x4()))
301 /// Sum the absolute differences of packed unsigned 8-bit integers.
303 /// Computes the absolute differences of packed unsigned 8-bit integers in `a`
304 /// and `b`, then horizontally sum each consecutive 8 differences to produce
305 /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
306 /// the low 16 bits of 64-bit elements returned.
308 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
310 #[target_feature(enable = "sse2")]
311 #[cfg_attr(test, assert_instr(psadbw))]
312 #[stable(feature = "simd_x86", since = "1.27.0")]
313 pub unsafe fn _mm_sad_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
314 transmute(psadbw(a
.as_u8x16(), b
.as_u8x16()))
317 /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
319 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
321 #[target_feature(enable = "sse2")]
322 #[cfg_attr(test, assert_instr(psubb))]
323 #[stable(feature = "simd_x86", since = "1.27.0")]
324 pub unsafe fn _mm_sub_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
325 transmute(simd_sub(a
.as_i8x16(), b
.as_i8x16()))
328 /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
330 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
332 #[target_feature(enable = "sse2")]
333 #[cfg_attr(test, assert_instr(psubw))]
334 #[stable(feature = "simd_x86", since = "1.27.0")]
335 pub unsafe fn _mm_sub_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
336 transmute(simd_sub(a
.as_i16x8(), b
.as_i16x8()))
339 /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
343 #[target_feature(enable = "sse2")]
344 #[cfg_attr(test, assert_instr(psubd))]
345 #[stable(feature = "simd_x86", since = "1.27.0")]
346 pub unsafe fn _mm_sub_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
347 transmute(simd_sub(a
.as_i32x4(), b
.as_i32x4()))
350 /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
352 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
354 #[target_feature(enable = "sse2")]
355 #[cfg_attr(test, assert_instr(psubq))]
356 #[stable(feature = "simd_x86", since = "1.27.0")]
357 pub unsafe fn _mm_sub_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
358 transmute(simd_sub(a
.as_i64x2(), b
.as_i64x2()))
361 /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
362 /// using saturation.
364 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
366 #[target_feature(enable = "sse2")]
367 #[cfg_attr(test, assert_instr(psubsb))]
368 #[stable(feature = "simd_x86", since = "1.27.0")]
369 pub unsafe fn _mm_subs_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
370 transmute(simd_saturating_sub(a
.as_i8x16(), b
.as_i8x16()))
373 /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
374 /// using saturation.
376 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
378 #[target_feature(enable = "sse2")]
379 #[cfg_attr(test, assert_instr(psubsw))]
380 #[stable(feature = "simd_x86", since = "1.27.0")]
381 pub unsafe fn _mm_subs_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
382 transmute(simd_saturating_sub(a
.as_i16x8(), b
.as_i16x8()))
385 /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
386 /// integers in `a` using saturation.
388 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
390 #[target_feature(enable = "sse2")]
391 #[cfg_attr(test, assert_instr(psubusb))]
392 #[stable(feature = "simd_x86", since = "1.27.0")]
393 pub unsafe fn _mm_subs_epu8(a
: __m128i
, b
: __m128i
) -> __m128i
{
394 transmute(simd_saturating_sub(a
.as_u8x16(), b
.as_u8x16()))
397 /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
398 /// integers in `a` using saturation.
400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
402 #[target_feature(enable = "sse2")]
403 #[cfg_attr(test, assert_instr(psubusw))]
404 #[stable(feature = "simd_x86", since = "1.27.0")]
405 pub unsafe fn _mm_subs_epu16(a
: __m128i
, b
: __m128i
) -> __m128i
{
406 transmute(simd_saturating_sub(a
.as_u16x8(), b
.as_u16x8()))
409 /// Shifts `a` left by `IMM8` bytes while shifting in zeros.
411 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
413 #[target_feature(enable = "sse2")]
414 #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
415 #[rustc_legacy_const_generics(1)]
416 #[stable(feature = "simd_x86", since = "1.27.0")]
417 pub unsafe fn _mm_slli_si128
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
418 static_assert_imm8
!(IMM8
);
419 _mm_slli_si128_impl
::<IMM8
>(a
)
422 /// Implementation detail: converts the immediate argument of the
423 /// `_mm_slli_si128` intrinsic into a compile-time constant.
425 #[target_feature(enable = "sse2")]
426 unsafe fn _mm_slli_si128_impl
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
427 const fn mask(shift
: i32, i
: u32) -> u32 {
428 let shift
= shift
as u32 & 0xff;
435 let zero
= _mm_set1_epi8(0).as_i8x16();
436 transmute
::<i8x16
, _
>(simd_shuffle16
!(
460 /// Shifts `a` left by `IMM8` bytes while shifting in zeros.
462 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
464 #[target_feature(enable = "sse2")]
465 #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
466 #[rustc_legacy_const_generics(1)]
467 #[stable(feature = "simd_x86", since = "1.27.0")]
468 pub unsafe fn _mm_bslli_si128
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
469 static_assert_imm8
!(IMM8
);
470 _mm_slli_si128_impl
::<IMM8
>(a
)
473 /// Shifts `a` right by `IMM8` bytes while shifting in zeros.
475 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
477 #[target_feature(enable = "sse2")]
478 #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
479 #[rustc_legacy_const_generics(1)]
480 #[stable(feature = "simd_x86", since = "1.27.0")]
481 pub unsafe fn _mm_bsrli_si128
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
482 static_assert_imm8
!(IMM8
);
483 _mm_srli_si128_impl
::<IMM8
>(a
)
486 /// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
488 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
490 #[target_feature(enable = "sse2")]
491 #[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
492 #[rustc_legacy_const_generics(1)]
493 #[stable(feature = "simd_x86", since = "1.27.0")]
494 pub unsafe fn _mm_slli_epi16
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
495 static_assert_imm8
!(IMM8
);
496 transmute(pslliw(a
.as_i16x8(), IMM8
))
499 /// Shifts packed 16-bit integers in `a` left by `count` while shifting in
502 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
504 #[target_feature(enable = "sse2")]
505 #[cfg_attr(test, assert_instr(psllw))]
506 #[stable(feature = "simd_x86", since = "1.27.0")]
507 pub unsafe fn _mm_sll_epi16(a
: __m128i
, count
: __m128i
) -> __m128i
{
508 transmute(psllw(a
.as_i16x8(), count
.as_i16x8()))
511 /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
513 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
515 #[target_feature(enable = "sse2")]
516 #[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
517 #[rustc_legacy_const_generics(1)]
518 #[stable(feature = "simd_x86", since = "1.27.0")]
519 pub unsafe fn _mm_slli_epi32
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
520 static_assert_imm8
!(IMM8
);
521 transmute(psllid(a
.as_i32x4(), IMM8
))
524 /// Shifts packed 32-bit integers in `a` left by `count` while shifting in
527 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
529 #[target_feature(enable = "sse2")]
530 #[cfg_attr(test, assert_instr(pslld))]
531 #[stable(feature = "simd_x86", since = "1.27.0")]
532 pub unsafe fn _mm_sll_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
533 transmute(pslld(a
.as_i32x4(), count
.as_i32x4()))
536 /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
538 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
540 #[target_feature(enable = "sse2")]
541 #[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
542 #[rustc_legacy_const_generics(1)]
543 #[stable(feature = "simd_x86", since = "1.27.0")]
544 pub unsafe fn _mm_slli_epi64
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
545 static_assert_imm8
!(IMM8
);
546 transmute(pslliq(a
.as_i64x2(), IMM8
))
549 /// Shifts packed 64-bit integers in `a` left by `count` while shifting in
552 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
554 #[target_feature(enable = "sse2")]
555 #[cfg_attr(test, assert_instr(psllq))]
556 #[stable(feature = "simd_x86", since = "1.27.0")]
557 pub unsafe fn _mm_sll_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
558 transmute(psllq(a
.as_i64x2(), count
.as_i64x2()))
561 /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
564 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
566 #[target_feature(enable = "sse2")]
567 #[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
568 #[rustc_legacy_const_generics(1)]
569 #[stable(feature = "simd_x86", since = "1.27.0")]
570 pub unsafe fn _mm_srai_epi16
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
571 static_assert_imm8
!(IMM8
);
572 transmute(psraiw(a
.as_i16x8(), IMM8
))
575 /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
578 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
580 #[target_feature(enable = "sse2")]
581 #[cfg_attr(test, assert_instr(psraw))]
582 #[stable(feature = "simd_x86", since = "1.27.0")]
583 pub unsafe fn _mm_sra_epi16(a
: __m128i
, count
: __m128i
) -> __m128i
{
584 transmute(psraw(a
.as_i16x8(), count
.as_i16x8()))
587 /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
590 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
592 #[target_feature(enable = "sse2")]
593 #[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
594 #[rustc_legacy_const_generics(1)]
595 #[stable(feature = "simd_x86", since = "1.27.0")]
596 pub unsafe fn _mm_srai_epi32
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
597 static_assert_imm8
!(IMM8
);
598 transmute(psraid(a
.as_i32x4(), IMM8
))
601 /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
604 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
606 #[target_feature(enable = "sse2")]
607 #[cfg_attr(test, assert_instr(psrad))]
608 #[stable(feature = "simd_x86", since = "1.27.0")]
609 pub unsafe fn _mm_sra_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
610 transmute(psrad(a
.as_i32x4(), count
.as_i32x4()))
613 /// Shifts `a` right by `IMM8` bytes while shifting in zeros.
615 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
617 #[target_feature(enable = "sse2")]
618 #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
619 #[rustc_legacy_const_generics(1)]
620 #[stable(feature = "simd_x86", since = "1.27.0")]
621 pub unsafe fn _mm_srli_si128
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
622 static_assert_imm8
!(IMM8
);
623 _mm_srli_si128_impl
::<IMM8
>(a
)
626 /// Implementation detail: converts the immediate argument of the
627 /// `_mm_srli_si128` intrinsic into a compile-time constant.
629 #[target_feature(enable = "sse2")]
630 unsafe fn _mm_srli_si128_impl
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
631 const fn mask(shift
: i32, i
: u32) -> u32 {
632 if (shift
as u32) > 15 {
638 let zero
= _mm_set1_epi8(0).as_i8x16();
639 let x
: i8x16
= simd_shuffle16
!(
664 /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
667 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
669 #[target_feature(enable = "sse2")]
670 #[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
671 #[rustc_legacy_const_generics(1)]
672 #[stable(feature = "simd_x86", since = "1.27.0")]
673 pub unsafe fn _mm_srli_epi16
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
674 static_assert_imm8
!(IMM8
);
675 transmute(psrliw(a
.as_i16x8(), IMM8
))
678 /// Shifts packed 16-bit integers in `a` right by `count` while shifting in
681 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
683 #[target_feature(enable = "sse2")]
684 #[cfg_attr(test, assert_instr(psrlw))]
685 #[stable(feature = "simd_x86", since = "1.27.0")]
686 pub unsafe fn _mm_srl_epi16(a
: __m128i
, count
: __m128i
) -> __m128i
{
687 transmute(psrlw(a
.as_i16x8(), count
.as_i16x8()))
690 /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
693 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
695 #[target_feature(enable = "sse2")]
696 #[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
697 #[rustc_legacy_const_generics(1)]
698 #[stable(feature = "simd_x86", since = "1.27.0")]
699 pub unsafe fn _mm_srli_epi32
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
700 static_assert_imm8
!(IMM8
);
701 transmute(psrlid(a
.as_i32x4(), IMM8
))
704 /// Shifts packed 32-bit integers in `a` right by `count` while shifting in
707 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
709 #[target_feature(enable = "sse2")]
710 #[cfg_attr(test, assert_instr(psrld))]
711 #[stable(feature = "simd_x86", since = "1.27.0")]
712 pub unsafe fn _mm_srl_epi32(a
: __m128i
, count
: __m128i
) -> __m128i
{
713 transmute(psrld(a
.as_i32x4(), count
.as_i32x4()))
716 /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
719 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
721 #[target_feature(enable = "sse2")]
722 #[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
723 #[rustc_legacy_const_generics(1)]
724 #[stable(feature = "simd_x86", since = "1.27.0")]
725 pub unsafe fn _mm_srli_epi64
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
726 static_assert_imm8
!(IMM8
);
727 transmute(psrliq(a
.as_i64x2(), IMM8
))
730 /// Shifts packed 64-bit integers in `a` right by `count` while shifting in
733 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
735 #[target_feature(enable = "sse2")]
736 #[cfg_attr(test, assert_instr(psrlq))]
737 #[stable(feature = "simd_x86", since = "1.27.0")]
738 pub unsafe fn _mm_srl_epi64(a
: __m128i
, count
: __m128i
) -> __m128i
{
739 transmute(psrlq(a
.as_i64x2(), count
.as_i64x2()))
742 /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
747 #[target_feature(enable = "sse2")]
748 #[cfg_attr(test, assert_instr(andps))]
749 #[stable(feature = "simd_x86", since = "1.27.0")]
750 pub unsafe fn _mm_and_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
754 /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
755 /// then AND with `b`.
757 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
759 #[target_feature(enable = "sse2")]
760 #[cfg_attr(test, assert_instr(andnps))]
761 #[stable(feature = "simd_x86", since = "1.27.0")]
762 pub unsafe fn _mm_andnot_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
763 simd_and(simd_xor(_mm_set1_epi8(-1), a
), b
)
766 /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
769 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
771 #[target_feature(enable = "sse2")]
772 #[cfg_attr(test, assert_instr(orps))]
773 #[stable(feature = "simd_x86", since = "1.27.0")]
774 pub unsafe fn _mm_or_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
778 /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
781 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
783 #[target_feature(enable = "sse2")]
784 #[cfg_attr(test, assert_instr(xorps))]
785 #[stable(feature = "simd_x86", since = "1.27.0")]
786 pub unsafe fn _mm_xor_si128(a
: __m128i
, b
: __m128i
) -> __m128i
{
790 /// Compares packed 8-bit integers in `a` and `b` for equality.
792 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
794 #[target_feature(enable = "sse2")]
795 #[cfg_attr(test, assert_instr(pcmpeqb))]
796 #[stable(feature = "simd_x86", since = "1.27.0")]
797 pub unsafe fn _mm_cmpeq_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
798 transmute
::<i8x16
, _
>(simd_eq(a
.as_i8x16(), b
.as_i8x16()))
801 /// Compares packed 16-bit integers in `a` and `b` for equality.
803 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
805 #[target_feature(enable = "sse2")]
806 #[cfg_attr(test, assert_instr(pcmpeqw))]
807 #[stable(feature = "simd_x86", since = "1.27.0")]
808 pub unsafe fn _mm_cmpeq_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
809 transmute
::<i16x8
, _
>(simd_eq(a
.as_i16x8(), b
.as_i16x8()))
812 /// Compares packed 32-bit integers in `a` and `b` for equality.
814 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
816 #[target_feature(enable = "sse2")]
817 #[cfg_attr(test, assert_instr(pcmpeqd))]
818 #[stable(feature = "simd_x86", since = "1.27.0")]
819 pub unsafe fn _mm_cmpeq_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
820 transmute
::<i32x4
, _
>(simd_eq(a
.as_i32x4(), b
.as_i32x4()))
823 /// Compares packed 8-bit integers in `a` and `b` for greater-than.
825 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
827 #[target_feature(enable = "sse2")]
828 #[cfg_attr(test, assert_instr(pcmpgtb))]
829 #[stable(feature = "simd_x86", since = "1.27.0")]
830 pub unsafe fn _mm_cmpgt_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
831 transmute
::<i8x16
, _
>(simd_gt(a
.as_i8x16(), b
.as_i8x16()))
834 /// Compares packed 16-bit integers in `a` and `b` for greater-than.
836 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
838 #[target_feature(enable = "sse2")]
839 #[cfg_attr(test, assert_instr(pcmpgtw))]
840 #[stable(feature = "simd_x86", since = "1.27.0")]
841 pub unsafe fn _mm_cmpgt_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
842 transmute
::<i16x8
, _
>(simd_gt(a
.as_i16x8(), b
.as_i16x8()))
845 /// Compares packed 32-bit integers in `a` and `b` for greater-than.
847 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
849 #[target_feature(enable = "sse2")]
850 #[cfg_attr(test, assert_instr(pcmpgtd))]
851 #[stable(feature = "simd_x86", since = "1.27.0")]
852 pub unsafe fn _mm_cmpgt_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
853 transmute
::<i32x4
, _
>(simd_gt(a
.as_i32x4(), b
.as_i32x4()))
856 /// Compares packed 8-bit integers in `a` and `b` for less-than.
858 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
860 #[target_feature(enable = "sse2")]
861 #[cfg_attr(test, assert_instr(pcmpgtb))]
862 #[stable(feature = "simd_x86", since = "1.27.0")]
863 pub unsafe fn _mm_cmplt_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
864 transmute
::<i8x16
, _
>(simd_lt(a
.as_i8x16(), b
.as_i8x16()))
867 /// Compares packed 16-bit integers in `a` and `b` for less-than.
869 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
871 #[target_feature(enable = "sse2")]
872 #[cfg_attr(test, assert_instr(pcmpgtw))]
873 #[stable(feature = "simd_x86", since = "1.27.0")]
874 pub unsafe fn _mm_cmplt_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
875 transmute
::<i16x8
, _
>(simd_lt(a
.as_i16x8(), b
.as_i16x8()))
878 /// Compares packed 32-bit integers in `a` and `b` for less-than.
880 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
882 #[target_feature(enable = "sse2")]
883 #[cfg_attr(test, assert_instr(pcmpgtd))]
884 #[stable(feature = "simd_x86", since = "1.27.0")]
885 pub unsafe fn _mm_cmplt_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
886 transmute
::<i32x4
, _
>(simd_lt(a
.as_i32x4(), b
.as_i32x4()))
889 /// Converts the lower two packed 32-bit integers in `a` to packed
890 /// double-precision (64-bit) floating-point elements.
892 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
894 #[target_feature(enable = "sse2")]
895 #[cfg_attr(test, assert_instr(cvtdq2pd))]
896 #[stable(feature = "simd_x86", since = "1.27.0")]
897 pub unsafe fn _mm_cvtepi32_pd(a
: __m128i
) -> __m128d
{
898 let a
= a
.as_i32x4();
899 simd_cast
::<i32x2
, __m128d
>(simd_shuffle2
!(a
, a
, [0, 1]))
902 /// Returns `a` with its lower element replaced by `b` after converting it to
905 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
907 #[target_feature(enable = "sse2")]
908 #[cfg_attr(test, assert_instr(cvtsi2sd))]
909 #[stable(feature = "simd_x86", since = "1.27.0")]
910 pub unsafe fn _mm_cvtsi32_sd(a
: __m128d
, b
: i32) -> __m128d
{
911 simd_insert(a
, 0, b
as f64)
914 /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
915 /// floating-point elements.
917 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
919 #[target_feature(enable = "sse2")]
920 #[cfg_attr(test, assert_instr(cvtdq2ps))]
921 #[stable(feature = "simd_x86", since = "1.27.0")]
922 pub unsafe fn _mm_cvtepi32_ps(a
: __m128i
) -> __m128
{
923 cvtdq2ps(a
.as_i32x4())
926 /// Converts packed single-precision (32-bit) floating-point elements in `a`
927 /// to packed 32-bit integers.
929 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
931 #[target_feature(enable = "sse2")]
932 #[cfg_attr(test, assert_instr(cvtps2dq))]
933 #[stable(feature = "simd_x86", since = "1.27.0")]
934 pub unsafe fn _mm_cvtps_epi32(a
: __m128
) -> __m128i
{
935 transmute(cvtps2dq(a
))
938 /// Returns a vector whose lowest element is `a` and all higher elements are
941 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
943 #[target_feature(enable = "sse2")]
944 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
945 #[stable(feature = "simd_x86", since = "1.27.0")]
946 pub unsafe fn _mm_cvtsi32_si128(a
: i32) -> __m128i
{
947 transmute(i32x4
::new(a
, 0, 0, 0))
950 /// Returns the lowest element of `a`.
952 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
954 #[target_feature(enable = "sse2")]
955 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
956 #[stable(feature = "simd_x86", since = "1.27.0")]
957 pub unsafe fn _mm_cvtsi128_si32(a
: __m128i
) -> i32 {
958 simd_extract(a
.as_i32x4(), 0)
961 /// Sets packed 64-bit integers with the supplied values, from highest to
964 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
966 #[target_feature(enable = "sse2")]
967 // no particular instruction to test
968 #[stable(feature = "simd_x86", since = "1.27.0")]
969 pub unsafe fn _mm_set_epi64x(e1
: i64, e0
: i64) -> __m128i
{
970 transmute(i64x2
::new(e0
, e1
))
973 /// Sets packed 32-bit integers with the supplied values.
975 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
977 #[target_feature(enable = "sse2")]
978 // no particular instruction to test
979 #[stable(feature = "simd_x86", since = "1.27.0")]
980 pub unsafe fn _mm_set_epi32(e3
: i32, e2
: i32, e1
: i32, e0
: i32) -> __m128i
{
981 transmute(i32x4
::new(e0
, e1
, e2
, e3
))
984 /// Sets packed 16-bit integers with the supplied values.
986 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
988 #[target_feature(enable = "sse2")]
989 // no particular instruction to test
990 #[stable(feature = "simd_x86", since = "1.27.0")]
991 pub unsafe fn _mm_set_epi16(
1001 transmute(i16x8
::new(e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
))
1004 /// Sets packed 8-bit integers with the supplied values.
1006 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
1008 #[target_feature(enable = "sse2")]
1009 // no particular instruction to test
1010 #[stable(feature = "simd_x86", since = "1.27.0")]
1011 pub unsafe fn _mm_set_epi8(
1030 transmute(i8x16
::new(
1031 e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, e11
, e12
, e13
, e14
, e15
,
1035 /// Broadcasts 64-bit integer `a` to all elements.
1037 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
1039 #[target_feature(enable = "sse2")]
1040 // no particular instruction to test
1041 #[stable(feature = "simd_x86", since = "1.27.0")]
1042 pub unsafe fn _mm_set1_epi64x(a
: i64) -> __m128i
{
1043 _mm_set_epi64x(a
, a
)
1046 /// Broadcasts 32-bit integer `a` to all elements.
1048 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
1050 #[target_feature(enable = "sse2")]
1051 // no particular instruction to test
1052 #[stable(feature = "simd_x86", since = "1.27.0")]
1053 pub unsafe fn _mm_set1_epi32(a
: i32) -> __m128i
{
1054 _mm_set_epi32(a
, a
, a
, a
)
1057 /// Broadcasts 16-bit integer `a` to all elements.
1059 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
1061 #[target_feature(enable = "sse2")]
1062 // no particular instruction to test
1063 #[stable(feature = "simd_x86", since = "1.27.0")]
1064 pub unsafe fn _mm_set1_epi16(a
: i16) -> __m128i
{
1065 _mm_set_epi16(a
, a
, a
, a
, a
, a
, a
, a
)
1068 /// Broadcasts 8-bit integer `a` to all elements.
1070 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
1072 #[target_feature(enable = "sse2")]
1073 // no particular instruction to test
1074 #[stable(feature = "simd_x86", since = "1.27.0")]
1075 pub unsafe fn _mm_set1_epi8(a
: i8) -> __m128i
{
1076 _mm_set_epi8(a
, a
, a
, a
, a
, a
, a
, a
, a
, a
, a
, a
, a
, a
, a
, a
)
1079 /// Sets packed 32-bit integers with the supplied values in reverse order.
1081 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
1083 #[target_feature(enable = "sse2")]
1084 // no particular instruction to test
1085 #[stable(feature = "simd_x86", since = "1.27.0")]
1086 pub unsafe fn _mm_setr_epi32(e3
: i32, e2
: i32, e1
: i32, e0
: i32) -> __m128i
{
1087 _mm_set_epi32(e0
, e1
, e2
, e3
)
1090 /// Sets packed 16-bit integers with the supplied values in reverse order.
1092 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
1094 #[target_feature(enable = "sse2")]
1095 // no particular instruction to test
1096 #[stable(feature = "simd_x86", since = "1.27.0")]
1097 pub unsafe fn _mm_setr_epi16(
1107 _mm_set_epi16(e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
)
1110 /// Sets packed 8-bit integers with the supplied values in reverse order.
1112 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
1114 #[target_feature(enable = "sse2")]
1115 // no particular instruction to test
1116 #[stable(feature = "simd_x86", since = "1.27.0")]
1117 pub unsafe fn _mm_setr_epi8(
1137 e0
, e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, e11
, e12
, e13
, e14
, e15
,
1141 /// Returns a vector with all elements set to zero.
1143 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
1145 #[target_feature(enable = "sse2")]
1146 #[cfg_attr(test, assert_instr(xorps))]
1147 #[stable(feature = "simd_x86", since = "1.27.0")]
1148 pub unsafe fn _mm_setzero_si128() -> __m128i
{
1152 /// Loads 64-bit integer from memory into first element of returned vector.
1154 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
1156 #[target_feature(enable = "sse2")]
1157 // FIXME movsd on windows
1162 not(all(target_os
= "linux", target_arch
= "x86_64")),
1163 target_arch
= "x86_64"
1167 #[stable(feature = "simd_x86", since = "1.27.0")]
1168 pub unsafe fn _mm_loadl_epi64(mem_addr
: *const __m128i
) -> __m128i
{
1169 _mm_set_epi64x(0, ptr
::read_unaligned(mem_addr
as *const i64))
1172 /// Loads 128-bits of integer data from memory into a new vector.
1174 /// `mem_addr` must be aligned on a 16-byte boundary.
1176 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
1178 #[target_feature(enable = "sse2")]
1179 #[cfg_attr(test, assert_instr(movaps))]
1180 #[stable(feature = "simd_x86", since = "1.27.0")]
1181 pub unsafe fn _mm_load_si128(mem_addr
: *const __m128i
) -> __m128i
{
1185 /// Loads 128-bits of integer data from memory into a new vector.
1187 /// `mem_addr` does not need to be aligned on any particular boundary.
1189 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
1191 #[target_feature(enable = "sse2")]
1192 #[cfg_attr(test, assert_instr(movups))]
1193 #[stable(feature = "simd_x86", since = "1.27.0")]
1194 pub unsafe fn _mm_loadu_si128(mem_addr
: *const __m128i
) -> __m128i
{
1195 let mut dst
: __m128i
= _mm_undefined_si128();
1196 ptr
::copy_nonoverlapping(
1197 mem_addr
as *const u8,
1198 &mut dst
as *mut __m128i
as *mut u8,
1199 mem
::size_of
::<__m128i
>(),
1204 /// Conditionally store 8-bit integer elements from `a` into memory using
1207 /// Elements are not stored when the highest bit is not set in the
1208 /// corresponding element.
1210 /// `mem_addr` should correspond to a 128-bit memory location and does not need
1211 /// to be aligned on any particular boundary.
1213 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
1215 #[target_feature(enable = "sse2")]
1216 #[cfg_attr(test, assert_instr(maskmovdqu))]
1217 #[stable(feature = "simd_x86", since = "1.27.0")]
1218 pub unsafe fn _mm_maskmoveu_si128(a
: __m128i
, mask
: __m128i
, mem_addr
: *mut i8) {
1219 maskmovdqu(a
.as_i8x16(), mask
.as_i8x16(), mem_addr
)
1222 /// Stores 128-bits of integer data from `a` into memory.
1224 /// `mem_addr` must be aligned on a 16-byte boundary.
1226 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
1228 #[target_feature(enable = "sse2")]
1229 #[cfg_attr(test, assert_instr(movaps))]
1230 #[stable(feature = "simd_x86", since = "1.27.0")]
1231 pub unsafe fn _mm_store_si128(mem_addr
: *mut __m128i
, a
: __m128i
) {
1235 /// Stores 128-bits of integer data from `a` into memory.
1237 /// `mem_addr` does not need to be aligned on any particular boundary.
1239 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
1241 #[target_feature(enable = "sse2")]
1242 #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
1243 #[stable(feature = "simd_x86", since = "1.27.0")]
1244 pub unsafe fn _mm_storeu_si128(mem_addr
: *mut __m128i
, a
: __m128i
) {
1245 storeudq(mem_addr
as *mut i8, a
);
1248 /// Stores the lower 64-bit integer `a` to a memory location.
1250 /// `mem_addr` does not need to be aligned on any particular boundary.
1252 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
1254 #[target_feature(enable = "sse2")]
1255 // FIXME mov on windows, movlps on i686
1260 not(all(target_os
= "linux", target_arch
= "x86_64")),
1261 target_arch
= "x86_64"
1265 #[stable(feature = "simd_x86", since = "1.27.0")]
1266 pub unsafe fn _mm_storel_epi64(mem_addr
: *mut __m128i
, a
: __m128i
) {
1267 ptr
::copy_nonoverlapping(&a
as *const _
as *const u8, mem_addr
as *mut u8, 8);
1270 /// Stores a 128-bit integer vector to a 128-bit aligned memory location.
1271 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
1272 /// used again soon).
1274 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
1276 #[target_feature(enable = "sse2")]
1277 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
1278 #[stable(feature = "simd_x86", since = "1.27.0")]
1279 pub unsafe fn _mm_stream_si128(mem_addr
: *mut __m128i
, a
: __m128i
) {
1280 intrinsics
::nontemporal_store(mem_addr
, a
);
1283 /// Stores a 32-bit integer value in the specified memory location.
1284 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
1285 /// used again soon).
1287 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
1289 #[target_feature(enable = "sse2")]
1290 #[cfg_attr(test, assert_instr(movnti))]
1291 #[stable(feature = "simd_x86", since = "1.27.0")]
1292 pub unsafe fn _mm_stream_si32(mem_addr
: *mut i32, a
: i32) {
1293 intrinsics
::nontemporal_store(mem_addr
, a
);
1296 /// Returns a vector where the low element is extracted from `a` and its upper
1297 /// element is zero.
1299 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
1301 #[target_feature(enable = "sse2")]
1302 // FIXME movd on windows, movd on i686
1303 #[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
1304 #[stable(feature = "simd_x86", since = "1.27.0")]
1305 pub unsafe fn _mm_move_epi64(a
: __m128i
) -> __m128i
{
1306 let zero
= _mm_setzero_si128();
1307 let r
: i64x2
= simd_shuffle2
!(a
.as_i64x2(), zero
.as_i64x2(), [0, 2]);
1311 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1312 /// using signed saturation.
1314 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
1316 #[target_feature(enable = "sse2")]
1317 #[cfg_attr(test, assert_instr(packsswb))]
1318 #[stable(feature = "simd_x86", since = "1.27.0")]
1319 pub unsafe fn _mm_packs_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1320 transmute(packsswb(a
.as_i16x8(), b
.as_i16x8()))
1323 /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
1324 /// using signed saturation.
1326 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
1328 #[target_feature(enable = "sse2")]
1329 #[cfg_attr(test, assert_instr(packssdw))]
1330 #[stable(feature = "simd_x86", since = "1.27.0")]
1331 pub unsafe fn _mm_packs_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
1332 transmute(packssdw(a
.as_i32x4(), b
.as_i32x4()))
1335 /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
1336 /// using unsigned saturation.
1338 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
1340 #[target_feature(enable = "sse2")]
1341 #[cfg_attr(test, assert_instr(packuswb))]
1342 #[stable(feature = "simd_x86", since = "1.27.0")]
1343 pub unsafe fn _mm_packus_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1344 transmute(packuswb(a
.as_i16x8(), b
.as_i16x8()))
1347 /// Returns the `imm8` element of `a`.
1349 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
1351 #[target_feature(enable = "sse2")]
1352 #[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
1353 #[rustc_legacy_const_generics(1)]
1354 #[stable(feature = "simd_x86", since = "1.27.0")]
1355 pub unsafe fn _mm_extract_epi16
<const IMM8
: i32>(a
: __m128i
) -> i32 {
1356 static_assert_imm3
!(IMM8
);
1357 simd_extract
::<_
, u16>(a
.as_u16x8(), IMM8
as u32) as i32
1360 /// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
1362 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
1364 #[target_feature(enable = "sse2")]
1365 #[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
1366 #[rustc_legacy_const_generics(2)]
1367 #[stable(feature = "simd_x86", since = "1.27.0")]
1368 pub unsafe fn _mm_insert_epi16
<const IMM8
: i32>(a
: __m128i
, i
: i32) -> __m128i
{
1369 static_assert_imm3
!(IMM8
);
1370 transmute(simd_insert(a
.as_i16x8(), IMM8
as u32, i
as i16))
1373 /// Returns a mask of the most significant bit of each element in `a`.
1375 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
1377 #[target_feature(enable = "sse2")]
1378 #[cfg_attr(test, assert_instr(pmovmskb))]
1379 #[stable(feature = "simd_x86", since = "1.27.0")]
1380 pub unsafe fn _mm_movemask_epi8(a
: __m128i
) -> i32 {
1381 pmovmskb(a
.as_i8x16())
1384 /// Shuffles 32-bit integers in `a` using the control in `IMM8`.
1386 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
1388 #[target_feature(enable = "sse2")]
1389 #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1390 #[rustc_legacy_const_generics(1)]
1391 #[stable(feature = "simd_x86", since = "1.27.0")]
1392 pub unsafe fn _mm_shuffle_epi32
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
1393 static_assert_imm8
!(IMM8
);
1394 let a
= a
.as_i32x4();
1395 let x
: i32x4
= simd_shuffle4
!(
1400 (IMM8
as u32 >> 2) & 0b11,
1401 (IMM8
as u32 >> 4) & 0b11,
1402 (IMM8
as u32 >> 6) & 0b11,
1408 /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
1411 /// Put the results in the high 64 bits of the returned vector, with the low 64
1412 /// bits being copied from from `a`.
1414 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
1416 #[target_feature(enable = "sse2")]
1417 #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1418 #[rustc_legacy_const_generics(1)]
1419 #[stable(feature = "simd_x86", since = "1.27.0")]
1420 pub unsafe fn _mm_shufflehi_epi16
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
1421 static_assert_imm8
!(IMM8
);
1422 let a
= a
.as_i16x8();
1423 let x
: i16x8
= simd_shuffle8
!(
1431 (IMM8
as u32 & 0b11) + 4,
1432 ((IMM8
as u32 >> 2) & 0b11) + 4,
1433 ((IMM8
as u32 >> 4) & 0b11) + 4,
1434 ((IMM8
as u32 >> 6) & 0b11) + 4,
1440 /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
1443 /// Put the results in the low 64 bits of the returned vector, with the high 64
1444 /// bits being copied from from `a`.
1446 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
1448 #[target_feature(enable = "sse2")]
1449 #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1450 #[rustc_legacy_const_generics(1)]
1451 #[stable(feature = "simd_x86", since = "1.27.0")]
1452 pub unsafe fn _mm_shufflelo_epi16
<const IMM8
: i32>(a
: __m128i
) -> __m128i
{
1453 static_assert_imm8
!(IMM8
);
1454 let a
= a
.as_i16x8();
1455 let x
: i16x8
= simd_shuffle8
!(
1460 (IMM8
as u32 >> 2) & 0b11,
1461 (IMM8
as u32 >> 4) & 0b11,
1462 (IMM8
as u32 >> 6) & 0b11,
1472 /// Unpacks and interleave 8-bit integers from the high half of `a` and `b`.
1474 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
1476 #[target_feature(enable = "sse2")]
1477 #[cfg_attr(test, assert_instr(punpckhbw))]
1478 #[stable(feature = "simd_x86", since = "1.27.0")]
1479 pub unsafe fn _mm_unpackhi_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
1480 transmute
::<i8x16
, _
>(simd_shuffle16
!(
1483 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1487 /// Unpacks and interleave 16-bit integers from the high half of `a` and `b`.
1489 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
1491 #[target_feature(enable = "sse2")]
1492 #[cfg_attr(test, assert_instr(punpckhwd))]
1493 #[stable(feature = "simd_x86", since = "1.27.0")]
1494 pub unsafe fn _mm_unpackhi_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1495 let x
= simd_shuffle8
!(a
.as_i16x8(), b
.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1496 transmute
::<i16x8
, _
>(x
)
1499 /// Unpacks and interleave 32-bit integers from the high half of `a` and `b`.
1501 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
1503 #[target_feature(enable = "sse2")]
1504 #[cfg_attr(test, assert_instr(unpckhps))]
1505 #[stable(feature = "simd_x86", since = "1.27.0")]
1506 pub unsafe fn _mm_unpackhi_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
1507 transmute
::<i32x4
, _
>(simd_shuffle4
!(a
.as_i32x4(), b
.as_i32x4(), [2, 6, 3, 7]))
1510 /// Unpacks and interleave 64-bit integers from the high half of `a` and `b`.
1512 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
1514 #[target_feature(enable = "sse2")]
1515 #[cfg_attr(test, assert_instr(unpckhpd))]
1516 #[stable(feature = "simd_x86", since = "1.27.0")]
1517 pub unsafe fn _mm_unpackhi_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
1518 transmute
::<i64x2
, _
>(simd_shuffle2
!(a
.as_i64x2(), b
.as_i64x2(), [1, 3]))
1521 /// Unpacks and interleave 8-bit integers from the low half of `a` and `b`.
1523 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
1525 #[target_feature(enable = "sse2")]
1526 #[cfg_attr(test, assert_instr(punpcklbw))]
1527 #[stable(feature = "simd_x86", since = "1.27.0")]
1528 pub unsafe fn _mm_unpacklo_epi8(a
: __m128i
, b
: __m128i
) -> __m128i
{
1529 transmute
::<i8x16
, _
>(simd_shuffle16
!(
1532 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1536 /// Unpacks and interleave 16-bit integers from the low half of `a` and `b`.
1538 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
1540 #[target_feature(enable = "sse2")]
1541 #[cfg_attr(test, assert_instr(punpcklwd))]
1542 #[stable(feature = "simd_x86", since = "1.27.0")]
1543 pub unsafe fn _mm_unpacklo_epi16(a
: __m128i
, b
: __m128i
) -> __m128i
{
1544 let x
= simd_shuffle8
!(a
.as_i16x8(), b
.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1545 transmute
::<i16x8
, _
>(x
)
1548 /// Unpacks and interleave 32-bit integers from the low half of `a` and `b`.
1550 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
1552 #[target_feature(enable = "sse2")]
1553 #[cfg_attr(test, assert_instr(unpcklps))]
1554 #[stable(feature = "simd_x86", since = "1.27.0")]
1555 pub unsafe fn _mm_unpacklo_epi32(a
: __m128i
, b
: __m128i
) -> __m128i
{
1556 transmute
::<i32x4
, _
>(simd_shuffle4
!(a
.as_i32x4(), b
.as_i32x4(), [0, 4, 1, 5]))
1559 /// Unpacks and interleave 64-bit integers from the low half of `a` and `b`.
1561 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
1563 #[target_feature(enable = "sse2")]
1564 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
1565 #[stable(feature = "simd_x86", since = "1.27.0")]
1566 pub unsafe fn _mm_unpacklo_epi64(a
: __m128i
, b
: __m128i
) -> __m128i
{
1567 transmute
::<i64x2
, _
>(simd_shuffle2
!(a
.as_i64x2(), b
.as_i64x2(), [0, 2]))
1570 /// Returns a new vector with the low element of `a` replaced by the sum of the
1571 /// low elements of `a` and `b`.
1573 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
1575 #[target_feature(enable = "sse2")]
1576 #[cfg_attr(test, assert_instr(addsd))]
1577 #[stable(feature = "simd_x86", since = "1.27.0")]
1578 pub unsafe fn _mm_add_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1579 simd_insert(a
, 0, _mm_cvtsd_f64(a
) + _mm_cvtsd_f64(b
))
1582 /// Adds packed double-precision (64-bit) floating-point elements in `a` and
1585 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
1587 #[target_feature(enable = "sse2")]
1588 #[cfg_attr(test, assert_instr(addpd))]
1589 #[stable(feature = "simd_x86", since = "1.27.0")]
1590 pub unsafe fn _mm_add_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1594 /// Returns a new vector with the low element of `a` replaced by the result of
1595 /// diving the lower element of `a` by the lower element of `b`.
1597 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
1599 #[target_feature(enable = "sse2")]
1600 #[cfg_attr(test, assert_instr(divsd))]
1601 #[stable(feature = "simd_x86", since = "1.27.0")]
1602 pub unsafe fn _mm_div_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1603 simd_insert(a
, 0, _mm_cvtsd_f64(a
) / _mm_cvtsd_f64(b
))
1606 /// Divide packed double-precision (64-bit) floating-point elements in `a` by
1607 /// packed elements in `b`.
1609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
1611 #[target_feature(enable = "sse2")]
1612 #[cfg_attr(test, assert_instr(divpd))]
1613 #[stable(feature = "simd_x86", since = "1.27.0")]
1614 pub unsafe fn _mm_div_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1618 /// Returns a new vector with the low element of `a` replaced by the maximum
1619 /// of the lower elements of `a` and `b`.
1621 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
1623 #[target_feature(enable = "sse2")]
1624 #[cfg_attr(test, assert_instr(maxsd))]
1625 #[stable(feature = "simd_x86", since = "1.27.0")]
1626 pub unsafe fn _mm_max_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1630 /// Returns a new vector with the maximum values from corresponding elements in
1633 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
1635 #[target_feature(enable = "sse2")]
1636 #[cfg_attr(test, assert_instr(maxpd))]
1637 #[stable(feature = "simd_x86", since = "1.27.0")]
1638 pub unsafe fn _mm_max_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1642 /// Returns a new vector with the low element of `a` replaced by the minimum
1643 /// of the lower elements of `a` and `b`.
1645 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
1647 #[target_feature(enable = "sse2")]
1648 #[cfg_attr(test, assert_instr(minsd))]
1649 #[stable(feature = "simd_x86", since = "1.27.0")]
1650 pub unsafe fn _mm_min_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1654 /// Returns a new vector with the minimum values from corresponding elements in
1657 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
1659 #[target_feature(enable = "sse2")]
1660 #[cfg_attr(test, assert_instr(minpd))]
1661 #[stable(feature = "simd_x86", since = "1.27.0")]
1662 pub unsafe fn _mm_min_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1666 /// Returns a new vector with the low element of `a` replaced by multiplying the
1667 /// low elements of `a` and `b`.
1669 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
1671 #[target_feature(enable = "sse2")]
1672 #[cfg_attr(test, assert_instr(mulsd))]
1673 #[stable(feature = "simd_x86", since = "1.27.0")]
1674 pub unsafe fn _mm_mul_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1675 simd_insert(a
, 0, _mm_cvtsd_f64(a
) * _mm_cvtsd_f64(b
))
1678 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
1681 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
1683 #[target_feature(enable = "sse2")]
1684 #[cfg_attr(test, assert_instr(mulpd))]
1685 #[stable(feature = "simd_x86", since = "1.27.0")]
1686 pub unsafe fn _mm_mul_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1690 /// Returns a new vector with the low element of `a` replaced by the square
1691 /// root of the lower element `b`.
1693 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
1695 #[target_feature(enable = "sse2")]
1696 #[cfg_attr(test, assert_instr(sqrtsd))]
1697 #[stable(feature = "simd_x86", since = "1.27.0")]
1698 pub unsafe fn _mm_sqrt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1699 simd_insert(a
, 0, _mm_cvtsd_f64(sqrtsd(b
)))
1702 /// Returns a new vector with the square root of each of the values in `a`.
1704 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
1706 #[target_feature(enable = "sse2")]
1707 #[cfg_attr(test, assert_instr(sqrtpd))]
1708 #[stable(feature = "simd_x86", since = "1.27.0")]
1709 pub unsafe fn _mm_sqrt_pd(a
: __m128d
) -> __m128d
{
1713 /// Returns a new vector with the low element of `a` replaced by subtracting the
1714 /// low element by `b` from the low element of `a`.
1716 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
1718 #[target_feature(enable = "sse2")]
1719 #[cfg_attr(test, assert_instr(subsd))]
1720 #[stable(feature = "simd_x86", since = "1.27.0")]
1721 pub unsafe fn _mm_sub_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1722 simd_insert(a
, 0, _mm_cvtsd_f64(a
) - _mm_cvtsd_f64(b
))
1725 /// Subtract packed double-precision (64-bit) floating-point elements in `b`
1728 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
1730 #[target_feature(enable = "sse2")]
1731 #[cfg_attr(test, assert_instr(subpd))]
1732 #[stable(feature = "simd_x86", since = "1.27.0")]
1733 pub unsafe fn _mm_sub_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1737 /// Computes the bitwise AND of packed double-precision (64-bit) floating-point
1738 /// elements in `a` and `b`.
1740 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
1742 #[target_feature(enable = "sse2")]
1743 #[cfg_attr(test, assert_instr(andps))]
1744 #[stable(feature = "simd_x86", since = "1.27.0")]
1745 pub unsafe fn _mm_and_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1746 let a
: __m128i
= transmute(a
);
1747 let b
: __m128i
= transmute(b
);
1748 transmute(_mm_and_si128(a
, b
))
1751 /// Computes the bitwise NOT of `a` and then AND with `b`.
1753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
1755 #[target_feature(enable = "sse2")]
1756 #[cfg_attr(test, assert_instr(andnps))]
1757 #[stable(feature = "simd_x86", since = "1.27.0")]
1758 pub unsafe fn _mm_andnot_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1759 let a
: __m128i
= transmute(a
);
1760 let b
: __m128i
= transmute(b
);
1761 transmute(_mm_andnot_si128(a
, b
))
/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))] // `orps`/`orpd` are bit-identical; LLVM may emit either
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    // Bitwise OR on the raw bit patterns of the two f64 lanes, implemented
    // via the integer OR intrinsic; transmute only reinterprets bits.
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
}
/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // `xorps`/`xorpd` are bit-identical; LLVM may emit either
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    // Bitwise XOR on the raw bit patterns of the two f64 lanes, implemented
    // via the integer XOR intrinsic; transmute only reinterprets bits.
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
}
1790 /// Returns a new vector with the low element of `a` replaced by the equality
1791 /// comparison of the lower elements of `a` and `b`.
1793 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
1795 #[target_feature(enable = "sse2")]
1796 #[cfg_attr(test, assert_instr(cmpeqsd))]
1797 #[stable(feature = "simd_x86", since = "1.27.0")]
1798 pub unsafe fn _mm_cmpeq_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1802 /// Returns a new vector with the low element of `a` replaced by the less-than
1803 /// comparison of the lower elements of `a` and `b`.
1805 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
1807 #[target_feature(enable = "sse2")]
1808 #[cfg_attr(test, assert_instr(cmpltsd))]
1809 #[stable(feature = "simd_x86", since = "1.27.0")]
1810 pub unsafe fn _mm_cmplt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1814 /// Returns a new vector with the low element of `a` replaced by the
1815 /// less-than-or-equal comparison of the lower elements of `a` and `b`.
1817 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
1819 #[target_feature(enable = "sse2")]
1820 #[cfg_attr(test, assert_instr(cmplesd))]
1821 #[stable(feature = "simd_x86", since = "1.27.0")]
1822 pub unsafe fn _mm_cmple_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))] // gt(a, b) is emitted as lt with swapped operands
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    // a > b is computed as b < a. `_mm_cmplt_sd(b, a)` copies *b*'s upper
    // lane into the result, but this intrinsic's contract requires *a*'s
    // upper lane, so lane 1 is overwritten with `a`'s upper element.
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))] // ge(a, b) is emitted as le with swapped operands
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    // a >= b is computed as b <= a. The swapped call would propagate `b`'s
    // upper lane, so lane 1 is restored from `a` to match the contract.
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
1850 /// Returns a new vector with the low element of `a` replaced by the result
1851 /// of comparing both of the lower elements of `a` and `b` to `NaN`. If
1852 /// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1855 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
1857 #[target_feature(enable = "sse2")]
1858 #[cfg_attr(test, assert_instr(cmpordsd))]
1859 #[stable(feature = "simd_x86", since = "1.27.0")]
1860 pub unsafe fn _mm_cmpord_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1864 /// Returns a new vector with the low element of `a` replaced by the result of
1865 /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
1866 /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
1868 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd)
1870 #[target_feature(enable = "sse2")]
1871 #[cfg_attr(test, assert_instr(cmpunordsd))]
1872 #[stable(feature = "simd_x86", since = "1.27.0")]
1873 pub unsafe fn _mm_cmpunord_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1877 /// Returns a new vector with the low element of `a` replaced by the not-equal
1878 /// comparison of the lower elements of `a` and `b`.
1880 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd)
1882 #[target_feature(enable = "sse2")]
1883 #[cfg_attr(test, assert_instr(cmpneqsd))]
1884 #[stable(feature = "simd_x86", since = "1.27.0")]
1885 pub unsafe fn _mm_cmpneq_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1889 /// Returns a new vector with the low element of `a` replaced by the
1890 /// not-less-than comparison of the lower elements of `a` and `b`.
1892 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd)
1894 #[target_feature(enable = "sse2")]
1895 #[cfg_attr(test, assert_instr(cmpnltsd))]
1896 #[stable(feature = "simd_x86", since = "1.27.0")]
1897 pub unsafe fn _mm_cmpnlt_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1901 /// Returns a new vector with the low element of `a` replaced by the
1902 /// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
1904 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd)
1906 #[target_feature(enable = "sse2")]
1907 #[cfg_attr(test, assert_instr(cmpnlesd))]
1908 #[stable(feature = "simd_x86", since = "1.27.0")]
1909 pub unsafe fn _mm_cmpnle_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))] // ngt(a, b) is emitted as nlt with swapped operands
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    // !(a > b) is computed as !(b < a). The swapped call would propagate
    // `b`'s upper lane, so lane 1 is restored from `a` to match the contract.
    simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))] // nge(a, b) is emitted as nle with swapped operands
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    // !(a >= b) is computed as !(b <= a). The swapped call would propagate
    // `b`'s upper lane, so lane 1 is restored from `a` to match the contract.
    simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
1937 /// Compares corresponding elements in `a` and `b` for equality.
1939 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
1941 #[target_feature(enable = "sse2")]
1942 #[cfg_attr(test, assert_instr(cmpeqpd))]
1943 #[stable(feature = "simd_x86", since = "1.27.0")]
1944 pub unsafe fn _mm_cmpeq_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1948 /// Compares corresponding elements in `a` and `b` for less-than.
1950 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
1952 #[target_feature(enable = "sse2")]
1953 #[cfg_attr(test, assert_instr(cmpltpd))]
1954 #[stable(feature = "simd_x86", since = "1.27.0")]
1955 pub unsafe fn _mm_cmplt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1959 /// Compares corresponding elements in `a` and `b` for less-than-or-equal
1961 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
1963 #[target_feature(enable = "sse2")]
1964 #[cfg_attr(test, assert_instr(cmplepd))]
1965 #[stable(feature = "simd_x86", since = "1.27.0")]
1966 pub unsafe fn _mm_cmple_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1970 /// Compares corresponding elements in `a` and `b` for greater-than.
1972 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
1974 #[target_feature(enable = "sse2")]
1975 #[cfg_attr(test, assert_instr(cmpltpd))]
1976 #[stable(feature = "simd_x86", since = "1.27.0")]
1977 pub unsafe fn _mm_cmpgt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1981 /// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
1983 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
1985 #[target_feature(enable = "sse2")]
1986 #[cfg_attr(test, assert_instr(cmplepd))]
1987 #[stable(feature = "simd_x86", since = "1.27.0")]
1988 pub unsafe fn _mm_cmpge_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
1992 /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
1994 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
1996 #[target_feature(enable = "sse2")]
1997 #[cfg_attr(test, assert_instr(cmpordpd))]
1998 #[stable(feature = "simd_x86", since = "1.27.0")]
1999 pub unsafe fn _mm_cmpord_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2003 /// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
2005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
2007 #[target_feature(enable = "sse2")]
2008 #[cfg_attr(test, assert_instr(cmpunordpd))]
2009 #[stable(feature = "simd_x86", since = "1.27.0")]
2010 pub unsafe fn _mm_cmpunord_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2014 /// Compares corresponding elements in `a` and `b` for not-equal.
2016 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
2018 #[target_feature(enable = "sse2")]
2019 #[cfg_attr(test, assert_instr(cmpneqpd))]
2020 #[stable(feature = "simd_x86", since = "1.27.0")]
2021 pub unsafe fn _mm_cmpneq_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2025 /// Compares corresponding elements in `a` and `b` for not-less-than.
2027 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
2029 #[target_feature(enable = "sse2")]
2030 #[cfg_attr(test, assert_instr(cmpnltpd))]
2031 #[stable(feature = "simd_x86", since = "1.27.0")]
2032 pub unsafe fn _mm_cmpnlt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2036 /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
2038 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
2040 #[target_feature(enable = "sse2")]
2041 #[cfg_attr(test, assert_instr(cmpnlepd))]
2042 #[stable(feature = "simd_x86", since = "1.27.0")]
2043 pub unsafe fn _mm_cmpnle_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2047 /// Compares corresponding elements in `a` and `b` for not-greater-than.
2049 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
2051 #[target_feature(enable = "sse2")]
2052 #[cfg_attr(test, assert_instr(cmpnltpd))]
2053 #[stable(feature = "simd_x86", since = "1.27.0")]
2054 pub unsafe fn _mm_cmpngt_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2058 /// Compares corresponding elements in `a` and `b` for
2059 /// not-greater-than-or-equal.
2061 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
2063 #[target_feature(enable = "sse2")]
2064 #[cfg_attr(test, assert_instr(cmpnlepd))]
2065 #[stable(feature = "simd_x86", since = "1.27.0")]
2066 pub unsafe fn _mm_cmpnge_pd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2070 /// Compares the lower element of `a` and `b` for equality.
2072 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
2074 #[target_feature(enable = "sse2")]
2075 #[cfg_attr(test, assert_instr(comisd))]
2076 #[stable(feature = "simd_x86", since = "1.27.0")]
2077 pub unsafe fn _mm_comieq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2081 /// Compares the lower element of `a` and `b` for less-than.
2083 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
2085 #[target_feature(enable = "sse2")]
2086 #[cfg_attr(test, assert_instr(comisd))]
2087 #[stable(feature = "simd_x86", since = "1.27.0")]
2088 pub unsafe fn _mm_comilt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2092 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2094 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
2096 #[target_feature(enable = "sse2")]
2097 #[cfg_attr(test, assert_instr(comisd))]
2098 #[stable(feature = "simd_x86", since = "1.27.0")]
2099 pub unsafe fn _mm_comile_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2103 /// Compares the lower element of `a` and `b` for greater-than.
2105 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
2107 #[target_feature(enable = "sse2")]
2108 #[cfg_attr(test, assert_instr(comisd))]
2109 #[stable(feature = "simd_x86", since = "1.27.0")]
2110 pub unsafe fn _mm_comigt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2114 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2116 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
2118 #[target_feature(enable = "sse2")]
2119 #[cfg_attr(test, assert_instr(comisd))]
2120 #[stable(feature = "simd_x86", since = "1.27.0")]
2121 pub unsafe fn _mm_comige_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2125 /// Compares the lower element of `a` and `b` for not-equal.
2127 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
2129 #[target_feature(enable = "sse2")]
2130 #[cfg_attr(test, assert_instr(comisd))]
2131 #[stable(feature = "simd_x86", since = "1.27.0")]
2132 pub unsafe fn _mm_comineq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2136 /// Compares the lower element of `a` and `b` for equality.
2138 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
2140 #[target_feature(enable = "sse2")]
2141 #[cfg_attr(test, assert_instr(ucomisd))]
2142 #[stable(feature = "simd_x86", since = "1.27.0")]
2143 pub unsafe fn _mm_ucomieq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2147 /// Compares the lower element of `a` and `b` for less-than.
2149 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
2151 #[target_feature(enable = "sse2")]
2152 #[cfg_attr(test, assert_instr(ucomisd))]
2153 #[stable(feature = "simd_x86", since = "1.27.0")]
2154 pub unsafe fn _mm_ucomilt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2158 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2160 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
2162 #[target_feature(enable = "sse2")]
2163 #[cfg_attr(test, assert_instr(ucomisd))]
2164 #[stable(feature = "simd_x86", since = "1.27.0")]
2165 pub unsafe fn _mm_ucomile_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2169 /// Compares the lower element of `a` and `b` for greater-than.
2171 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
2173 #[target_feature(enable = "sse2")]
2174 #[cfg_attr(test, assert_instr(ucomisd))]
2175 #[stable(feature = "simd_x86", since = "1.27.0")]
2176 pub unsafe fn _mm_ucomigt_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2180 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2182 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
2184 #[target_feature(enable = "sse2")]
2185 #[cfg_attr(test, assert_instr(ucomisd))]
2186 #[stable(feature = "simd_x86", since = "1.27.0")]
2187 pub unsafe fn _mm_ucomige_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2191 /// Compares the lower element of `a` and `b` for not-equal.
2193 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
2195 #[target_feature(enable = "sse2")]
2196 #[cfg_attr(test, assert_instr(ucomisd))]
2197 #[stable(feature = "simd_x86", since = "1.27.0")]
2198 pub unsafe fn _mm_ucomineq_sd(a
: __m128d
, b
: __m128d
) -> i32 {
2202 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2203 /// packed single-precision (32-bit) floating-point elements
2205 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
2207 #[target_feature(enable = "sse2")]
2208 #[cfg_attr(test, assert_instr(cvtpd2ps))]
2209 #[stable(feature = "simd_x86", since = "1.27.0")]
2210 pub unsafe fn _mm_cvtpd_ps(a
: __m128d
) -> __m128
{
2214 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2216 /// double-precision (64-bit) floating-point elements.
2218 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
2220 #[target_feature(enable = "sse2")]
2221 #[cfg_attr(test, assert_instr(cvtps2pd))]
2222 #[stable(feature = "simd_x86", since = "1.27.0")]
2223 pub unsafe fn _mm_cvtps_pd(a
: __m128
) -> __m128d
{
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    // Delegates to the LLVM `cvtpd2dq` intrinsic; the transmute only casts
    // the intrinsic's integer-vector result to the public `__m128i` type.
    transmute(cvtpd2dq(a))
}
2239 /// Converts the lower double-precision (64-bit) floating-point element in a to
2240 /// a 32-bit integer.
2242 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
2244 #[target_feature(enable = "sse2")]
2245 #[cfg_attr(test, assert_instr(cvtsd2si))]
2246 #[stable(feature = "simd_x86", since = "1.27.0")]
2247 pub unsafe fn _mm_cvtsd_si32(a
: __m128d
) -> i32 {
2251 /// Converts the lower double-precision (64-bit) floating-point element in `b`
2252 /// to a single-precision (32-bit) floating-point element, store the result in
2253 /// the lower element of the return value, and copies the upper element from `a`
2254 /// to the upper element the return value.
2256 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
2258 #[target_feature(enable = "sse2")]
2259 #[cfg_attr(test, assert_instr(cvtsd2ss))]
2260 #[stable(feature = "simd_x86", since = "1.27.0")]
2261 pub unsafe fn _mm_cvtsd_ss(a
: __m128
, b
: __m128d
) -> __m128
{
2265 /// Returns the lower double-precision (64-bit) floating-point element of `a`.
2267 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
2269 #[target_feature(enable = "sse2")]
2270 #[stable(feature = "simd_x86", since = "1.27.0")]
2271 pub unsafe fn _mm_cvtsd_f64(a
: __m128d
) -> f64 {
2275 /// Converts the lower single-precision (32-bit) floating-point element in `b`
2276 /// to a double-precision (64-bit) floating-point element, store the result in
2277 /// the lower element of the return value, and copies the upper element from `a`
2278 /// to the upper element the return value.
2280 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
2282 #[target_feature(enable = "sse2")]
2283 #[cfg_attr(test, assert_instr(cvtss2sd))]
2284 #[stable(feature = "simd_x86", since = "1.27.0")]
2285 pub unsafe fn _mm_cvtss_sd(a
: __m128d
, b
: __m128
) -> __m128d
{
/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    // Delegates to the LLVM `cvttpd2dq` intrinsic (truncating conversion);
    // the transmute only casts the result to the public `__m128i` type.
    transmute(cvttpd2dq(a))
}
2301 /// Converts the lower double-precision (64-bit) floating-point element in `a`
2302 /// to a 32-bit integer with truncation.
2304 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
2306 #[target_feature(enable = "sse2")]
2307 #[cfg_attr(test, assert_instr(cvttsd2si))]
2308 #[stable(feature = "simd_x86", since = "1.27.0")]
2309 pub unsafe fn _mm_cvttsd_si32(a
: __m128d
) -> i32 {
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    // Delegates to the LLVM `cvttps2dq` intrinsic (truncating conversion);
    // the transmute only casts the result to the public `__m128i` type.
    transmute(cvttps2dq(a))
}
2325 /// Copies double-precision (64-bit) floating-point element `a` to the lower
2326 /// element of the packed 64-bit return value.
2328 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
2330 #[target_feature(enable = "sse2")]
2331 #[stable(feature = "simd_x86", since = "1.27.0")]
2332 pub unsafe fn _mm_set_sd(a
: f64) -> __m128d
{
2336 /// Broadcasts double-precision (64-bit) floating-point value a to all elements
2337 /// of the return value.
2339 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
2341 #[target_feature(enable = "sse2")]
2342 #[stable(feature = "simd_x86", since = "1.27.0")]
2343 pub unsafe fn _mm_set1_pd(a
: f64) -> __m128d
{
2347 /// Broadcasts double-precision (64-bit) floating-point value a to all elements
2348 /// of the return value.
2350 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
2352 #[target_feature(enable = "sse2")]
2353 #[stable(feature = "simd_x86", since = "1.27.0")]
2354 pub unsafe fn _mm_set_pd1(a
: f64) -> __m128d
{
2358 /// Sets packed double-precision (64-bit) floating-point elements in the return
2359 /// value with the supplied values.
2361 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
2363 #[target_feature(enable = "sse2")]
2364 #[stable(feature = "simd_x86", since = "1.27.0")]
2365 pub unsafe fn _mm_set_pd(a
: f64, b
: f64) -> __m128d
{
2369 /// Sets packed double-precision (64-bit) floating-point elements in the return
2370 /// value with the supplied values in reverse order.
2372 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
2374 #[target_feature(enable = "sse2")]
2375 #[stable(feature = "simd_x86", since = "1.27.0")]
2376 pub unsafe fn _mm_setr_pd(a
: f64, b
: f64) -> __m128d
{
/// Returns packed double-precision (64-bit) floating-point elements with all
/// elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_pd() -> __m128d {
    // Both lanes set to +0.0; LLVM lowers this to a self-xor of the register.
    _mm_set_pd(0.0, 0.0)
}
2392 /// Returns a mask of the most significant bit of each element in `a`.
2394 /// The mask is stored in the 2 least significant bits of the return value.
2395 /// All other bits are set to `0`.
2397 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
2399 #[target_feature(enable = "sse2")]
2400 #[cfg_attr(test, assert_instr(movmskpd))]
2401 #[stable(feature = "simd_x86", since = "1.27.0")]
2402 pub unsafe fn _mm_movemask_pd(a
: __m128d
) -> i32 {
/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)] // 16-byte alignment is the caller's documented obligation
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    // Plain aligned dereference; the pointer cast raises the alignment
    // requirement from 8 (f64) to 16 (__m128d), hence the clippy allow.
    *(mem_addr as *const __m128d)
}
/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector of `[2 x double]` and clears the upper element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    // `_mm_setr_pd` takes elements in memory order: loaded value in lane 0,
    // upper lane explicitly zeroed.
    _mm_setr_pd(*mem_addr, 0.)
}
/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    // Rebuild the vector in memory order: keep `a`'s low lane, place the
    // loaded value in the high lane. The load itself is unaligned-safe
    // only as far as `*mem_addr` (an f64 read) is.
    _mm_setr_pd(simd_extract(a, 0), *mem_addr)
}
/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    // Rebuild the vector in memory order: loaded value in the low lane,
    // `a`'s existing high lane preserved.
    _mm_setr_pd(*mem_addr, simd_extract(a, 1))
}
/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)] // 16-byte alignment is the caller's documented obligation
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    // `nontemporal_store` asks the compiler to emit a streaming (cache-
    // bypassing) store; the cast raises the alignment requirement to 16.
    intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
}
/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd)
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    // Extract lane 0 and store it as a plain f64 write (8-byte alignment
    // suffices, matching the pointer's own type).
    *mem_addr = simd_extract(a, 0)
}
/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)] // 16-byte alignment is the caller's documented obligation
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    // Plain aligned 128-bit store via pointer cast; the cast raises the
    // alignment requirement from 8 (f64) to 16 (__m128d).
    *(mem_addr as *mut __m128d) = a;
}
/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    // Delegates to the unaligned-store intrinsic, which takes a byte
    // pointer — hence the `*mut i8` cast (no alignment is implied).
    storeupd(mem_addr as *mut i8, a);
}
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)] // 16-byte alignment is the caller's documented obligation
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    // Broadcast lane 0 into both lanes via a [0, 0] shuffle, then perform
    // one aligned 128-bit store instead of two scalar stores.
    let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// This is an alias for [`_mm_store1_pd`] with an identical implementation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)] // 16-byte alignment is the caller's documented obligation
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    // Broadcast lane 0 into both lanes via a [0, 0] shuffle, then perform
    // one aligned 128-bit store.
    let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)] // 16-byte alignment is the caller's documented obligation
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    // Swap the two lanes with a [1, 0] shuffle, then do a single aligned
    // 128-bit store of the reversed vector.
    let b: __m128d = simd_shuffle2!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}
/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    // Extract lane 1 (the high lane) and store it as a plain f64 write.
    *mem_addr = simd_extract(a, 1);
}
/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    // Extract lane 0 (the low lane) and store it as a plain f64 write.
    *mem_addr = simd_extract(a, 0);
}
2580 /// Loads a double-precision (64-bit) floating-point element from memory
2581 /// into both elements of returned vector.
2583 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
2585 #[target_feature(enable = "sse2")]
2586 // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2587 #[stable(feature = "simd_x86", since = "1.27.0")]
2588 pub unsafe fn _mm_load1_pd(mem_addr
: *const f64) -> __m128d
{
2593 /// Loads a double-precision (64-bit) floating-point element from memory
2594 /// into both elements of returned vector.
2596 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
2598 #[target_feature(enable = "sse2")]
2599 // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
2600 #[stable(feature = "simd_x86", since = "1.27.0")]
2601 pub unsafe fn _mm_load_pd1(mem_addr
: *const f64) -> __m128d
{
2602 _mm_load1_pd(mem_addr
)
2605 /// Loads 2 double-precision (64-bit) floating-point elements from memory into
2606 /// the returned vector in reverse order. `mem_addr` must be aligned on a
2607 /// 16-byte boundary or a general-protection exception may be generated.
2609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
2611 #[target_feature(enable = "sse2")]
2612 #[cfg_attr(test, assert_instr(movaps))]
2613 #[stable(feature = "simd_x86", since = "1.27.0")]
2614 pub unsafe fn _mm_loadr_pd(mem_addr
: *const f64) -> __m128d
{
2615 let a
= _mm_load_pd(mem_addr
);
2616 simd_shuffle2
!(a
, a
, [1, 0])
2619 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2620 /// floating-point elements) from memory into the returned vector.
2621 /// `mem_addr` does not need to be aligned on any particular boundary.
2623 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
2625 #[target_feature(enable = "sse2")]
2626 #[cfg_attr(test, assert_instr(movups))]
2627 #[stable(feature = "simd_x86", since = "1.27.0")]
2628 pub unsafe fn _mm_loadu_pd(mem_addr
: *const f64) -> __m128d
{
2629 let mut dst
= _mm_undefined_pd();
2630 ptr
::copy_nonoverlapping(
2631 mem_addr
as *const u8,
2632 &mut dst
as *mut __m128d
as *mut u8,
2633 mem
::size_of
::<__m128d
>(),
/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    // The immediate must fit in 8 bits, mirroring the C intrinsic's operand.
    static_assert_imm8!(MASK);
    // Bit 0 of MASK picks the result's low lane from `a` (shuffle index 0/1);
    // bit 1 picks the high lane from `b` — `b`'s lanes are shuffle indices
    // 2 and 3, hence the `+ 2`.
    simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
}
2653 /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
2654 /// 64 bits are set to the lower 64 bits of the second parameter. The upper
2655 /// 64 bits are set to the upper 64 bits of the first parameter.
2657 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd)
2659 #[target_feature(enable = "sse2")]
2660 #[cfg_attr(test, assert_instr(movsd))]
2661 #[stable(feature = "simd_x86", since = "1.27.0")]
2662 pub unsafe fn _mm_move_sd(a
: __m128d
, b
: __m128d
) -> __m128d
{
2663 _mm_setr_pd(simd_extract(b
, 0), simd_extract(a
, 1))
2666 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2667 /// floating-point vector of `[4 x float]`.
2669 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps)
2671 #[target_feature(enable = "sse2")]
2672 #[stable(feature = "simd_x86", since = "1.27.0")]
2673 pub unsafe fn _mm_castpd_ps(a
: __m128d
) -> __m128
{
2677 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2680 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128)
2682 #[target_feature(enable = "sse2")]
2683 #[stable(feature = "simd_x86", since = "1.27.0")]
2684 pub unsafe fn _mm_castpd_si128(a
: __m128d
) -> __m128i
{
2688 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2689 /// floating-point vector of `[2 x double]`.
2691 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd)
2693 #[target_feature(enable = "sse2")]
2694 #[stable(feature = "simd_x86", since = "1.27.0")]
2695 pub unsafe fn _mm_castps_pd(a
: __m128
) -> __m128d
{
2699 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2702 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128)
2704 #[target_feature(enable = "sse2")]
2705 #[stable(feature = "simd_x86", since = "1.27.0")]
2706 pub unsafe fn _mm_castps_si128(a
: __m128
) -> __m128i
{
2710 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2711 /// of `[2 x double]`.
2713 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
2715 #[target_feature(enable = "sse2")]
2716 #[stable(feature = "simd_x86", since = "1.27.0")]
2717 pub unsafe fn _mm_castsi128_pd(a
: __m128i
) -> __m128d
{
2721 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2722 /// of `[4 x float]`.
2724 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
2726 #[target_feature(enable = "sse2")]
2727 #[stable(feature = "simd_x86", since = "1.27.0")]
2728 pub unsafe fn _mm_castsi128_ps(a
: __m128i
) -> __m128
{
2732 /// Returns vector of type __m128d with undefined elements.
2734 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
2736 #[target_feature(enable = "sse2")]
2737 #[stable(feature = "simd_x86", since = "1.27.0")]
2738 pub unsafe fn _mm_undefined_pd() -> __m128d
{
2739 // FIXME: this function should return MaybeUninit<__m128d>
2740 mem
::MaybeUninit
::<__m128d
>::uninit().assume_init()
2743 /// Returns vector of type __m128i with undefined elements.
2745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
2747 #[target_feature(enable = "sse2")]
2748 #[stable(feature = "simd_x86", since = "1.27.0")]
2749 pub unsafe fn _mm_undefined_si128() -> __m128i
{
2750 // FIXME: this function should return MaybeUninit<__m128i>
2751 mem
::MaybeUninit
::<__m128i
>::uninit().assume_init()
/// The resulting `__m128d` element is composed by the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    // Indices 1 and 3 select the high lane of `a` and the high lane of `b`.
    simd_shuffle2!(a, b, [1, 3])
}
/// The resulting `__m128d` element is composed by the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    // Indices 0 and 2 select the low lane of `a` and the low lane of `b`.
    simd_shuffle2!(a, b, [0, 2])
}
2785 #[allow(improper_ctypes)]
2787 #[link_name = "llvm.x86.sse2.pause"]
2789 #[link_name = "llvm.x86.sse2.clflush"]
2790 fn clflush(p
: *const u8);
2791 #[link_name = "llvm.x86.sse2.lfence"]
2793 #[link_name = "llvm.x86.sse2.mfence"]
2795 #[link_name = "llvm.x86.sse2.pavg.b"]
2796 fn pavgb(a
: u8x16
, b
: u8x16
) -> u8x16
;
2797 #[link_name = "llvm.x86.sse2.pavg.w"]
2798 fn pavgw(a
: u16x8
, b
: u16x8
) -> u16x8
;
2799 #[link_name = "llvm.x86.sse2.pmadd.wd"]
2800 fn pmaddwd(a
: i16x8
, b
: i16x8
) -> i32x4
;
2801 #[link_name = "llvm.x86.sse2.pmaxs.w"]
2802 fn pmaxsw(a
: i16x8
, b
: i16x8
) -> i16x8
;
2803 #[link_name = "llvm.x86.sse2.pmaxu.b"]
2804 fn pmaxub(a
: u8x16
, b
: u8x16
) -> u8x16
;
2805 #[link_name = "llvm.x86.sse2.pmins.w"]
2806 fn pminsw(a
: i16x8
, b
: i16x8
) -> i16x8
;
2807 #[link_name = "llvm.x86.sse2.pminu.b"]
2808 fn pminub(a
: u8x16
, b
: u8x16
) -> u8x16
;
2809 #[link_name = "llvm.x86.sse2.pmulh.w"]
2810 fn pmulhw(a
: i16x8
, b
: i16x8
) -> i16x8
;
2811 #[link_name = "llvm.x86.sse2.pmulhu.w"]
2812 fn pmulhuw(a
: u16x8
, b
: u16x8
) -> u16x8
;
2813 #[link_name = "llvm.x86.sse2.pmulu.dq"]
2814 fn pmuludq(a
: u32x4
, b
: u32x4
) -> u64x2
;
2815 #[link_name = "llvm.x86.sse2.psad.bw"]
2816 fn psadbw(a
: u8x16
, b
: u8x16
) -> u64x2
;
2817 #[link_name = "llvm.x86.sse2.pslli.w"]
2818 fn pslliw(a
: i16x8
, imm8
: i32) -> i16x8
;
2819 #[link_name = "llvm.x86.sse2.psll.w"]
2820 fn psllw(a
: i16x8
, count
: i16x8
) -> i16x8
;
2821 #[link_name = "llvm.x86.sse2.pslli.d"]
2822 fn psllid(a
: i32x4
, imm8
: i32) -> i32x4
;
2823 #[link_name = "llvm.x86.sse2.psll.d"]
2824 fn pslld(a
: i32x4
, count
: i32x4
) -> i32x4
;
2825 #[link_name = "llvm.x86.sse2.pslli.q"]
2826 fn pslliq(a
: i64x2
, imm8
: i32) -> i64x2
;
2827 #[link_name = "llvm.x86.sse2.psll.q"]
2828 fn psllq(a
: i64x2
, count
: i64x2
) -> i64x2
;
2829 #[link_name = "llvm.x86.sse2.psrai.w"]
2830 fn psraiw(a
: i16x8
, imm8
: i32) -> i16x8
;
2831 #[link_name = "llvm.x86.sse2.psra.w"]
2832 fn psraw(a
: i16x8
, count
: i16x8
) -> i16x8
;
2833 #[link_name = "llvm.x86.sse2.psrai.d"]
2834 fn psraid(a
: i32x4
, imm8
: i32) -> i32x4
;
2835 #[link_name = "llvm.x86.sse2.psra.d"]
2836 fn psrad(a
: i32x4
, count
: i32x4
) -> i32x4
;
2837 #[link_name = "llvm.x86.sse2.psrli.w"]
2838 fn psrliw(a
: i16x8
, imm8
: i32) -> i16x8
;
2839 #[link_name = "llvm.x86.sse2.psrl.w"]
2840 fn psrlw(a
: i16x8
, count
: i16x8
) -> i16x8
;
2841 #[link_name = "llvm.x86.sse2.psrli.d"]
2842 fn psrlid(a
: i32x4
, imm8
: i32) -> i32x4
;
2843 #[link_name = "llvm.x86.sse2.psrl.d"]
2844 fn psrld(a
: i32x4
, count
: i32x4
) -> i32x4
;
2845 #[link_name = "llvm.x86.sse2.psrli.q"]
2846 fn psrliq(a
: i64x2
, imm8
: i32) -> i64x2
;
2847 #[link_name = "llvm.x86.sse2.psrl.q"]
2848 fn psrlq(a
: i64x2
, count
: i64x2
) -> i64x2
;
2849 #[link_name = "llvm.x86.sse2.cvtdq2ps"]
2850 fn cvtdq2ps(a
: i32x4
) -> __m128
;
2851 #[link_name = "llvm.x86.sse2.cvtps2dq"]
2852 fn cvtps2dq(a
: __m128
) -> i32x4
;
2853 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
2854 fn maskmovdqu(a
: i8x16
, mask
: i8x16
, mem_addr
: *mut i8);
2855 #[link_name = "llvm.x86.sse2.packsswb.128"]
2856 fn packsswb(a
: i16x8
, b
: i16x8
) -> i8x16
;
2857 #[link_name = "llvm.x86.sse2.packssdw.128"]
2858 fn packssdw(a
: i32x4
, b
: i32x4
) -> i16x8
;
2859 #[link_name = "llvm.x86.sse2.packuswb.128"]
2860 fn packuswb(a
: i16x8
, b
: i16x8
) -> u8x16
;
2861 #[link_name = "llvm.x86.sse2.pmovmskb.128"]
2862 fn pmovmskb(a
: i8x16
) -> i32;
2863 #[link_name = "llvm.x86.sse2.max.sd"]
2864 fn maxsd(a
: __m128d
, b
: __m128d
) -> __m128d
;
2865 #[link_name = "llvm.x86.sse2.max.pd"]
2866 fn maxpd(a
: __m128d
, b
: __m128d
) -> __m128d
;
2867 #[link_name = "llvm.x86.sse2.min.sd"]
2868 fn minsd(a
: __m128d
, b
: __m128d
) -> __m128d
;
2869 #[link_name = "llvm.x86.sse2.min.pd"]
2870 fn minpd(a
: __m128d
, b
: __m128d
) -> __m128d
;
2871 #[link_name = "llvm.x86.sse2.sqrt.sd"]
2872 fn sqrtsd(a
: __m128d
) -> __m128d
;
2873 #[link_name = "llvm.x86.sse2.sqrt.pd"]
2874 fn sqrtpd(a
: __m128d
) -> __m128d
;
2875 #[link_name = "llvm.x86.sse2.cmp.sd"]
2876 fn cmpsd(a
: __m128d
, b
: __m128d
, imm8
: i8) -> __m128d
;
2877 #[link_name = "llvm.x86.sse2.cmp.pd"]
2878 fn cmppd(a
: __m128d
, b
: __m128d
, imm8
: i8) -> __m128d
;
2879 #[link_name = "llvm.x86.sse2.comieq.sd"]
2880 fn comieqsd(a
: __m128d
, b
: __m128d
) -> i32;
2881 #[link_name = "llvm.x86.sse2.comilt.sd"]
2882 fn comiltsd(a
: __m128d
, b
: __m128d
) -> i32;
2883 #[link_name = "llvm.x86.sse2.comile.sd"]
2884 fn comilesd(a
: __m128d
, b
: __m128d
) -> i32;
2885 #[link_name = "llvm.x86.sse2.comigt.sd"]
2886 fn comigtsd(a
: __m128d
, b
: __m128d
) -> i32;
2887 #[link_name = "llvm.x86.sse2.comige.sd"]
2888 fn comigesd(a
: __m128d
, b
: __m128d
) -> i32;
2889 #[link_name = "llvm.x86.sse2.comineq.sd"]
2890 fn comineqsd(a
: __m128d
, b
: __m128d
) -> i32;
2891 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
2892 fn ucomieqsd(a
: __m128d
, b
: __m128d
) -> i32;
2893 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
2894 fn ucomiltsd(a
: __m128d
, b
: __m128d
) -> i32;
2895 #[link_name = "llvm.x86.sse2.ucomile.sd"]
2896 fn ucomilesd(a
: __m128d
, b
: __m128d
) -> i32;
2897 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
2898 fn ucomigtsd(a
: __m128d
, b
: __m128d
) -> i32;
2899 #[link_name = "llvm.x86.sse2.ucomige.sd"]
2900 fn ucomigesd(a
: __m128d
, b
: __m128d
) -> i32;
2901 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
2902 fn ucomineqsd(a
: __m128d
, b
: __m128d
) -> i32;
2903 #[link_name = "llvm.x86.sse2.movmsk.pd"]
2904 fn movmskpd(a
: __m128d
) -> i32;
2905 #[link_name = "llvm.x86.sse2.cvtpd2ps"]
2906 fn cvtpd2ps(a
: __m128d
) -> __m128
;
2907 #[link_name = "llvm.x86.sse2.cvtps2pd"]
2908 fn cvtps2pd(a
: __m128
) -> __m128d
;
2909 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
2910 fn cvtpd2dq(a
: __m128d
) -> i32x4
;
2911 #[link_name = "llvm.x86.sse2.cvtsd2si"]
2912 fn cvtsd2si(a
: __m128d
) -> i32;
2913 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
2914 fn cvtsd2ss(a
: __m128
, b
: __m128d
) -> __m128
;
2915 #[link_name = "llvm.x86.sse2.cvtss2sd"]
2916 fn cvtss2sd(a
: __m128d
, b
: __m128
) -> __m128d
;
2917 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
2918 fn cvttpd2dq(a
: __m128d
) -> i32x4
;
2919 #[link_name = "llvm.x86.sse2.cvttsd2si"]
2920 fn cvttsd2si(a
: __m128d
) -> i32;
2921 #[link_name = "llvm.x86.sse2.cvttps2dq"]
2922 fn cvttps2dq(a
: __m128
) -> i32x4
;
2923 #[link_name = "llvm.x86.sse2.storeu.dq"]
2924 fn storeudq(mem_addr
: *mut i8, a
: __m128i
);
2925 #[link_name = "llvm.x86.sse2.storeu.pd"]
2926 fn storeupd(mem_addr
: *mut i8, a
: __m128d
);
2932 core_arch
::{simd::*, x86::*}
,
2939 mem
::{self, transmute}
,
2941 use stdarch_test
::simd_test
;
fn test_mm_pause() {
    // Smoke test: just execute the spin-wait hint instruction.
    unsafe { _mm_pause() }
}
2948 #[simd_test(enable = "sse2")]
2949 unsafe fn test_mm_clflush() {
2951 _mm_clflush(&x
as *const _
);
2954 #[simd_test(enable = "sse2")]
2955 unsafe fn test_mm_lfence() {
2959 #[simd_test(enable = "sse2")]
2960 unsafe fn test_mm_mfence() {
2964 #[simd_test(enable = "sse2")]
2965 unsafe fn test_mm_add_epi8() {
2966 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2968 let b
= _mm_setr_epi8(
2969 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2971 let r
= _mm_add_epi8(a
, b
);
2973 let e
= _mm_setr_epi8(
2974 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
2976 assert_eq_m128i(r
, e
);
#[simd_test(enable = "sse2")]
unsafe fn test_mm_add_epi8_overflow() {
    // Lane-wise i8 addition wraps: 0x7F + 1 == -128.
    let a = _mm_set1_epi8(0x7F);
    let b = _mm_set1_epi8(1);
    let r = _mm_add_epi8(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(-128));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_add_epi16() {
    // Lane-wise 16-bit addition.
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm_add_epi16(a, b);
    let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_add_epi32() {
    // Lane-wise 32-bit addition.
    let a = _mm_setr_epi32(0, 1, 2, 3);
    let b = _mm_setr_epi32(4, 5, 6, 7);
    let r = _mm_add_epi32(a, b);
    let e = _mm_setr_epi32(4, 6, 8, 10);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_add_epi64() {
    // Lane-wise 64-bit addition.
    let a = _mm_setr_epi64x(0, 1);
    let b = _mm_setr_epi64x(2, 3);
    let r = _mm_add_epi64(a, b);
    let e = _mm_setr_epi64x(2, 4);
    assert_eq_m128i(r, e);
}
3014 #[simd_test(enable = "sse2")]
3015 unsafe fn test_mm_adds_epi8() {
3016 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3018 let b
= _mm_setr_epi8(
3019 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3021 let r
= _mm_adds_epi8(a
, b
);
3023 let e
= _mm_setr_epi8(
3024 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3026 assert_eq_m128i(r
, e
);
#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi8_saturate_positive() {
    // Signed saturating add clamps at i8::MAX, so the result equals `a`.
    let a = _mm_set1_epi8(0x7F);
    let b = _mm_set1_epi8(1);
    let r = _mm_adds_epi8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi8_saturate_negative() {
    // Signed saturating add clamps at i8::MIN, so the result equals `a`.
    let a = _mm_set1_epi8(-0x80);
    let b = _mm_set1_epi8(-1);
    let r = _mm_adds_epi8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi16() {
    // Non-saturating case: plain lane-wise addition.
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm_adds_epi16(a, b);
    let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi16_saturate_positive() {
    // Clamps at i16::MAX.
    let a = _mm_set1_epi16(0x7FFF);
    let b = _mm_set1_epi16(1);
    let r = _mm_adds_epi16(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epi16_saturate_negative() {
    // Clamps at i16::MIN.
    let a = _mm_set1_epi16(-0x8000);
    let b = _mm_set1_epi16(-1);
    let r = _mm_adds_epi16(a, b);
    assert_eq_m128i(r, a);
}
3070 #[simd_test(enable = "sse2")]
3071 unsafe fn test_mm_adds_epu8() {
3072 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3074 let b
= _mm_setr_epi8(
3075 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3077 let r
= _mm_adds_epu8(a
, b
);
3079 let e
= _mm_setr_epi8(
3080 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3082 assert_eq_m128i(r
, e
);
#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epu8_saturate() {
    // Unsigned saturating add clamps at u8::MAX (all-ones), so r == a.
    let a = _mm_set1_epi8(!0);
    let b = _mm_set1_epi8(1);
    let r = _mm_adds_epu8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epu16() {
    // Non-saturating case: plain lane-wise addition.
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm_adds_epu16(a, b);
    let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_adds_epu16_saturate() {
    // Clamps at u16::MAX (all-ones), so r == a.
    let a = _mm_set1_epi16(!0);
    let b = _mm_set1_epi16(1);
    let r = _mm_adds_epu16(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_avg_epu8() {
    // Rounded average: (3 + 9 + 1) >> 1 == 6.
    let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
    let r = _mm_avg_epu8(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(6));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_avg_epu16() {
    // Rounded average: (3 + 9 + 1) >> 1 == 6.
    let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
    let r = _mm_avg_epu16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(6));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_madd_epi16() {
    // Multiply adjacent 16-bit pairs, add horizontally:
    // e.g. 1*9 + 2*10 = 29, 3*11 + 4*12 = 81, ...
    let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
    let r = _mm_madd_epi16(a, b);
    let e = _mm_setr_epi32(29, 81, 149, 233);
    assert_eq_m128i(r, e);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_max_epi16() {
    // Signed max: 1 > -1.
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(-1);
    let r = _mm_max_epi16(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_max_epu8() {
    // Unsigned max: !0 (255) > 1.
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(!0);
    let r = _mm_max_epu8(a, b);
    assert_eq_m128i(r, b);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_min_epi16() {
    // Signed min: -1 < 1.
    let a = _mm_set1_epi16(1);
    let b = _mm_set1_epi16(-1);
    let r = _mm_min_epi16(a, b);
    assert_eq_m128i(r, b);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_min_epu8() {
    // Unsigned min: 1 < !0 (255).
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(!0);
    let r = _mm_min_epu8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_mulhi_epi16() {
    // High 16 bits of the signed 32-bit product 1000 * -1001.
    let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
    let r = _mm_mulhi_epi16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(-16));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_mulhi_epu16() {
    // High 16 bits of the unsigned 32-bit product 1000 * 1001.
    let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
    let r = _mm_mulhi_epu16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(15));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_mullo_epi16() {
    // Low 16 bits of 1000 * -1001 (truncated product).
    let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
    let r = _mm_mullo_epi16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(-17960));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_mul_epu32() {
    // Multiplies only the low 32 bits of each 64-bit lane; the high 32 bits
    // of the second lanes (1 << 34, 1 << 35) are zero in their low words.
    let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
    let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
    let r = _mm_mul_epu32(a, b);
    let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
    assert_eq_m128i(r, e);
}
3195 #[simd_test(enable = "sse2")]
3196 unsafe fn test_mm_sad_epu8() {
3198 let a
= _mm_setr_epi8(
3199 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3201 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3204 let b
= _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3205 let r
= _mm_sad_epu8(a
, b
);
3206 let e
= _mm_setr_epi64x(1020, 614);
3207 assert_eq_m128i(r
, e
);
#[simd_test(enable = "sse2")]
unsafe fn test_mm_sub_epi8() {
    // Lane-wise 8-bit subtraction: 5 - 6 == -1.
    let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
    let r = _mm_sub_epi8(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sub_epi16() {
    // Lane-wise 16-bit subtraction.
    let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
    let r = _mm_sub_epi16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sub_epi32() {
    // Lane-wise 32-bit subtraction.
    let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
    let r = _mm_sub_epi32(a, b);
    assert_eq_m128i(r, _mm_set1_epi32(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sub_epi64() {
    // Lane-wise 64-bit subtraction.
    let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
    let r = _mm_sub_epi64(a, b);
    assert_eq_m128i(r, _mm_set1_epi64x(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epi8() {
    // Non-saturating case: 5 - 2 == 3.
    let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
    let r = _mm_subs_epi8(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(3));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epi8_saturate_positive() {
    // 0x7F - (-1) saturates at i8::MAX, so r == a.
    let a = _mm_set1_epi8(0x7F);
    let b = _mm_set1_epi8(-1);
    let r = _mm_subs_epi8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epi8_saturate_negative() {
    // -0x80 - 1 saturates at i8::MIN, so r == a.
    let a = _mm_set1_epi8(-0x80);
    let b = _mm_set1_epi8(1);
    let r = _mm_subs_epi8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epi16() {
    // Non-saturating case.
    let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
    let r = _mm_subs_epi16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(3));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epi16_saturate_positive() {
    // Saturates at i16::MAX.
    let a = _mm_set1_epi16(0x7FFF);
    let b = _mm_set1_epi16(-1);
    let r = _mm_subs_epi16(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epi16_saturate_negative() {
    // Saturates at i16::MIN.
    let a = _mm_set1_epi16(-0x8000);
    let b = _mm_set1_epi16(1);
    let r = _mm_subs_epi16(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epu8() {
    // Non-saturating unsigned case.
    let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
    let r = _mm_subs_epu8(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(3));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epu8_saturate() {
    // 0 - 1 saturates at 0 for unsigned lanes, so r == a.
    let a = _mm_set1_epi8(0);
    let b = _mm_set1_epi8(1);
    let r = _mm_subs_epu8(a, b);
    assert_eq_m128i(r, a);
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epu16() {
    // Non-saturating unsigned case.
    let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
    let r = _mm_subs_epu16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(3));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_subs_epu16_saturate() {
    // 0 - 1 saturates at 0 for unsigned lanes, so r == a.
    let a = _mm_set1_epi16(0);
    let b = _mm_set1_epi16(1);
    let r = _mm_subs_epu16(a, b);
    assert_eq_m128i(r, a);
}
3314 #[simd_test(enable = "sse2")]
3315 unsafe fn test_mm_slli_si128() {
3317 let a
= _mm_setr_epi8(
3318 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3320 let r
= _mm_slli_si128
::<1>(a
);
3321 let e
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3322 assert_eq_m128i(r
, e
);
3325 let a
= _mm_setr_epi8(
3326 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3328 let r
= _mm_slli_si128
::<15>(a
);
3329 let e
= _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3330 assert_eq_m128i(r
, e
);
3333 let a
= _mm_setr_epi8(
3334 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3336 let r
= _mm_slli_si128
::<16>(a
);
3337 assert_eq_m128i(r
, _mm_set1_epi8(0));
3340 #[simd_test(enable = "sse2")]
3341 unsafe fn test_mm_slli_epi16() {
3343 let a
= _mm_setr_epi16(
3344 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3346 let r
= _mm_slli_epi16
::<4>(a
);
3349 let e
= _mm_setr_epi16(
3350 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
3353 assert_eq_m128i(r
, e
);
#[simd_test(enable = "sse2")]
unsafe fn test_mm_sll_epi16() {
    // The shift count comes from the low 64 bits of the count vector only:
    // a count in lane 0 shifts; a count placed in lane 4 is ignored.
    let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
    assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0));
    let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
    assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_slli_epi32() {
    // Immediate left shift by 4.
    let r = _mm_slli_epi32::<4>(_mm_set1_epi32(0xFFFF));
    assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sll_epi32() {
    // Vector-count left shift by 4 (count in the low lane).
    let a = _mm_set1_epi32(0xFFFF);
    let b = _mm_setr_epi32(4, 0, 0, 0);
    let r = _mm_sll_epi32(a, b);
    assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_slli_epi64() {
    // Immediate left shift by 4.
    let r = _mm_slli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
    assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sll_epi64() {
    // Vector-count left shift by 4 (count in the low lane).
    let a = _mm_set1_epi64x(0xFFFFFFFF);
    let b = _mm_setr_epi64x(4, 0);
    let r = _mm_sll_epi64(a, b);
    assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_srai_epi16() {
    // Arithmetic right shift preserves the sign: -1 >> 1 == -1.
    let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1));
    assert_eq_m128i(r, _mm_set1_epi16(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sra_epi16() {
    // Vector-count arithmetic right shift; sign is preserved.
    let a = _mm_set1_epi16(-1);
    let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm_sra_epi16(a, b);
    assert_eq_m128i(r, _mm_set1_epi16(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_srai_epi32() {
    // Arithmetic right shift preserves the sign: -1 >> 1 == -1.
    let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1));
    assert_eq_m128i(r, _mm_set1_epi32(-1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_sra_epi32() {
    // Vector-count arithmetic right shift; sign is preserved.
    let a = _mm_set1_epi32(-1);
    let b = _mm_setr_epi32(1, 0, 0, 0);
    let r = _mm_sra_epi32(a, b);
    assert_eq_m128i(r, _mm_set1_epi32(-1));
}
3421 #[simd_test(enable = "sse2")]
3422 unsafe fn test_mm_srli_si128() {
3424 let a
= _mm_setr_epi8(
3425 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3427 let r
= _mm_srli_si128
::<1>(a
);
3429 let e
= _mm_setr_epi8(
3430 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3432 assert_eq_m128i(r
, e
);
3435 let a
= _mm_setr_epi8(
3436 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3438 let r
= _mm_srli_si128
::<15>(a
);
3439 let e
= _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3440 assert_eq_m128i(r
, e
);
3443 let a
= _mm_setr_epi8(
3444 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3446 let r
= _mm_srli_si128
::<16>(a
);
3447 assert_eq_m128i(r
, _mm_set1_epi8(0));
3450 #[simd_test(enable = "sse2")]
3451 unsafe fn test_mm_srli_epi16() {
3453 let a
= _mm_setr_epi16(
3454 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3456 let r
= _mm_srli_epi16
::<4>(a
);
3458 let e
= _mm_setr_epi16(
3459 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
3461 assert_eq_m128i(r
, e
);
#[simd_test(enable = "sse2")]
unsafe fn test_mm_srl_epi16() {
    // The shift count comes from the low 64 bits of the count vector only:
    // a count in lane 0 shifts; a count placed in lane 4 is ignored.
    let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
    let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
    assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0));
    let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
    assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_srli_epi32() {
    // Immediate logical right shift by 4.
    let r = _mm_srli_epi32::<4>(_mm_set1_epi32(0xFFFF));
    assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_srl_epi32() {
    // Vector-count logical right shift by 4 (count in the low lane).
    let a = _mm_set1_epi32(0xFFFF);
    let b = _mm_setr_epi32(4, 0, 0, 0);
    let r = _mm_srl_epi32(a, b);
    assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_srli_epi64() {
    // Immediate logical right shift by 4.
    let r = _mm_srli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
    assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_srl_epi64() {
    // Vector-count logical right shift by 4 (count in the low lane).
    let a = _mm_set1_epi64x(0xFFFFFFFF);
    let b = _mm_setr_epi64x(4, 0);
    let r = _mm_srl_epi64(a, b);
    assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_and_si128() {
    // 5 & 3 == 1.
    let a = _mm_set1_epi8(5);
    let b = _mm_set1_epi8(3);
    let r = _mm_and_si128(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(1));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_andnot_si128() {
    // (!5) & 3 == 2.
    let a = _mm_set1_epi8(5);
    let b = _mm_set1_epi8(3);
    let r = _mm_andnot_si128(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(2));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_or_si128() {
    // 5 | 3 == 7.
    let a = _mm_set1_epi8(5);
    let b = _mm_set1_epi8(3);
    let r = _mm_or_si128(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(7));
}

#[simd_test(enable = "sse2")]
unsafe fn test_mm_xor_si128() {
    // 5 ^ 3 == 6.
    let a = _mm_set1_epi8(5);
    let b = _mm_set1_epi8(3);
    let r = _mm_xor_si128(a, b);
    assert_eq_m128i(r, _mm_set1_epi8(6));
}
3533 #[simd_test(enable = "sse2")]
3534 unsafe fn test_mm_cmpeq_epi8() {
3535 let a
= _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3536 let b
= _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3537 let r
= _mm_cmpeq_epi8(a
, b
);
3542 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3547 #[simd_test(enable = "sse2")]
3548 unsafe fn test_mm_cmpeq_epi16() {
3549 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3550 let b
= _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3551 let r
= _mm_cmpeq_epi16(a
, b
);
3552 assert_eq_m128i(r
, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3555 #[simd_test(enable = "sse2")]
3556 unsafe fn test_mm_cmpeq_epi32() {
3557 let a
= _mm_setr_epi32(0, 1, 2, 3);
3558 let b
= _mm_setr_epi32(3, 2, 2, 0);
3559 let r
= _mm_cmpeq_epi32(a
, b
);
3560 assert_eq_m128i(r
, _mm_setr_epi32(0, 0, !0, 0));
3563 #[simd_test(enable = "sse2")]
3564 unsafe fn test_mm_cmpgt_epi8() {
3565 let a
= _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3566 let b
= _mm_set1_epi8(0);
3567 let r
= _mm_cmpgt_epi8(a
, b
);
3568 let e
= _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3569 assert_eq_m128i(r
, e
);
3572 #[simd_test(enable = "sse2")]
3573 unsafe fn test_mm_cmpgt_epi16() {
3574 let a
= _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3575 let b
= _mm_set1_epi16(0);
3576 let r
= _mm_cmpgt_epi16(a
, b
);
3577 let e
= _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3578 assert_eq_m128i(r
, e
);
3581 #[simd_test(enable = "sse2")]
3582 unsafe fn test_mm_cmpgt_epi32() {
3583 let a
= _mm_set_epi32(5, 0, 0, 0);
3584 let b
= _mm_set1_epi32(0);
3585 let r
= _mm_cmpgt_epi32(a
, b
);
3586 assert_eq_m128i(r
, _mm_set_epi32(!0, 0, 0, 0));
3589 #[simd_test(enable = "sse2")]
3590 unsafe fn test_mm_cmplt_epi8() {
3591 let a
= _mm_set1_epi8(0);
3592 let b
= _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3593 let r
= _mm_cmplt_epi8(a
, b
);
3594 let e
= _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3595 assert_eq_m128i(r
, e
);
3598 #[simd_test(enable = "sse2")]
3599 unsafe fn test_mm_cmplt_epi16() {
3600 let a
= _mm_set1_epi16(0);
3601 let b
= _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3602 let r
= _mm_cmplt_epi16(a
, b
);
3603 let e
= _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3604 assert_eq_m128i(r
, e
);
3607 #[simd_test(enable = "sse2")]
3608 unsafe fn test_mm_cmplt_epi32() {
3609 let a
= _mm_set1_epi32(0);
3610 let b
= _mm_set_epi32(5, 0, 0, 0);
3611 let r
= _mm_cmplt_epi32(a
, b
);
3612 assert_eq_m128i(r
, _mm_set_epi32(!0, 0, 0, 0));
3615 #[simd_test(enable = "sse2")]
3616 unsafe fn test_mm_cvtepi32_pd() {
3617 let a
= _mm_set_epi32(35, 25, 15, 5);
3618 let r
= _mm_cvtepi32_pd(a
);
3619 assert_eq_m128d(r
, _mm_setr_pd(5.0, 15.0));
3622 #[simd_test(enable = "sse2")]
3623 unsafe fn test_mm_cvtsi32_sd() {
3624 let a
= _mm_set1_pd(3.5);
3625 let r
= _mm_cvtsi32_sd(a
, 5);
3626 assert_eq_m128d(r
, _mm_setr_pd(5.0, 3.5));
3629 #[simd_test(enable = "sse2")]
3630 unsafe fn test_mm_cvtepi32_ps() {
3631 let a
= _mm_setr_epi32(1, 2, 3, 4);
3632 let r
= _mm_cvtepi32_ps(a
);
3633 assert_eq_m128(r
, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3636 #[simd_test(enable = "sse2")]
3637 unsafe fn test_mm_cvtps_epi32() {
3638 let a
= _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3639 let r
= _mm_cvtps_epi32(a
);
3640 assert_eq_m128i(r
, _mm_setr_epi32(1, 2, 3, 4));
3643 #[simd_test(enable = "sse2")]
3644 unsafe fn test_mm_cvtsi32_si128() {
3645 let r
= _mm_cvtsi32_si128(5);
3646 assert_eq_m128i(r
, _mm_setr_epi32(5, 0, 0, 0));
3649 #[simd_test(enable = "sse2")]
3650 unsafe fn test_mm_cvtsi128_si32() {
3651 let r
= _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3655 #[simd_test(enable = "sse2")]
3656 unsafe fn test_mm_set_epi64x() {
3657 let r
= _mm_set_epi64x(0, 1);
3658 assert_eq_m128i(r
, _mm_setr_epi64x(1, 0));
3661 #[simd_test(enable = "sse2")]
3662 unsafe fn test_mm_set_epi32() {
3663 let r
= _mm_set_epi32(0, 1, 2, 3);
3664 assert_eq_m128i(r
, _mm_setr_epi32(3, 2, 1, 0));
3667 #[simd_test(enable = "sse2")]
3668 unsafe fn test_mm_set_epi16() {
3669 let r
= _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3670 assert_eq_m128i(r
, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3673 #[simd_test(enable = "sse2")]
3674 unsafe fn test_mm_set_epi8() {
3676 let r
= _mm_set_epi8(
3677 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3680 let e
= _mm_setr_epi8(
3681 15, 14, 13, 12, 11, 10, 9, 8,
3682 7, 6, 5, 4, 3, 2, 1, 0,
3684 assert_eq_m128i(r
, e
);
3687 #[simd_test(enable = "sse2")]
3688 unsafe fn test_mm_set1_epi64x() {
3689 let r
= _mm_set1_epi64x(1);
3690 assert_eq_m128i(r
, _mm_set1_epi64x(1));
3693 #[simd_test(enable = "sse2")]
3694 unsafe fn test_mm_set1_epi32() {
3695 let r
= _mm_set1_epi32(1);
3696 assert_eq_m128i(r
, _mm_set1_epi32(1));
3699 #[simd_test(enable = "sse2")]
3700 unsafe fn test_mm_set1_epi16() {
3701 let r
= _mm_set1_epi16(1);
3702 assert_eq_m128i(r
, _mm_set1_epi16(1));
3705 #[simd_test(enable = "sse2")]
3706 unsafe fn test_mm_set1_epi8() {
3707 let r
= _mm_set1_epi8(1);
3708 assert_eq_m128i(r
, _mm_set1_epi8(1));
3711 #[simd_test(enable = "sse2")]
3712 unsafe fn test_mm_setr_epi32() {
3713 let r
= _mm_setr_epi32(0, 1, 2, 3);
3714 assert_eq_m128i(r
, _mm_setr_epi32(0, 1, 2, 3));
3717 #[simd_test(enable = "sse2")]
3718 unsafe fn test_mm_setr_epi16() {
3719 let r
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3720 assert_eq_m128i(r
, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3723 #[simd_test(enable = "sse2")]
3724 unsafe fn test_mm_setr_epi8() {
3726 let r
= _mm_setr_epi8(
3727 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3730 let e
= _mm_setr_epi8(
3731 0, 1, 2, 3, 4, 5, 6, 7,
3732 8, 9, 10, 11, 12, 13, 14, 15,
3734 assert_eq_m128i(r
, e
);
3737 #[simd_test(enable = "sse2")]
3738 unsafe fn test_mm_setzero_si128() {
3739 let r
= _mm_setzero_si128();
3740 assert_eq_m128i(r
, _mm_set1_epi64x(0));
3743 #[simd_test(enable = "sse2")]
3744 unsafe fn test_mm_loadl_epi64() {
3745 let a
= _mm_setr_epi64x(6, 5);
3746 let r
= _mm_loadl_epi64(&a
as *const _
);
3747 assert_eq_m128i(r
, _mm_setr_epi64x(6, 0));
3750 #[simd_test(enable = "sse2")]
3751 unsafe fn test_mm_load_si128() {
3752 let a
= _mm_set_epi64x(5, 6);
3753 let r
= _mm_load_si128(&a
as *const _
as *const _
);
3754 assert_eq_m128i(a
, r
);
3757 #[simd_test(enable = "sse2")]
3758 unsafe fn test_mm_loadu_si128() {
3759 let a
= _mm_set_epi64x(5, 6);
3760 let r
= _mm_loadu_si128(&a
as *const _
as *const _
);
3761 assert_eq_m128i(a
, r
);
3764 #[simd_test(enable = "sse2")]
3765 unsafe fn test_mm_maskmoveu_si128() {
3766 let a
= _mm_set1_epi8(9);
3768 let mask
= _mm_set_epi8(
3769 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3770 0, 0, 0, 0, 0, 0, 0, 0,
3772 let mut r
= _mm_set1_epi8(0);
3773 _mm_maskmoveu_si128(a
, mask
, &mut r
as *mut _
as *mut i8);
3774 let e
= _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3775 assert_eq_m128i(r
, e
);
3778 #[simd_test(enable = "sse2")]
3779 unsafe fn test_mm_store_si128() {
3780 let a
= _mm_set1_epi8(9);
3781 let mut r
= _mm_set1_epi8(0);
3782 _mm_store_si128(&mut r
as *mut _
as *mut __m128i
, a
);
3783 assert_eq_m128i(r
, a
);
3786 #[simd_test(enable = "sse2")]
3787 unsafe fn test_mm_storeu_si128() {
3788 let a
= _mm_set1_epi8(9);
3789 let mut r
= _mm_set1_epi8(0);
3790 _mm_storeu_si128(&mut r
as *mut _
as *mut __m128i
, a
);
3791 assert_eq_m128i(r
, a
);
3794 #[simd_test(enable = "sse2")]
3795 unsafe fn test_mm_storel_epi64() {
3796 let a
= _mm_setr_epi64x(2, 9);
3797 let mut r
= _mm_set1_epi8(0);
3798 _mm_storel_epi64(&mut r
as *mut _
as *mut __m128i
, a
);
3799 assert_eq_m128i(r
, _mm_setr_epi64x(2, 0));
3802 #[simd_test(enable = "sse2")]
3803 unsafe fn test_mm_stream_si128() {
3804 let a
= _mm_setr_epi32(1, 2, 3, 4);
3805 let mut r
= _mm_undefined_si128();
3806 _mm_stream_si128(&mut r
as *mut _
, a
);
3807 assert_eq_m128i(r
, a
);
3810 #[simd_test(enable = "sse2")]
3811 unsafe fn test_mm_stream_si32() {
3813 let mut mem
= boxed
::Box
::<i32>::new(-1);
3814 _mm_stream_si32(&mut *mem
as *mut i32, a
);
3815 assert_eq
!(a
, *mem
);
3818 #[simd_test(enable = "sse2")]
3819 unsafe fn test_mm_move_epi64() {
3820 let a
= _mm_setr_epi64x(5, 6);
3821 let r
= _mm_move_epi64(a
);
3822 assert_eq_m128i(r
, _mm_setr_epi64x(5, 0));
3825 #[simd_test(enable = "sse2")]
3826 unsafe fn test_mm_packs_epi16() {
3827 let a
= _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
3828 let b
= _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
3829 let r
= _mm_packs_epi16(a
, b
);
3834 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
3839 #[simd_test(enable = "sse2")]
3840 unsafe fn test_mm_packs_epi32() {
3841 let a
= _mm_setr_epi32(0x8000, -0x8001, 0, 0);
3842 let b
= _mm_setr_epi32(0, 0, -0x8001, 0x8000);
3843 let r
= _mm_packs_epi32(a
, b
);
3846 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
3850 #[simd_test(enable = "sse2")]
3851 unsafe fn test_mm_packus_epi16() {
3852 let a
= _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
3853 let b
= _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
3854 let r
= _mm_packus_epi16(a
, b
);
3857 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
3861 #[simd_test(enable = "sse2")]
3862 unsafe fn test_mm_extract_epi16() {
3863 let a
= _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
3864 let r1
= _mm_extract_epi16
::<0>(a
);
3865 let r2
= _mm_extract_epi16
::<3>(a
);
3866 assert_eq
!(r1
, 0xFFFF);
3870 #[simd_test(enable = "sse2")]
3871 unsafe fn test_mm_insert_epi16() {
3872 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3873 let r
= _mm_insert_epi16
::<0>(a
, 9);
3874 let e
= _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
3875 assert_eq_m128i(r
, e
);
3878 #[simd_test(enable = "sse2")]
3879 unsafe fn test_mm_movemask_epi8() {
3881 let a
= _mm_setr_epi8(
3882 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
3883 0b0101, 0b1111_0000u8 as i8, 0, 0,
3884 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
3885 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
3887 let r
= _mm_movemask_epi8(a
);
3888 assert_eq
!(r
, 0b10100110_00100101);
3891 #[simd_test(enable = "sse2")]
3892 unsafe fn test_mm_shuffle_epi32() {
3893 let a
= _mm_setr_epi32(5, 10, 15, 20);
3894 let r
= _mm_shuffle_epi32
::<0b00_01_01_11>(a
);
3895 let e
= _mm_setr_epi32(20, 10, 10, 5);
3896 assert_eq_m128i(r
, e
);
3899 #[simd_test(enable = "sse2")]
3900 unsafe fn test_mm_shufflehi_epi16() {
3901 let a
= _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
3902 let r
= _mm_shufflehi_epi16
::<0b00_01_01_11>(a
);
3903 let e
= _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
3904 assert_eq_m128i(r
, e
);
3907 #[simd_test(enable = "sse2")]
3908 unsafe fn test_mm_shufflelo_epi16() {
3909 let a
= _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
3910 let r
= _mm_shufflelo_epi16
::<0b00_01_01_11>(a
);
3911 let e
= _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
3912 assert_eq_m128i(r
, e
);
3915 #[simd_test(enable = "sse2")]
3916 unsafe fn test_mm_unpackhi_epi8() {
3918 let a
= _mm_setr_epi8(
3919 0, 1, 2, 3, 4, 5, 6, 7,
3920 8, 9, 10, 11, 12, 13, 14, 15,
3923 let b
= _mm_setr_epi8(
3924 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3926 let r
= _mm_unpackhi_epi8(a
, b
);
3928 let e
= _mm_setr_epi8(
3929 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
3931 assert_eq_m128i(r
, e
);
3934 #[simd_test(enable = "sse2")]
3935 unsafe fn test_mm_unpackhi_epi16() {
3936 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3937 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3938 let r
= _mm_unpackhi_epi16(a
, b
);
3939 let e
= _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
3940 assert_eq_m128i(r
, e
);
3943 #[simd_test(enable = "sse2")]
3944 unsafe fn test_mm_unpackhi_epi32() {
3945 let a
= _mm_setr_epi32(0, 1, 2, 3);
3946 let b
= _mm_setr_epi32(4, 5, 6, 7);
3947 let r
= _mm_unpackhi_epi32(a
, b
);
3948 let e
= _mm_setr_epi32(2, 6, 3, 7);
3949 assert_eq_m128i(r
, e
);
3952 #[simd_test(enable = "sse2")]
3953 unsafe fn test_mm_unpackhi_epi64() {
3954 let a
= _mm_setr_epi64x(0, 1);
3955 let b
= _mm_setr_epi64x(2, 3);
3956 let r
= _mm_unpackhi_epi64(a
, b
);
3957 let e
= _mm_setr_epi64x(1, 3);
3958 assert_eq_m128i(r
, e
);
3961 #[simd_test(enable = "sse2")]
3962 unsafe fn test_mm_unpacklo_epi8() {
3964 let a
= _mm_setr_epi8(
3965 0, 1, 2, 3, 4, 5, 6, 7,
3966 8, 9, 10, 11, 12, 13, 14, 15,
3969 let b
= _mm_setr_epi8(
3970 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3972 let r
= _mm_unpacklo_epi8(a
, b
);
3974 let e
= _mm_setr_epi8(
3975 0, 16, 1, 17, 2, 18, 3, 19,
3976 4, 20, 5, 21, 6, 22, 7, 23,
3978 assert_eq_m128i(r
, e
);
3981 #[simd_test(enable = "sse2")]
3982 unsafe fn test_mm_unpacklo_epi16() {
3983 let a
= _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3984 let b
= _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3985 let r
= _mm_unpacklo_epi16(a
, b
);
3986 let e
= _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
3987 assert_eq_m128i(r
, e
);
3990 #[simd_test(enable = "sse2")]
3991 unsafe fn test_mm_unpacklo_epi32() {
3992 let a
= _mm_setr_epi32(0, 1, 2, 3);
3993 let b
= _mm_setr_epi32(4, 5, 6, 7);
3994 let r
= _mm_unpacklo_epi32(a
, b
);
3995 let e
= _mm_setr_epi32(0, 4, 1, 5);
3996 assert_eq_m128i(r
, e
);
3999 #[simd_test(enable = "sse2")]
4000 unsafe fn test_mm_unpacklo_epi64() {
4001 let a
= _mm_setr_epi64x(0, 1);
4002 let b
= _mm_setr_epi64x(2, 3);
4003 let r
= _mm_unpacklo_epi64(a
, b
);
4004 let e
= _mm_setr_epi64x(0, 2);
4005 assert_eq_m128i(r
, e
);
4008 #[simd_test(enable = "sse2")]
4009 unsafe fn test_mm_add_sd() {
4010 let a
= _mm_setr_pd(1.0, 2.0);
4011 let b
= _mm_setr_pd(5.0, 10.0);
4012 let r
= _mm_add_sd(a
, b
);
4013 assert_eq_m128d(r
, _mm_setr_pd(6.0, 2.0));
4016 #[simd_test(enable = "sse2")]
4017 unsafe fn test_mm_add_pd() {
4018 let a
= _mm_setr_pd(1.0, 2.0);
4019 let b
= _mm_setr_pd(5.0, 10.0);
4020 let r
= _mm_add_pd(a
, b
);
4021 assert_eq_m128d(r
, _mm_setr_pd(6.0, 12.0));
4024 #[simd_test(enable = "sse2")]
4025 unsafe fn test_mm_div_sd() {
4026 let a
= _mm_setr_pd(1.0, 2.0);
4027 let b
= _mm_setr_pd(5.0, 10.0);
4028 let r
= _mm_div_sd(a
, b
);
4029 assert_eq_m128d(r
, _mm_setr_pd(0.2, 2.0));
4032 #[simd_test(enable = "sse2")]
4033 unsafe fn test_mm_div_pd() {
4034 let a
= _mm_setr_pd(1.0, 2.0);
4035 let b
= _mm_setr_pd(5.0, 10.0);
4036 let r
= _mm_div_pd(a
, b
);
4037 assert_eq_m128d(r
, _mm_setr_pd(0.2, 0.2));
4040 #[simd_test(enable = "sse2")]
4041 unsafe fn test_mm_max_sd() {
4042 let a
= _mm_setr_pd(1.0, 2.0);
4043 let b
= _mm_setr_pd(5.0, 10.0);
4044 let r
= _mm_max_sd(a
, b
);
4045 assert_eq_m128d(r
, _mm_setr_pd(5.0, 2.0));
4048 #[simd_test(enable = "sse2")]
4049 unsafe fn test_mm_max_pd() {
4050 let a
= _mm_setr_pd(1.0, 2.0);
4051 let b
= _mm_setr_pd(5.0, 10.0);
4052 let r
= _mm_max_pd(a
, b
);
4053 assert_eq_m128d(r
, _mm_setr_pd(5.0, 10.0));
4056 #[simd_test(enable = "sse2")]
4057 unsafe fn test_mm_min_sd() {
4058 let a
= _mm_setr_pd(1.0, 2.0);
4059 let b
= _mm_setr_pd(5.0, 10.0);
4060 let r
= _mm_min_sd(a
, b
);
4061 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.0));
4064 #[simd_test(enable = "sse2")]
4065 unsafe fn test_mm_min_pd() {
4066 let a
= _mm_setr_pd(1.0, 2.0);
4067 let b
= _mm_setr_pd(5.0, 10.0);
4068 let r
= _mm_min_pd(a
, b
);
4069 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.0));
4072 #[simd_test(enable = "sse2")]
4073 unsafe fn test_mm_mul_sd() {
4074 let a
= _mm_setr_pd(1.0, 2.0);
4075 let b
= _mm_setr_pd(5.0, 10.0);
4076 let r
= _mm_mul_sd(a
, b
);
4077 assert_eq_m128d(r
, _mm_setr_pd(5.0, 2.0));
4080 #[simd_test(enable = "sse2")]
4081 unsafe fn test_mm_mul_pd() {
4082 let a
= _mm_setr_pd(1.0, 2.0);
4083 let b
= _mm_setr_pd(5.0, 10.0);
4084 let r
= _mm_mul_pd(a
, b
);
4085 assert_eq_m128d(r
, _mm_setr_pd(5.0, 20.0));
4088 #[simd_test(enable = "sse2")]
4089 unsafe fn test_mm_sqrt_sd() {
4090 let a
= _mm_setr_pd(1.0, 2.0);
4091 let b
= _mm_setr_pd(5.0, 10.0);
4092 let r
= _mm_sqrt_sd(a
, b
);
4093 assert_eq_m128d(r
, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4096 #[simd_test(enable = "sse2")]
4097 unsafe fn test_mm_sqrt_pd() {
4098 let r
= _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4099 assert_eq_m128d(r
, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4102 #[simd_test(enable = "sse2")]
4103 unsafe fn test_mm_sub_sd() {
4104 let a
= _mm_setr_pd(1.0, 2.0);
4105 let b
= _mm_setr_pd(5.0, 10.0);
4106 let r
= _mm_sub_sd(a
, b
);
4107 assert_eq_m128d(r
, _mm_setr_pd(-4.0, 2.0));
4110 #[simd_test(enable = "sse2")]
4111 unsafe fn test_mm_sub_pd() {
4112 let a
= _mm_setr_pd(1.0, 2.0);
4113 let b
= _mm_setr_pd(5.0, 10.0);
4114 let r
= _mm_sub_pd(a
, b
);
4115 assert_eq_m128d(r
, _mm_setr_pd(-4.0, -8.0));
4118 #[simd_test(enable = "sse2")]
4119 unsafe fn test_mm_and_pd() {
4120 let a
= transmute(u64x2
::splat(5));
4121 let b
= transmute(u64x2
::splat(3));
4122 let r
= _mm_and_pd(a
, b
);
4123 let e
= transmute(u64x2
::splat(1));
4124 assert_eq_m128d(r
, e
);
4127 #[simd_test(enable = "sse2")]
4128 unsafe fn test_mm_andnot_pd() {
4129 let a
= transmute(u64x2
::splat(5));
4130 let b
= transmute(u64x2
::splat(3));
4131 let r
= _mm_andnot_pd(a
, b
);
4132 let e
= transmute(u64x2
::splat(2));
4133 assert_eq_m128d(r
, e
);
4136 #[simd_test(enable = "sse2")]
4137 unsafe fn test_mm_or_pd() {
4138 let a
= transmute(u64x2
::splat(5));
4139 let b
= transmute(u64x2
::splat(3));
4140 let r
= _mm_or_pd(a
, b
);
4141 let e
= transmute(u64x2
::splat(7));
4142 assert_eq_m128d(r
, e
);
4145 #[simd_test(enable = "sse2")]
4146 unsafe fn test_mm_xor_pd() {
4147 let a
= transmute(u64x2
::splat(5));
4148 let b
= transmute(u64x2
::splat(3));
4149 let r
= _mm_xor_pd(a
, b
);
4150 let e
= transmute(u64x2
::splat(6));
4151 assert_eq_m128d(r
, e
);
4154 #[simd_test(enable = "sse2")]
4155 unsafe fn test_mm_cmpeq_sd() {
4156 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4157 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4158 let r
= transmute
::<_
, __m128i
>(_mm_cmpeq_sd(a
, b
));
4159 assert_eq_m128i(r
, e
);
4162 #[simd_test(enable = "sse2")]
4163 unsafe fn test_mm_cmplt_sd() {
4164 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4165 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4166 let r
= transmute
::<_
, __m128i
>(_mm_cmplt_sd(a
, b
));
4167 assert_eq_m128i(r
, e
);
4170 #[simd_test(enable = "sse2")]
4171 unsafe fn test_mm_cmple_sd() {
4172 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4173 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4174 let r
= transmute
::<_
, __m128i
>(_mm_cmple_sd(a
, b
));
4175 assert_eq_m128i(r
, e
);
4178 #[simd_test(enable = "sse2")]
4179 unsafe fn test_mm_cmpgt_sd() {
4180 let (a
, b
) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4181 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4182 let r
= transmute
::<_
, __m128i
>(_mm_cmpgt_sd(a
, b
));
4183 assert_eq_m128i(r
, e
);
4186 #[simd_test(enable = "sse2")]
4187 unsafe fn test_mm_cmpge_sd() {
4188 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4189 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4190 let r
= transmute
::<_
, __m128i
>(_mm_cmpge_sd(a
, b
));
4191 assert_eq_m128i(r
, e
);
4194 #[simd_test(enable = "sse2")]
4195 unsafe fn test_mm_cmpord_sd() {
4196 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4197 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4198 let r
= transmute
::<_
, __m128i
>(_mm_cmpord_sd(a
, b
));
4199 assert_eq_m128i(r
, e
);
4202 #[simd_test(enable = "sse2")]
4203 unsafe fn test_mm_cmpunord_sd() {
4204 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4205 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4206 let r
= transmute
::<_
, __m128i
>(_mm_cmpunord_sd(a
, b
));
4207 assert_eq_m128i(r
, e
);
4210 #[simd_test(enable = "sse2")]
4211 unsafe fn test_mm_cmpneq_sd() {
4212 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4213 let e
= _mm_setr_epi64x(!0, transmute(2.0f64));
4214 let r
= transmute
::<_
, __m128i
>(_mm_cmpneq_sd(a
, b
));
4215 assert_eq_m128i(r
, e
);
4218 #[simd_test(enable = "sse2")]
4219 unsafe fn test_mm_cmpnlt_sd() {
4220 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4221 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4222 let r
= transmute
::<_
, __m128i
>(_mm_cmpnlt_sd(a
, b
));
4223 assert_eq_m128i(r
, e
);
4226 #[simd_test(enable = "sse2")]
4227 unsafe fn test_mm_cmpnle_sd() {
4228 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4229 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4230 let r
= transmute
::<_
, __m128i
>(_mm_cmpnle_sd(a
, b
));
4231 assert_eq_m128i(r
, e
);
4234 #[simd_test(enable = "sse2")]
4235 unsafe fn test_mm_cmpngt_sd() {
4236 let (a
, b
) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4237 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4238 let r
= transmute
::<_
, __m128i
>(_mm_cmpngt_sd(a
, b
));
4239 assert_eq_m128i(r
, e
);
4242 #[simd_test(enable = "sse2")]
4243 unsafe fn test_mm_cmpnge_sd() {
4244 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4245 let e
= _mm_setr_epi64x(0, transmute(2.0f64));
4246 let r
= transmute
::<_
, __m128i
>(_mm_cmpnge_sd(a
, b
));
4247 assert_eq_m128i(r
, e
);
4250 #[simd_test(enable = "sse2")]
4251 unsafe fn test_mm_cmpeq_pd() {
4252 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4253 let e
= _mm_setr_epi64x(!0, 0);
4254 let r
= transmute
::<_
, __m128i
>(_mm_cmpeq_pd(a
, b
));
4255 assert_eq_m128i(r
, e
);
4258 #[simd_test(enable = "sse2")]
4259 unsafe fn test_mm_cmplt_pd() {
4260 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4261 let e
= _mm_setr_epi64x(0, !0);
4262 let r
= transmute
::<_
, __m128i
>(_mm_cmplt_pd(a
, b
));
4263 assert_eq_m128i(r
, e
);
4266 #[simd_test(enable = "sse2")]
4267 unsafe fn test_mm_cmple_pd() {
4268 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4269 let e
= _mm_setr_epi64x(!0, !0);
4270 let r
= transmute
::<_
, __m128i
>(_mm_cmple_pd(a
, b
));
4271 assert_eq_m128i(r
, e
);
4274 #[simd_test(enable = "sse2")]
4275 unsafe fn test_mm_cmpgt_pd() {
4276 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4277 let e
= _mm_setr_epi64x(0, 0);
4278 let r
= transmute
::<_
, __m128i
>(_mm_cmpgt_pd(a
, b
));
4279 assert_eq_m128i(r
, e
);
4282 #[simd_test(enable = "sse2")]
4283 unsafe fn test_mm_cmpge_pd() {
4284 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4285 let e
= _mm_setr_epi64x(!0, 0);
4286 let r
= transmute
::<_
, __m128i
>(_mm_cmpge_pd(a
, b
));
4287 assert_eq_m128i(r
, e
);
4290 #[simd_test(enable = "sse2")]
4291 unsafe fn test_mm_cmpord_pd() {
4292 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4293 let e
= _mm_setr_epi64x(0, !0);
4294 let r
= transmute
::<_
, __m128i
>(_mm_cmpord_pd(a
, b
));
4295 assert_eq_m128i(r
, e
);
4298 #[simd_test(enable = "sse2")]
4299 unsafe fn test_mm_cmpunord_pd() {
4300 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(5.0, 3.0));
4301 let e
= _mm_setr_epi64x(!0, 0);
4302 let r
= transmute
::<_
, __m128i
>(_mm_cmpunord_pd(a
, b
));
4303 assert_eq_m128i(r
, e
);
4306 #[simd_test(enable = "sse2")]
4307 unsafe fn test_mm_cmpneq_pd() {
4308 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4309 let e
= _mm_setr_epi64x(!0, !0);
4310 let r
= transmute
::<_
, __m128i
>(_mm_cmpneq_pd(a
, b
));
4311 assert_eq_m128i(r
, e
);
4314 #[simd_test(enable = "sse2")]
4315 unsafe fn test_mm_cmpnlt_pd() {
4316 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4317 let e
= _mm_setr_epi64x(0, 0);
4318 let r
= transmute
::<_
, __m128i
>(_mm_cmpnlt_pd(a
, b
));
4319 assert_eq_m128i(r
, e
);
4322 #[simd_test(enable = "sse2")]
4323 unsafe fn test_mm_cmpnle_pd() {
4324 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4325 let e
= _mm_setr_epi64x(0, 0);
4326 let r
= transmute
::<_
, __m128i
>(_mm_cmpnle_pd(a
, b
));
4327 assert_eq_m128i(r
, e
);
4330 #[simd_test(enable = "sse2")]
4331 unsafe fn test_mm_cmpngt_pd() {
4332 let (a
, b
) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4333 let e
= _mm_setr_epi64x(0, !0);
4334 let r
= transmute
::<_
, __m128i
>(_mm_cmpngt_pd(a
, b
));
4335 assert_eq_m128i(r
, e
);
4338 #[simd_test(enable = "sse2")]
4339 unsafe fn test_mm_cmpnge_pd() {
4340 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4341 let e
= _mm_setr_epi64x(0, !0);
4342 let r
= transmute
::<_
, __m128i
>(_mm_cmpnge_pd(a
, b
));
4343 assert_eq_m128i(r
, e
);
4346 #[simd_test(enable = "sse2")]
4347 unsafe fn test_mm_comieq_sd() {
4348 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4349 assert
!(_mm_comieq_sd(a
, b
) != 0);
4351 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(1.0, 3.0));
4352 assert
!(_mm_comieq_sd(a
, b
) == 0);
4355 #[simd_test(enable = "sse2")]
4356 unsafe fn test_mm_comilt_sd() {
4357 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4358 assert
!(_mm_comilt_sd(a
, b
) == 0);
4361 #[simd_test(enable = "sse2")]
4362 unsafe fn test_mm_comile_sd() {
4363 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4364 assert
!(_mm_comile_sd(a
, b
) != 0);
4367 #[simd_test(enable = "sse2")]
4368 unsafe fn test_mm_comigt_sd() {
4369 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4370 assert
!(_mm_comigt_sd(a
, b
) == 0);
4373 #[simd_test(enable = "sse2")]
4374 unsafe fn test_mm_comige_sd() {
4375 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4376 assert
!(_mm_comige_sd(a
, b
) != 0);
4379 #[simd_test(enable = "sse2")]
4380 unsafe fn test_mm_comineq_sd() {
4381 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4382 assert
!(_mm_comineq_sd(a
, b
) == 0);
4385 #[simd_test(enable = "sse2")]
4386 unsafe fn test_mm_ucomieq_sd() {
4387 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4388 assert
!(_mm_ucomieq_sd(a
, b
) != 0);
4390 let (a
, b
) = (_mm_setr_pd(NAN
, 2.0), _mm_setr_pd(NAN
, 3.0));
4391 assert
!(_mm_ucomieq_sd(a
, b
) == 0);
4394 #[simd_test(enable = "sse2")]
4395 unsafe fn test_mm_ucomilt_sd() {
4396 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4397 assert
!(_mm_ucomilt_sd(a
, b
) == 0);
4400 #[simd_test(enable = "sse2")]
4401 unsafe fn test_mm_ucomile_sd() {
4402 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4403 assert
!(_mm_ucomile_sd(a
, b
) != 0);
4406 #[simd_test(enable = "sse2")]
4407 unsafe fn test_mm_ucomigt_sd() {
4408 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4409 assert
!(_mm_ucomigt_sd(a
, b
) == 0);
4412 #[simd_test(enable = "sse2")]
4413 unsafe fn test_mm_ucomige_sd() {
4414 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4415 assert
!(_mm_ucomige_sd(a
, b
) != 0);
4418 #[simd_test(enable = "sse2")]
4419 unsafe fn test_mm_ucomineq_sd() {
4420 let (a
, b
) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4421 assert
!(_mm_ucomineq_sd(a
, b
) == 0);
4424 #[simd_test(enable = "sse2")]
4425 unsafe fn test_mm_movemask_pd() {
4426 let r
= _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4427 assert_eq
!(r
, 0b01);
4429 let r
= _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4430 assert_eq
!(r
, 0b11);
4438 #[simd_test(enable = "sse2")]
4439 unsafe fn test_mm_load_pd() {
4441 data
: [1.0f64, 2.0, 3.0, 4.0],
4443 let vals
= &mem
.data
;
4444 let d
= vals
.as_ptr();
4446 let r
= _mm_load_pd(d
);
4447 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.0));
4450 #[simd_test(enable = "sse2")]
4451 unsafe fn test_mm_load_sd() {
4453 let expected
= _mm_setr_pd(a
, 0.);
4454 let r
= _mm_load_sd(&a
);
4455 assert_eq_m128d(r
, expected
);
4458 #[simd_test(enable = "sse2")]
4459 unsafe fn test_mm_loadh_pd() {
4460 let a
= _mm_setr_pd(1., 2.);
4462 let expected
= _mm_setr_pd(_mm_cvtsd_f64(a
), 3.);
4463 let r
= _mm_loadh_pd(a
, &b
);
4464 assert_eq_m128d(r
, expected
);
4467 #[simd_test(enable = "sse2")]
4468 unsafe fn test_mm_loadl_pd() {
4469 let a
= _mm_setr_pd(1., 2.);
4471 let expected
= _mm_setr_pd(3., get_m128d(a
, 1));
4472 let r
= _mm_loadl_pd(a
, &b
);
4473 assert_eq_m128d(r
, expected
);
4476 #[simd_test(enable = "sse2")]
4477 unsafe fn test_mm_stream_pd() {
4482 let a
= _mm_set1_pd(7.0);
4483 let mut mem
= Memory { data: [-1.0; 2] }
;
4485 _mm_stream_pd(&mut mem
.data
[0] as *mut f64, a
);
4487 assert_eq
!(mem
.data
[i
], get_m128d(a
, i
));
4491 #[simd_test(enable = "sse2")]
4492 unsafe fn test_mm_store_sd() {
4494 let a
= _mm_setr_pd(1., 2.);
4495 _mm_store_sd(&mut dest
, a
);
4496 assert_eq
!(dest
, _mm_cvtsd_f64(a
));
4499 #[simd_test(enable = "sse2")]
4500 unsafe fn test_mm_store_pd() {
4501 let mut mem
= Memory { data: [0.0f64; 4] }
;
4502 let vals
= &mut mem
.data
;
4503 let a
= _mm_setr_pd(1.0, 2.0);
4504 let d
= vals
.as_mut_ptr();
4506 _mm_store_pd(d
, *black_box(&a
));
4507 assert_eq
!(vals
[0], 1.0);
4508 assert_eq
!(vals
[1], 2.0);
4511 #[simd_test(enable = "sse2")]
4512 unsafe fn test_mm_storeu_pd() {
4513 let mut mem
= Memory { data: [0.0f64; 4] }
;
4514 let vals
= &mut mem
.data
;
4515 let a
= _mm_setr_pd(1.0, 2.0);
4518 let mut p
= vals
.as_mut_ptr();
4520 // Make sure p is **not** aligned to 16-byte boundary
4521 if (p
as usize) & 0xf == 0 {
4526 _mm_storeu_pd(p
, *black_box(&a
));
4529 assert_eq
!(vals
[ofs
- 1], 0.0);
4531 assert_eq
!(vals
[ofs
+ 0], 1.0);
4532 assert_eq
!(vals
[ofs
+ 1], 2.0);
4535 #[simd_test(enable = "sse2")]
4536 unsafe fn test_mm_store1_pd() {
4537 let mut mem
= Memory { data: [0.0f64; 4] }
;
4538 let vals
= &mut mem
.data
;
4539 let a
= _mm_setr_pd(1.0, 2.0);
4540 let d
= vals
.as_mut_ptr();
4542 _mm_store1_pd(d
, *black_box(&a
));
4543 assert_eq
!(vals
[0], 1.0);
4544 assert_eq
!(vals
[1], 1.0);
4547 #[simd_test(enable = "sse2")]
4548 unsafe fn test_mm_store_pd1() {
4549 let mut mem
= Memory { data: [0.0f64; 4] }
;
4550 let vals
= &mut mem
.data
;
4551 let a
= _mm_setr_pd(1.0, 2.0);
4552 let d
= vals
.as_mut_ptr();
4554 _mm_store_pd1(d
, *black_box(&a
));
4555 assert_eq
!(vals
[0], 1.0);
4556 assert_eq
!(vals
[1], 1.0);
4559 #[simd_test(enable = "sse2")]
4560 unsafe fn test_mm_storer_pd() {
4561 let mut mem
= Memory { data: [0.0f64; 4] }
;
4562 let vals
= &mut mem
.data
;
4563 let a
= _mm_setr_pd(1.0, 2.0);
4564 let d
= vals
.as_mut_ptr();
4566 _mm_storer_pd(d
, *black_box(&a
));
4567 assert_eq
!(vals
[0], 2.0);
4568 assert_eq
!(vals
[1], 1.0);
4571 #[simd_test(enable = "sse2")]
4572 unsafe fn test_mm_storeh_pd() {
4574 let a
= _mm_setr_pd(1., 2.);
4575 _mm_storeh_pd(&mut dest
, a
);
4576 assert_eq
!(dest
, get_m128d(a
, 1));
4579 #[simd_test(enable = "sse2")]
4580 unsafe fn test_mm_storel_pd() {
4582 let a
= _mm_setr_pd(1., 2.);
4583 _mm_storel_pd(&mut dest
, a
);
4584 assert_eq
!(dest
, _mm_cvtsd_f64(a
));
4587 #[simd_test(enable = "sse2")]
4588 unsafe fn test_mm_loadr_pd() {
4589 let mut mem
= Memory
{
4590 data
: [1.0f64, 2.0, 3.0, 4.0],
4592 let vals
= &mut mem
.data
;
4593 let d
= vals
.as_ptr();
4595 let r
= _mm_loadr_pd(d
);
4596 assert_eq_m128d(r
, _mm_setr_pd(2.0, 1.0));
4599 #[simd_test(enable = "sse2")]
4600 unsafe fn test_mm_loadu_pd() {
4601 let mut mem
= Memory
{
4602 data
: [1.0f64, 2.0, 3.0, 4.0],
4604 let vals
= &mut mem
.data
;
4605 let mut d
= vals
.as_ptr();
4607 // make sure d is not aligned to 16-byte boundary
4609 if (d
as usize) & 0xf == 0 {
4611 d
= d
.offset(offset
as isize);
4614 let r
= _mm_loadu_pd(d
);
4615 let e
= _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset
as f64));
4616 assert_eq_m128d(r
, e
);
4619 #[simd_test(enable = "sse2")]
4620 unsafe fn test_mm_cvtpd_ps() {
4621 let r
= _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
4622 assert_eq_m128(r
, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
4624 let r
= _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
4625 assert_eq_m128(r
, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));
4627 let r
= _mm_cvtpd_ps(_mm_setr_pd(f64::MAX
, f64::MIN
));
4628 assert_eq_m128(r
, _mm_setr_ps(f32::INFINITY
, f32::NEG_INFINITY
, 0.0, 0.0));
4630 let r
= _mm_cvtpd_ps(_mm_setr_pd(f32::MAX
as f64, f32::MIN
as f64));
4631 assert_eq_m128(r
, _mm_setr_ps(f32::MAX
, f32::MIN
, 0.0, 0.0));
4634 #[simd_test(enable = "sse2")]
4635 unsafe fn test_mm_cvtps_pd() {
4636 let r
= _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
4637 assert_eq_m128d(r
, _mm_setr_pd(-1.0, 2.0));
4639 let r
= _mm_cvtps_pd(_mm_setr_ps(
4645 assert_eq_m128d(r
, _mm_setr_pd(f32::MAX
as f64, f64::INFINITY
));
4648 #[simd_test(enable = "sse2")]
4649 unsafe fn test_mm_cvtpd_epi32() {
4650 let r
= _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
4651 assert_eq_m128i(r
, _mm_setr_epi32(-1, 5, 0, 0));
4653 let r
= _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
4654 assert_eq_m128i(r
, _mm_setr_epi32(-1, -5, 0, 0));
4656 let r
= _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX
, f64::MIN
));
4657 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4659 let r
= _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY
, f64::NEG_INFINITY
));
4660 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4662 let r
= _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN
, f64::NAN
));
4663 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4666 #[simd_test(enable = "sse2")]
4667 unsafe fn test_mm_cvtsd_si32() {
4668 let r
= _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
4671 let r
= _mm_cvtsd_si32(_mm_setr_pd(f64::MAX
, f64::MIN
));
4672 assert_eq
!(r
, i32::MIN
);
4674 let r
= _mm_cvtsd_si32(_mm_setr_pd(f64::NAN
, f64::NAN
));
4675 assert_eq
!(r
, i32::MIN
);
4678 #[simd_test(enable = "sse2")]
4679 unsafe fn test_mm_cvtsd_ss() {
4680 let a
= _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
4681 let b
= _mm_setr_pd(2.0, -5.0);
4683 let r
= _mm_cvtsd_ss(a
, b
);
4685 assert_eq_m128(r
, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));
4687 let a
= _mm_setr_ps(-1.1, f32::NEG_INFINITY
, f32::MAX
, f32::NEG_INFINITY
);
4688 let b
= _mm_setr_pd(f64::INFINITY
, -5.0);
4690 let r
= _mm_cvtsd_ss(a
, b
);
4703 #[simd_test(enable = "sse2")]
4704 unsafe fn test_mm_cvtsd_f64() {
4705 let r
= _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
4706 assert_eq
!(r
, -1.1);
4709 #[simd_test(enable = "sse2")]
4710 unsafe fn test_mm_cvtss_sd() {
4711 let a
= _mm_setr_pd(-1.1, 2.2);
4712 let b
= _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
4714 let r
= _mm_cvtss_sd(a
, b
);
4715 assert_eq_m128d(r
, _mm_setr_pd(1.0, 2.2));
4717 let a
= _mm_setr_pd(-1.1, f64::INFINITY
);
4718 let b
= _mm_setr_ps(f32::NEG_INFINITY
, 2.0, 3.0, 4.0);
4720 let r
= _mm_cvtss_sd(a
, b
);
4721 assert_eq_m128d(r
, _mm_setr_pd(f64::NEG_INFINITY
, f64::INFINITY
));
4724 #[simd_test(enable = "sse2")]
4725 unsafe fn test_mm_cvttpd_epi32() {
4726 let a
= _mm_setr_pd(-1.1, 2.2);
4727 let r
= _mm_cvttpd_epi32(a
);
4728 assert_eq_m128i(r
, _mm_setr_epi32(-1, 2, 0, 0));
4730 let a
= _mm_setr_pd(f64::NEG_INFINITY
, f64::NAN
);
4731 let r
= _mm_cvttpd_epi32(a
);
4732 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, 0, 0));
4735 #[simd_test(enable = "sse2")]
4736 unsafe fn test_mm_cvttsd_si32() {
4737 let a
= _mm_setr_pd(-1.1, 2.2);
4738 let r
= _mm_cvttsd_si32(a
);
4741 let a
= _mm_setr_pd(f64::NEG_INFINITY
, f64::NAN
);
4742 let r
= _mm_cvttsd_si32(a
);
4743 assert_eq
!(r
, i32::MIN
);
4746 #[simd_test(enable = "sse2")]
4747 unsafe fn test_mm_cvttps_epi32() {
4748 let a
= _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
4749 let r
= _mm_cvttps_epi32(a
);
4750 assert_eq_m128i(r
, _mm_setr_epi32(-1, 2, -3, 6));
4752 let a
= _mm_setr_ps(f32::NEG_INFINITY
, f32::INFINITY
, f32::MIN
, f32::MAX
);
4753 let r
= _mm_cvttps_epi32(a
);
4754 assert_eq_m128i(r
, _mm_setr_epi32(i32::MIN
, i32::MIN
, i32::MIN
, i32::MIN
));
4757 #[simd_test(enable = "sse2")]
4758 unsafe fn test_mm_set_sd() {
4759 let r
= _mm_set_sd(-1.0_f64);
4760 assert_eq_m128d(r
, _mm_setr_pd(-1.0_f64, 0_f64));
4763 #[simd_test(enable = "sse2")]
4764 unsafe fn test_mm_set1_pd() {
4765 let r
= _mm_set1_pd(-1.0_f64);
4766 assert_eq_m128d(r
, _mm_setr_pd(-1.0_f64, -1.0_f64));
4769 #[simd_test(enable = "sse2")]
4770 unsafe fn test_mm_set_pd1() {
4771 let r
= _mm_set_pd1(-2.0_f64);
4772 assert_eq_m128d(r
, _mm_setr_pd(-2.0_f64, -2.0_f64));
4775 #[simd_test(enable = "sse2")]
4776 unsafe fn test_mm_set_pd() {
4777 let r
= _mm_set_pd(1.0_f64, 5.0_f64);
4778 assert_eq_m128d(r
, _mm_setr_pd(5.0_f64, 1.0_f64));
4781 #[simd_test(enable = "sse2")]
4782 unsafe fn test_mm_setr_pd() {
4783 let r
= _mm_setr_pd(1.0_f64, -5.0_f64);
4784 assert_eq_m128d(r
, _mm_setr_pd(1.0_f64, -5.0_f64));
4787 #[simd_test(enable = "sse2")]
4788 unsafe fn test_mm_setzero_pd() {
4789 let r
= _mm_setzero_pd();
4790 assert_eq_m128d(r
, _mm_setr_pd(0_f64, 0_f64));
4793 #[simd_test(enable = "sse2")]
4794 unsafe fn test_mm_load1_pd() {
4796 let r
= _mm_load1_pd(&d
);
4797 assert_eq_m128d(r
, _mm_setr_pd(d
, d
));
4800 #[simd_test(enable = "sse2")]
4801 unsafe fn test_mm_load_pd1() {
4803 let r
= _mm_load_pd1(&d
);
4804 assert_eq_m128d(r
, _mm_setr_pd(d
, d
));
4807 #[simd_test(enable = "sse2")]
4808 unsafe fn test_mm_unpackhi_pd() {
4809 let a
= _mm_setr_pd(1.0, 2.0);
4810 let b
= _mm_setr_pd(3.0, 4.0);
4811 let r
= _mm_unpackhi_pd(a
, b
);
4812 assert_eq_m128d(r
, _mm_setr_pd(2.0, 4.0));
4815 #[simd_test(enable = "sse2")]
4816 unsafe fn test_mm_unpacklo_pd() {
4817 let a
= _mm_setr_pd(1.0, 2.0);
4818 let b
= _mm_setr_pd(3.0, 4.0);
4819 let r
= _mm_unpacklo_pd(a
, b
);
4820 assert_eq_m128d(r
, _mm_setr_pd(1.0, 3.0));
4823 #[simd_test(enable = "sse2")]
4824 unsafe fn test_mm_shuffle_pd() {
4825 let a
= _mm_setr_pd(1., 2.);
4826 let b
= _mm_setr_pd(3., 4.);
4827 let expected
= _mm_setr_pd(1., 3.);
4828 let r
= _mm_shuffle_pd
::<0b00_00_00_00>(a
, b
);
4829 assert_eq_m128d(r
, expected
);
4832 #[simd_test(enable = "sse2")]
4833 unsafe fn test_mm_move_sd() {
4834 let a
= _mm_setr_pd(1., 2.);
4835 let b
= _mm_setr_pd(3., 4.);
4836 let expected
= _mm_setr_pd(3., 2.);
4837 let r
= _mm_move_sd(a
, b
);
4838 assert_eq_m128d(r
, expected
);
4841 #[simd_test(enable = "sse2")]
4842 unsafe fn test_mm_castpd_ps() {
4843 let a
= _mm_set1_pd(0.);
4844 let expected
= _mm_set1_ps(0.);
4845 let r
= _mm_castpd_ps(a
);
4846 assert_eq_m128(r
, expected
);
4849 #[simd_test(enable = "sse2")]
4850 unsafe fn test_mm_castpd_si128() {
4851 let a
= _mm_set1_pd(0.);
4852 let expected
= _mm_set1_epi64x(0);
4853 let r
= _mm_castpd_si128(a
);
4854 assert_eq_m128i(r
, expected
);
4857 #[simd_test(enable = "sse2")]
4858 unsafe fn test_mm_castps_pd() {
4859 let a
= _mm_set1_ps(0.);
4860 let expected
= _mm_set1_pd(0.);
4861 let r
= _mm_castps_pd(a
);
4862 assert_eq_m128d(r
, expected
);
4865 #[simd_test(enable = "sse2")]
4866 unsafe fn test_mm_castps_si128() {
4867 let a
= _mm_set1_ps(0.);
4868 let expected
= _mm_set1_epi32(0);
4869 let r
= _mm_castps_si128(a
);
4870 assert_eq_m128i(r
, expected
);
4873 #[simd_test(enable = "sse2")]
4874 unsafe fn test_mm_castsi128_pd() {
4875 let a
= _mm_set1_epi64x(0);
4876 let expected
= _mm_set1_pd(0.);
4877 let r
= _mm_castsi128_pd(a
);
4878 assert_eq_m128d(r
, expected
);
4881 #[simd_test(enable = "sse2")]
4882 unsafe fn test_mm_castsi128_ps() {
4883 let a
= _mm_set1_epi32(0);
4884 let expected
= _mm_set1_ps(0.);
4885 let r
= _mm_castsi128_ps(a
);
4886 assert_eq_m128(r
, expected
);