//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    intrinsics,
    mem::{self, transmute},
    ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}
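
// Editor's sketch (not upstream code): the typical shape of a spin-wait loop
// around `_mm_pause`. The polled flag and its meaning are assumptions made
// purely for illustration.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn spin_until_nonzero(flag: *const i32) {
    while ptr::read_volatile(flag) == 0 {
        // Hint that we are spinning; this lowers power use and frees
        // execution resources for the sibling hyper-thread.
        _mm_pause();
    }
}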

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// fence instruction is globally visible before any load instruction which
/// follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}
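
// Editor's sketch (assumption, not upstream code): a producer that makes a
// data store globally visible before the flag store that publishes it. The
// `data`/`flag` raw pointers are hypothetical and used only for illustration.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn publish_with_fence(data: *mut i32, flag: *mut i32) {
    ptr::write_volatile(data, 42);
    // Every store before the fence is globally visible before any memory
    // instruction that follows it.
    _mm_mfence();
    ptr::write_volatile(flag, 1);
}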

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}
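
// Editor's sketch (values assumed): saturating addition clamps at the type
// bounds where plain `_mm_add_epi8` would wrap.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn adds_epi8_saturates() {
    let a = _mm_set1_epi8(120);
    let b = _mm_set1_epi8(20);
    // 120 + 20 = 140 would wrap to -116 with `_mm_add_epi8`; here it clamps to 127.
    let r = _mm_adds_epi8(a, b);
    let lane0: i8 = simd_extract(r.as_i8x16(), 0);
    assert_eq!(lane0, 127);
}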

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgb(a.as_u8x16(), b.as_u8x16()))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgw(a.as_u16x8(), b.as_u16x8()))
}
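
// Editor's sketch (values assumed): `pavgb` computes `(a + b + 1) >> 1` per
// lane, so the average is rounded up rather than truncated.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn avg_epu8_rounds_up() {
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(2);
    // (1 + 2 + 1) >> 1 == 2, not the truncated 1.
    let r = _mm_avg_epu8(a, b);
    let lane0: u8 = simd_extract(r.as_u8x16(), 0);
    assert_eq!(lane0, 2);
}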

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}
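
// Editor's sketch (values assumed): each 32-bit output lane is the sum of two
// adjacent 16-bit products, e.g. lane 0 = a[0]*b[0] + a[1]*b[1].
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn madd_epi16_pairs() {
    let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); // lanes low..high hold 1..=8
    let b = _mm_set1_epi16(10);
    // Lane 0 of the result: 1 * 10 + 2 * 10 = 30.
    let r = _mm_madd_epi16(a, b);
    assert_eq!(_mm_cvtsi128_si32(r), 30);
}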

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminub(a.as_u8x16(), b.as_u8x16()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhw(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhuw(a.as_u16x8(), b.as_u16x8()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmuludq(a.as_u32x4(), b.as_u32x4()))
}
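
// Editor's sketch (values assumed): only the low 32 bits of each 64-bit lane
// are multiplied, and the full 64-bit product is kept, so nothing overflows.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn mul_epu32_widens() {
    let a = _mm_set_epi64x(0, 0xFFFF_FFFF); // low lane holds u32::MAX
    let b = _mm_set_epi64x(0, 2);
    // u32::MAX * 2 = 0x1_FFFF_FFFE fits only because the result lane is 64-bit.
    let r = _mm_mul_epu32(a, b);
    let lane0: u64 = simd_extract(r.as_u64x2(), 0);
    assert_eq!(lane0, 0x1_FFFF_FFFE);
}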

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each group of 8 consecutive differences to
/// produce two unsigned 16-bit integers, and packs these unsigned 16-bit
/// integers into the low 16 bits of the returned 64-bit elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}
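
// Editor's sketch (values assumed): |a[i] - b[i]| is taken for all 16 byte
// lanes; each half's 8 differences are summed into one 64-bit result lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn sad_epu8_sums_differences() {
    let a = _mm_set1_epi8(5);
    let b = _mm_set1_epi8(2);
    // |5 - 2| = 3 per lane; 8 lanes per half give 24 in each 64-bit lane.
    let r = _mm_sad_epu8(a, b);
    let low: u64 = simd_extract(r.as_u64x2(), 0);
    assert_eq!(low, 24);
}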

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}
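
// Editor's sketch (values assumed): a 4-byte left shift moves every 32-bit
// lane up by one position and zero-fills lane 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn slli_si128_by_four() {
    let a = _mm_set_epi32(4, 3, 2, 1); // lanes low..high: 1, 2, 3, 4
    let r = _mm_slli_si128::<4>(a);
    // Lane 0 is now zero and lane 1 holds the old lane 0.
    assert_eq!(_mm_cvtsi128_si32(r), 0);
    let lane1: i32 = simd_extract(r.as_i32x4(), 1);
    assert_eq!(lane1, 1);
}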

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 {
            i
        } else {
            16 - shift + i
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    transmute::<i8x16, _>(simd_shuffle16!(
        zero,
        a.as_i8x16(),
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psllid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliq(a.as_i64x2(), IMM8))
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraiw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    let x: i8x16 = simd_shuffle16!(
        a.as_i8x16(),
        zero,
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrliw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrlid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrliq(a.as_i64x2(), IMM8))
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then ANDs the result with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}
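
// Editor's sketch (values assumed): `(!a) & b` clears in `b` exactly the bits
// that are set in `a`, a common idiom for masking fields out.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn andnot_clears_mask_bits() {
    let mask = _mm_set1_epi32(0xFF);
    let value = _mm_set1_epi32(0x1234_5678);
    // The low byte of every 32-bit lane is cleared.
    let r = _mm_andnot_si128(mask, value);
    assert_eq!(_mm_cvtsi128_si32(r), 0x1234_5600);
}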

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}
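
// Editor's sketch (values assumed): comparisons produce all-ones (-1) or
// all-zeros per lane, yielding masks that combine with and/andnot/or.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn cmpeq_epi8_yields_masks() {
    let a = _mm_set1_epi8(7);
    let b = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7);
    let m = _mm_cmpeq_epi8(a, b);
    // Lane 0 matches (all ones, -1 as i8); lane 1 does not (all zeros).
    let lane0: i8 = simd_extract(m.as_i8x16(), 0);
    let lane1: i8 = simd_extract(m.as_i8x16(), 1);
    assert_eq!((lane0, lane1), (-1, 0));
}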

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle2!(a, a, [0, 1]))
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert(a, 0, b as f64)
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    cvtdq2ps(a.as_i32x4())
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract(a.as_i32x4(), 0)
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
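
// Editor's sketch (values assumed): `_mm_set_*` takes arguments from the
// highest lane down to the lowest, while `_mm_setr_*` takes them in memory
// order, lowest lane first. Both calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn set_vs_setr_ordering() {
    let hi_first = _mm_set_epi32(3, 2, 1, 0);
    let lo_first = _mm_setr_epi32(0, 1, 2, 3);
    // Lane 0 of both vectors holds 0.
    assert_eq!(_mm_cvtsi128_si32(hi_first), 0);
    assert_eq!(_mm_cvtsi128_si32(lo_first), 0);
}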

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
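
// Editor's sketch (buffers assumed): an unaligned load/store round-trip; byte
// arrays carry no 16-byte alignment guarantee, so the `u` variants are used.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn loadu_storeu_roundtrip() {
    let src = [7u8; 16];
    let mut dst = [0u8; 16];
    let v = _mm_loadu_si128(src.as_ptr() as *const __m128i);
    _mm_storeu_si128(dst.as_mut_ptr() as *mut __m128i, v);
    assert_eq!(src, dst);
}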

/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding `mask` element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    storeudq(mem_addr as *mut i8, a);
}

/// Stores the lower 64-bit integer of `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    intrinsics::nontemporal_store(mem_addr, a);
}
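
// A minimal usage sketch with a hypothetical `example_stream_fill` helper,
// assuming SSE2 is available (always true on x86_64): non-temporal stores
// suit buffers that will not be read back soon, since they bypass the cache.
// They are weakly ordered, so a fence is needed before other threads may
// observe the data.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_stream_fill(dst: &mut [i32]) {
    for slot in dst.iter_mut() {
        _mm_stream_si32(slot, 0);
    }
    // Make the non-temporal stores visible before anyone reads `dst`.
    _mm_mfence();
}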

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}
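
// A minimal usage sketch with a hypothetical `example_packs_saturation`
// helper, assuming SSE2 is available (always true on x86_64): saturation
// clamps out-of-range lanes to the destination type's bounds instead of
// truncating bits.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_packs_saturation() {
    let a = _mm_set1_epi16(300); // too large for i8
    let b = _mm_set1_epi16(-300); // too small for i8
    let r = _mm_packs_epi16(a, b);
    // Bytes 0-7 come from `a` and saturate to i8::MAX (0x7f each).
    assert_eq!(_mm_extract_epi16::<0>(r), 0x7f7f);
    // Bytes 8-15 come from `b` and saturate to i8::MIN (0x80 each).
    assert_eq!(_mm_extract_epi16::<4>(r), 0x8080);
}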

/// Returns the `IMM8` element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_imm3!(IMM8);
    simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32
}

/// Returns a new vector where the `IMM8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_imm3!(IMM8);
    transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16))
}
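
// A minimal usage sketch with a hypothetical `example_extract_insert` helper,
// assuming SSE2 is available (always true on x86_64): `IMM8` must be a
// compile-time constant selecting one of the eight 16-bit lanes.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_extract_insert() {
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    assert_eq!(_mm_extract_epi16::<5>(a), 5);
    let b = _mm_insert_epi16::<5>(a, -1);
    // The lane is read back as u16 and zero-extended to i32.
    assert_eq!(_mm_extract_epi16::<5>(b), 0xffff);
}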

/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    pmovmskb(a.as_i8x16())
}
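
// A minimal usage sketch with a hypothetical `example_find_byte` helper,
// assuming SSE2 is available (always true on x86_64): compare 16 bytes at
// once, then use the movemask bits to locate the first match.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_find_byte(haystack: &[u8; 16], needle: u8) -> Option<usize> {
    let chunk = _mm_loadu_si128(haystack.as_ptr() as *const __m128i);
    let eq = _mm_cmpeq_epi8(chunk, _mm_set1_epi8(needle as i8));
    let mask = _mm_movemask_epi8(eq);
    if mask != 0 {
        // Bit i of `mask` corresponds to byte i of the chunk.
        Some(mask.trailing_zeros() as usize)
    } else {
        None
    }
}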

/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i32x4();
    let x: i32x4 = simd_shuffle4!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    );
    transmute(x)
}
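
// A minimal usage sketch with a hypothetical `example_shuffle_epi32` helper,
// assuming SSE2 is available (always true on x86_64): each pair of bits in
// `IMM8` selects the source lane for one output lane, lowest pair first.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_shuffle_epi32() {
    let a = _mm_setr_epi32(10, 20, 30, 40);
    // 0b00_01_10_11 selects lanes 3, 2, 1, 0: the vector is reversed.
    let r = _mm_shuffle_epi32::<0b00_01_10_11>(a);
    assert_eq!(_mm_cvtsi128_si32(r), 40);
}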

/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x8();
    let x: i16x8 = simd_shuffle8!(
        a,
        a,
        <const IMM8: i32> [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    );
    transmute(x)
}

/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x8();
    let x: i16x8 = simd_shuffle8!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
        ],
    );
    transmute(x)
}

/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle16!(
        a.as_i8x16(),
        b.as_i8x16(),
        [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
    ))
}

/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
    transmute::<i16x8, _>(x)
}

/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
}

/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
}

/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle16!(
        a.as_i8x16(),
        b.as_i8x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
    ))
}

/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    transmute::<i16x8, _>(x)
}

/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
}

/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
}
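
// A minimal usage sketch with a hypothetical `example_unpack_widen` helper,
// assuming SSE2 is available (always true on x86_64): interleaving with a
// zero vector is the classic SSE2 way to zero-extend 8-bit lanes to 16 bits.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_unpack_widen() {
    let bytes = _mm_set1_epi8(-1); // 0xff in every lane
    let zero = _mm_setzero_si128();
    // [a0, 0, a1, 0, ...]: each low byte becomes a little-endian u16.
    let lo_u16 = _mm_unpacklo_epi8(bytes, zero);
    assert_eq!(_mm_extract_epi16::<0>(lo_u16), 0x00ff);
}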

/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
}

/// Adds packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
}

/// Divides packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
}
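
// A minimal usage sketch with a hypothetical `example_scalar_vs_packed`
// helper, assuming SSE2 is available (always true on x86_64): `_sd`
// operations touch only the low lane, while `_pd` operations touch both.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_scalar_vs_packed() {
    let a = _mm_setr_pd(1.0, 10.0);
    let b = _mm_setr_pd(2.0, 20.0);
    let sd = _mm_add_sd(a, b); // [3.0, 10.0]: the upper lane passes through
    let pd = _mm_add_pd(a, b); // [3.0, 30.0]
    assert_eq!(_mm_cvtsd_f64(sd), 3.0);
    assert_eq!(_mm_cvtsd_f64(pd), 3.0);
    assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(sd, sd)), 10.0);
}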

/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    maxsd(a, b)
}

/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    maxpd(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    minsd(a, b)
}

/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    minpd(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the product of
/// the low elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
}

/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
}

/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    simd_fsqrt(a)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// subtracting the low element of `b` from the low element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
}

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
}

/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_and_si128(a, b))
}

/// Computes the bitwise NOT of `a` and then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_andnot_si128(a, b))
}

/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
}

/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
}
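
// A minimal usage sketch with a hypothetical `example_abs_pd` helper,
// assuming SSE2 is available (always true on x86_64): the bitwise operations
// act on the raw bit patterns of the doubles, so clearing the sign bit with
// `_mm_andnot_pd` gives a branch-free absolute value.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_abs_pd() {
    let sign_mask = _mm_set1_pd(-0.0); // only the sign bit set
    let a = _mm_setr_pd(-3.5, 7.25);
    // (!sign_mask) & a clears the sign bit of each lane.
    let abs = _mm_andnot_pd(sign_mask, a);
    assert_eq!(_mm_cvtsd_f64(abs), 3.5);
}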

/// Returns a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 0)
}

/// Returns a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 1)
}

/// Returns a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 2)
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the result
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 7)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 3)
}
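
// A minimal usage sketch with a hypothetical `example_compare_mask` helper,
// assuming SSE2 is available (always true on x86_64): comparisons produce
// all-ones or all-zeros lane masks rather than booleans, which can then be
// combined with the bitwise operations above.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_compare_mask() {
    let a = _mm_setr_pd(1.0, 5.0);
    let b = _mm_setr_pd(2.0, 5.0);
    let lt = _mm_cmplt_sd(a, b);
    // The low lane is all ones (1.0 < 2.0); reinterpret it to see the bits.
    let bits: i64 = transmute(_mm_cvtsd_f64(lt));
    assert_eq!(bits, -1);
}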

/// Returns a new vector with the low element of `a` replaced by the not-equal
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 4)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 5)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 6)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Compares corresponding elements in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 0)
}

/// Compares corresponding elements in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 1)
}

/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 2)
}

/// Compares corresponding elements in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 7)
}

/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 3)
}

/// Compares corresponding elements in `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 4)
}

/// Compares corresponding elements in `a` and `b` for not-less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 5)
}

/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 6)
}

/// Compares corresponding elements in `a` and `b` for not-greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}
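
// A minimal usage sketch with a hypothetical `example_any_lane_lt` helper,
// assuming SSE2 is available (always true on x86_64): packed comparisons
// pair naturally with `_mm_movemask_pd` to branch on per-lane results.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_any_lane_lt(a: __m128d, b: __m128d) -> bool {
    // One bit per lane: bit 0 for the low lane, bit 1 for the high lane.
    _mm_movemask_pd(_mm_cmplt_pd(a, b)) != 0
}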

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    comieqsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    comiltsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    comilesd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    comigtsd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    comigesd(a, b)
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    comineqsd(a, b)
}

/// Compares the lower element of `a` and `b` for equality.
///
/// Unlike `_mm_comieq_sd`, the underlying `ucomisd` instruction does not
/// signal an invalid-operation exception for quiet `NaN`s.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomieqsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomiltsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    ucomilesd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigtsd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigesd(a, b)
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomineqsd(a, b)
}
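
// A minimal usage sketch with a hypothetical `example_comi` helper, assuming
// SSE2 is available (always true on x86_64): unlike the mask-returning
// `_mm_cmp*_sd` family, the `comi`/`ucomi` intrinsics return a plain 0/1.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_comi() {
    let a = _mm_set_sd(1.0);
    let b = _mm_set_sd(2.0);
    assert_eq!(_mm_comilt_sd(a, b), 1);
    assert_eq!(_mm_comigt_sd(a, b), 0);
}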

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
    cvtpd2ps(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
    cvtps2pd(a)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    transmute(cvtpd2dq(a))
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    cvtsd2si(a)
}

/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper 3 elements from
/// `a` to the upper elements of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    cvtsd2ss(a, b)
}

/// Returns the lower double-precision (64-bit) floating-point element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    simd_extract(a, 0)
}

/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    cvtss2sd(a, b)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    transmute(cvttpd2dq(a))
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    cvttsd2si(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    transmute(cvttps2dq(a))
}
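
// A minimal usage sketch with a hypothetical `example_round_vs_truncate`
// helper, assuming SSE2 is available (always true on x86_64) and the default
// MXCSR rounding mode: `cvt` rounds to nearest-even, while `cvtt` truncates
// toward zero.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_round_vs_truncate() {
    let a = _mm_set_sd(2.5);
    assert_eq!(_mm_cvtsd_si32(a), 2); // 2.5 rounds to the even neighbor 2
    assert_eq!(_mm_cvttsd_si32(a), 2); // truncation also gives 2
    let b = _mm_set_sd(3.5);
    assert_eq!(_mm_cvtsd_si32(b), 4); // 3.5 rounds to the even neighbor 4
    assert_eq!(_mm_cvttsd_si32(b), 3); // truncation gives 3
}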

/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
    _mm_set_pd(0.0, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    __m128d(b, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}

/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_pd() -> __m128d {
    _mm_set_pd(0.0, 0.0)
}
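
// A minimal usage sketch with a hypothetical `example_set_ordering` helper,
// assuming SSE2 is available (always true on x86_64): `_mm_set_pd` takes its
// arguments high lane first, while `_mm_setr_pd` takes them in memory order.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_set_ordering() {
    let a = _mm_set_pd(2.0, 1.0); // low lane = 1.0
    let b = _mm_setr_pd(1.0, 2.0); // low lane = 1.0
    assert_eq!(_mm_cvtsd_f64(a), 1.0);
    assert_eq!(_mm_movemask_pd(_mm_cmpeq_pd(a, b)), 0b11);
}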

/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
    movmskpd(a)
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit vector and clears the upper element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract(a, 1))
}
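
// A minimal usage sketch with a hypothetical `example_aligned_load` helper
// and `Aligned` wrapper, assuming SSE2 is available (always true on x86_64):
// `_mm_load_pd` requires 16-byte alignment, which an over-aligned type can
// guarantee.
#[cfg(test)]
#[allow(dead_code)]
unsafe fn example_aligned_load() {
    #[repr(align(16))]
    struct Aligned([f64; 2]);
    let data = Aligned([1.0, 2.0]);
    let a = _mm_load_pd(data.0.as_ptr());
    assert_eq!(_mm_cvtsd_f64(a), 1.0);
}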

/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 0)
}
2485
532ac7d7 2486/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
0531ce1d
XL
2487/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
2488/// on a 16-byte boundary or a general-protection exception may be generated.
83c7162d
XL
2489///
2490/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
0531ce1d
XL
2491#[inline]
2492#[target_feature(enable = "sse2")]
2493#[cfg_attr(test, assert_instr(movaps))]
83c7162d 2494#[stable(feature = "simd_x86", since = "1.27.0")]
48663c56 2495#[allow(clippy::cast_ptr_alignment)]
0531ce1d
XL
2496pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2497 *(mem_addr as *mut __m128d) = a;
2498}
2499
532ac7d7 2500/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
0531ce1d
XL
2501/// floating-point elements) from `a` into memory.
2502/// `mem_addr` does not need to be aligned on any particular boundary.
83c7162d
XL
2503///
2504/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
0531ce1d
XL
2505#[inline]
2506#[target_feature(enable = "sse2")]
2507#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
83c7162d 2508#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
2509pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2510 storeupd(mem_addr as *mut i8, a);
2511}
2512
/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle2!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 1);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 0);
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    let d = *mem_addr;
    _mm_setr_pd(d, d)
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    let a = _mm_load_pd(mem_addr);
    simd_shuffle2!(a, a, [1, 0])
}

/// Loads 128 bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128d as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}

/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, any(not(target_os = "windows"), target_arch = "x86")),
    cfg_attr(test, assert_instr(shufps, MASK = 2)) // FIXME shufpd expected
)]
#[cfg_attr(
    all(test, all(target_os = "windows", target_arch = "x86_64")),
    cfg_attr(test, assert_instr(shufpd, MASK = 1))
)]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_imm8!(MASK);
    // MASK bit 0 picks the lane of `a` for the low element; MASK bit 1 picks
    // the lane of `b` for the high element (indices 2..3 refer into `b`).
    simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
}

/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(
    all(test, any(not(target_os = "windows"), target_arch = "x86")),
    assert_instr(movsd)
)]
#[cfg_attr(
    all(test, all(target_os = "windows", target_arch = "x86_64")),
    assert_instr(movlps)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1))
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
    transmute(a)
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    transmute(a)
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    transmute(a)
}

/// Returns a vector of type `__m128d` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_pd() -> __m128d {
    // FIXME: this function should return MaybeUninit<__m128d>
    mem::MaybeUninit::<__m128d>::uninit().assume_init()
}

/// Returns a vector of type `__m128i` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_si128() -> __m128i {
    // FIXME: this function should return MaybeUninit<__m128i>
    mem::MaybeUninit::<__m128i>::uninit().assume_init()
}

/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle2!(a, b, [1, 3])
}

/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle2!(a, b, [0, 2])
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    #[link_name = "llvm.x86.sse2.pavg.b"]
    fn pavgb(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pavg.w"]
    fn pavgw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.pmaxs.w"]
    fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pmaxu.b"]
    fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmins.w"]
    fn pminsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pminu.b"]
    fn pminub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmulh.w"]
    fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pmulhu.w"]
    fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pmulu.dq"]
    fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.pslli.w"]
    fn pslliw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pslli.d"]
    fn psllid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.pslli.q"]
    fn pslliq(a: i64x2, imm8: i32) -> i64x2;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psrai.w"]
    fn psraiw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrai.d"]
    fn psraid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrli.w"]
    fn psrliw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrli.d"]
    fn psrlid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrli.q"]
    fn psrliq(a: i64x2, imm8: i32) -> i64x2;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtdq2ps"]
    fn cvtdq2ps(a: i32x4) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmovmskb.128"]
    fn pmovmskb(a: i8x16) -> i32;
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.sd"]
    fn sqrtsd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.pd"]
    fn sqrtpd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.movmsk.pd"]
    fn movmskpd(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2ps"]
    fn cvtpd2ps(a: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtps2pd"]
    fn cvtps2pd(a: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.storeu.dq"]
    fn storeudq(mem_addr: *mut i8, a: __m128i);
    #[link_name = "llvm.x86.sse2.storeu.pd"]
    fn storeupd(mem_addr: *mut i8, a: __m128d);
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use std::{
        boxed, f32,
        f64::{self, NAN},
        i32,
        mem::{self, transmute},
    };
    use stdarch_test::simd_test;

    #[test]
    fn test_mm_pause() {
        unsafe { _mm_pause() }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(&x as *const _);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi16(
            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
        );
        let r = _mm_slli_epi16::<4>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi16(
            0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
            0, 0, 0, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0));
        let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let r = _mm_slli_epi32::<4>(_mm_set1_epi32(0xFFFF));
        assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_set1_epi32(0xFFFF);
        let b = _mm_setr_epi32(4, 0, 0, 0);
        let r = _mm_sll_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let r = _mm_slli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm_sll_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1));
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_sra_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1));
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_setr_epi32(1, 0, 0, 0);
        let r = _mm_sra_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi16(
            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
        );
        let r = _mm_srli_epi16::<4>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi16(
            0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0));
        let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let r = _mm_srli_epi32::<4>(_mm_set1_epi32(0xFFFF));
        assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_set1_epi32(0xFFFF);
        let b = _mm_setr_epi32(4, 0, 0, 0);
        let r = _mm_srl_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let r = _mm_srli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm_srl_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_pd() {
        let a = _mm_set_epi32(35, 25, 15, 5);
        let r = _mm_cvtepi32_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_sd() {
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi32_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_ps() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ps(a);
        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_epi32() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_si128() {
        let r = _mm_cvtsi32_si128(5);
        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si32() {
        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
        assert_eq!(r, 5);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi64x() {
        let r = _mm_set_epi64x(0, 1);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi32() {
        let r = _mm_set_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi16() {
        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi8() {
        #[rustfmt::skip]
        let r = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi64x() {
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set1_epi64x(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi32() {
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_set1_epi32(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi16() {
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_set1_epi16(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi8() {
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi32() {
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi16() {
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_si128() {
        let r = _mm_setzero_si128();
        assert_eq_m128i(r, _mm_set1_epi64x(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_epi64() {
        let a = _mm_setr_epi64x(6, 5);
        let r = _mm_loadl_epi64(&a as *const _);
        assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_load_si128(&a as *const _ as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_si128() {
        let a = _mm_set_epi64x(5, 6);
        let r = _mm_loadu_si128(&a as *const _ as *const _);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_maskmoveu_si128() {
        let a = _mm_set1_epi8(9);
        #[rustfmt::skip]
        let mask = _mm_set_epi8(
            0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0,
        );
        let mut r = _mm_set1_epi8(0);
        _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
        let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(&mut r as *mut _, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(&mut *mem as *mut i32, a);
        assert_eq!(a, *mem);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
        assert_eq!(r, 0b10100110_00100101);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_epi32() {
        let a = _mm_setr_epi32(5, 10, 15, 20);
        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
        let e = _mm_setr_epi32(20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflehi_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflelo_epi16() {
        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpackhi_epi16(a, b);
        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpackhi_epi32(a, b);
        let e = _mm_setr_epi32(2, 6, 3, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpackhi_epi64(a, b);
        let e = _mm_setr_epi64x(1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 16, 1, 17, 2, 18, 3, 19,
            4, 20, 5, 21, 6, 22, 7, 23,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpacklo_epi16(a, b);
        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m128i(r, e);
    }

83c7162d 4004 #[simd_test(enable = "sse2")]
0531ce1d
XL
4005 unsafe fn test_mm_unpacklo_epi32() {
4006 let a = _mm_setr_epi32(0, 1, 2, 3);
4007 let b = _mm_setr_epi32(4, 5, 6, 7);
4008 let r = _mm_unpacklo_epi32(a, b);
4009 let e = _mm_setr_epi32(0, 4, 1, 5);
4010 assert_eq_m128i(r, e);
4011 }
4012
83c7162d 4013 #[simd_test(enable = "sse2")]
0531ce1d
XL
4014 unsafe fn test_mm_unpacklo_epi64() {
4015 let a = _mm_setr_epi64x(0, 1);
4016 let b = _mm_setr_epi64x(2, 3);
4017 let r = _mm_unpacklo_epi64(a, b);
4018 let e = _mm_setr_epi64x(0, 2);
4019 assert_eq_m128i(r, e);
4020 }
4021
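    // All unpack intrinsics above interleave lanes from the same half of
    // both operands: `unpacklo` alternates lanes from the low halves of
    // `a` and `b` (a0, b0, a1, b1, ...), and `unpackhi` does the same with
    // the high halves.
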
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
    }

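    // The `_sd` ("scalar double") arithmetic tests all follow the pattern
    // checked above: the operation is applied to lane 0 only, and lane 1
    // of the result is copied from `a` (hence the 2.0 passed through),
    // while the `_pd` forms operate on both lanes.
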
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sqrt_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
    }

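    // Note the two-operand form of `_mm_sqrt_sd` above: lane 0 of the
    // result is sqrt(b[0]) while lane 1 is taken from `a`, so the expected
    // value is (sqrt(5.0), 2.0) rather than anything derived from 10.0.
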
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_pd() {
        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_and_pd(a, b);
        let e = transmute(u64x2::splat(1));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_andnot_pd(a, b);
        let e = transmute(u64x2::splat(2));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_or_pd(a, b);
        let e = transmute(u64x2::splat(7));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_xor_pd(a, b);
        let e = transmute(u64x2::splat(6));
        assert_eq_m128d(r, e);
    }

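    // The `_pd` bitwise tests transmute integer vectors because the
    // operations act on raw bit patterns, not floating-point values:
    // 5 & 3 = 1, !5 & 3 = 2 (andnot computes (NOT a) AND b), 5 | 3 = 7,
    // and 5 ^ 3 = 6.
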
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
        assert_eq_m128i(r, e);
    }

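    // The scalar comparisons (the test above and those below) produce an
    // all-ones (true) or all-zeros (false) 64-bit mask in lane 0 and copy
    // lane 1 straight from `a`, which is why every expected value carries
    // `transmute(2.0f64)` in its upper half.
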
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
        assert_eq_m128i(r, e);
    }

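    // "Ordered" means neither operand is NaN, "unordered" means at least
    // one is; with NaN in lane 0 of `a`, `cmpord` therefore yields a zero
    // mask and `cmpunord` an all-ones mask.
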
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
        assert_eq_m128i(r, e);
    }

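    // The negated predicates (`cmpnlt`, `cmpnle`, `cmpngt`, `cmpnge`) are
    // exact logical inverses of the plain ones, so unlike e.g. `cmplt`
    // they also compare true when the operands are unordered (NaN); the
    // tests above only exercise the ordered cases.
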
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }

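    // The packed (`_pd`) comparisons compute an independent mask per lane
    // instead of passing lane 1 of `a` through, so both halves of the
    // expected vectors are plain 0 or !0 from here on.
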
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

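    // Unlike the `cmp*` intrinsics, the `comi*`/`ucomi*` family compares
    // only lane 0 and returns a scalar 0 or 1 rather than a mask; an
    // unordered (NaN) comparison yields 0 for the equality test above.
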
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

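    // `ucomi*` returns the same results as `comi*`; the difference is not
    // observable in these tests: `comisd` signals an invalid-operation
    // exception for any NaN operand, while `ucomisd` signals only for
    // signaling NaNs.
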
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }

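    // `_mm_movemask_pd` packs the sign bit of each double into bits 0 and
    // 1 of the result, so (-1.0, 5.0) gives 0b01 and (-1.0, -5.0) gives
    // 0b11.
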
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

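    // `Memory` is force-aligned to 16 bytes because the aligned load/store
    // tests below (`_mm_load_pd`, `_mm_store_pd`, ...) would fault on a
    // pointer that is not 16-byte aligned.
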
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

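    // `_mm_stream_pd` is a non-temporal (cache-bypassing) store; like
    // `_mm_store_pd` it requires a 16-byte-aligned destination, which the
    // over-aligned local `Memory` struct above guarantees.
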
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.offset(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

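    // The `storeu` test above deliberately bumps the pointer off any
    // 16-byte boundary: `_mm_storeu_pd` must handle unaligned addresses,
    // and the extra check on `vals[ofs - 1]` verifies that the store did
    // not clobber the element before the destination.
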
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.offset(offset as isize);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

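    // Narrowing two doubles to floats zeroes the upper two lanes of the
    // `__m128` result, and values outside the `f32` range (such as
    // `f64::MAX`) overflow to the correspondingly signed infinity.
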
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

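    // Double-to-int conversions that overflow, or that see NaN, produce
    // the "integer indefinite" value 0x8000_0000 (`i32::MIN`), which is
    // why all the out-of-range cases above compare against `i32::MIN`.
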
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

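    // The `cvtt*` ("t" for truncate) variants above always round toward
    // zero (-1.1 -> -1), whereas the plain `cvt*` conversions use the
    // current MXCSR rounding mode, round-to-nearest-even by default.
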
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }

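    // Only the two low bits of the `_mm_shuffle_pd` immediate matter:
    // bit 0 picks the lane of `a` for result lane 0 and bit 1 picks the
    // lane of `b` for result lane 1, so an immediate of 0 selects both low
    // lanes, (1.0, 3.0).
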
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

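    // The `_mm_cast*` intrinsics here and below are pure bit
    // reinterpretations between `__m128`, `__m128d` and `__m128i`; they
    // compile to no instructions, so testing them on all-zero inputs is
    // sufficient.
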
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}