//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    intrinsics,
    mem::{self, transmute},
    ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
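///
/// A minimal spin-wait sketch (illustrative; `locked` is a hypothetical flag
/// shared with another thread):
///
/// ```ignore
/// use core::sync::atomic::{AtomicBool, Ordering};
///
/// fn spin_until_unlocked(locked: &AtomicBool) {
///     while locked.load(Ordering::Acquire) {
///         // Hint that we are busy-waiting; lowers power use and helps the
///         // CPU leave the loop quickly once the flag clears.
///         unsafe { _mm_pause() };
///     }
/// }
/// ```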
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
46 ///
47 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
48 #[inline]
49 #[target_feature(enable = "sse2")]
50 #[cfg_attr(test, assert_instr(lfence))]
51 #[stable(feature = "simd_x86", since = "1.27.0")]
52 pub unsafe fn _mm_lfence() {
53 lfence()
54 }
55
56 /// Performs a serializing operation on all load-from-memory and store-to-memory
57 /// instructions that were issued prior to this instruction.
58 ///
59 /// Guarantees that every memory access that precedes, in program order, the
60 /// memory fence instruction is globally visible before any memory instruction
61 /// which follows the fence in program order.
62 ///
63 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
64 #[inline]
65 #[target_feature(enable = "sse2")]
66 #[cfg_attr(test, assert_instr(mfence))]
67 #[stable(feature = "simd_x86", since = "1.27.0")]
68 pub unsafe fn _mm_mfence() {
69 mfence()
70 }
71
72 /// Adds packed 8-bit integers in `a` and `b`.
73 ///
74 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
75 #[inline]
76 #[target_feature(enable = "sse2")]
77 #[cfg_attr(test, assert_instr(paddb))]
78 #[stable(feature = "simd_x86", since = "1.27.0")]
79 pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
80 transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
81 }
82
83 /// Adds packed 16-bit integers in `a` and `b`.
84 ///
85 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
86 #[inline]
87 #[target_feature(enable = "sse2")]
88 #[cfg_attr(test, assert_instr(paddw))]
89 #[stable(feature = "simd_x86", since = "1.27.0")]
90 pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
91 transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
92 }
93
94 /// Adds packed 32-bit integers in `a` and `b`.
95 ///
96 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
97 #[inline]
98 #[target_feature(enable = "sse2")]
99 #[cfg_attr(test, assert_instr(paddd))]
100 #[stable(feature = "simd_x86", since = "1.27.0")]
101 pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
102 transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
103 }
104
105 /// Adds packed 64-bit integers in `a` and `b`.
106 ///
107 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
108 #[inline]
109 #[target_feature(enable = "sse2")]
110 #[cfg_attr(test, assert_instr(paddq))]
111 #[stable(feature = "simd_x86", since = "1.27.0")]
112 pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
113 transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
114 }
115
116 /// Adds packed 8-bit integers in `a` and `b` using saturation.
117 ///
118 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
119 #[inline]
120 #[target_feature(enable = "sse2")]
121 #[cfg_attr(test, assert_instr(paddsb))]
122 #[stable(feature = "simd_x86", since = "1.27.0")]
123 pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124 transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
125 }
126
127 /// Adds packed 16-bit integers in `a` and `b` using saturation.
128 ///
129 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
130 #[inline]
131 #[target_feature(enable = "sse2")]
132 #[cfg_attr(test, assert_instr(paddsw))]
133 #[stable(feature = "simd_x86", since = "1.27.0")]
134 pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135 transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
136 }
137
138 /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
139 ///
140 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
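///
/// A small sketch of the saturating behavior (illustrative; assumes SSE2 and
/// that the `core::arch` intrinsics are in scope):
///
/// ```ignore
/// let a = _mm_set1_epi8(200u8 as i8);
/// let b = _mm_set1_epi8(100u8 as i8);
/// // 200 + 100 saturates to 255 in every lane instead of wrapping to 44.
/// let sum = _mm_adds_epu8(a, b);
/// assert_eq!(_mm_extract_epi16::<0>(sum) & 0xff, 255);
/// ```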
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
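///
/// The average is rounded: each lane computes `(a + b + 1) >> 1` on widened
/// values. A tiny sketch (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_set1_epi8(0);
/// let b = _mm_set1_epi8(1);
/// // (0 + 1 + 1) >> 1 == 1, so every lane of the result is 1, not 0.
/// let avg = _mm_avg_epu8(a, b);
/// ```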
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgb(a.as_u8x16(), b.as_u8x16()))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgw(a.as_u16x8(), b.as_u16x8()))
}

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
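///
/// A sketch of the per-lane arithmetic (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
/// let b = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
/// // 32-bit result lane 0: 1*10 + 2*20 = 50; lane 1: 3*30 + 4*40 = 250; ...
/// let r = _mm_madd_epi16(a, b);
/// ```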
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminub(a.as_u8x16(), b.as_u8x16()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
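///
/// A sketch showing where the high half goes (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_set1_epi16(1000);
/// let b = _mm_set1_epi16(1000);
/// // 1000 * 1000 = 1_000_000 = 0x000F_4240; the high 16 bits are 0x000F,
/// // so every lane of the result is 15.
/// let hi = _mm_mulhi_epi16(a, b);
/// ```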
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhw(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhuw(a.as_u16x8(), b.as_u16x8()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
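///
/// Only the even-indexed 32-bit lanes (0 and 2) participate; a sketch
/// (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_setr_epi32(0x8000_0000u32 as i32, 7, 2, 7);
/// let b = _mm_setr_epi32(2, 7, 3, 7);
/// // 64-bit lane 0: 0x8000_0000 * 2 = 0x1_0000_0000 (would overflow u32);
/// // 64-bit lane 1: 2 * 3 = 6. The odd input lanes (the 7s) are ignored.
/// let r = _mm_mul_epu32(a, b);
/// ```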
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmuludq(a.as_u32x4(), b.as_u32x4()))
}

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each group of 8 consecutive differences to
/// produce two unsigned 16-bit integers, and packs these sums into the low 16
/// bits of the two 64-bit elements of the returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
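///
/// A sketch of the reduction (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_set1_epi8(10);
/// let b = _mm_set1_epi8(13);
/// // |10 - 13| = 3 for each of the 16 bytes; each group of 8 bytes sums to
/// // 24, so both 64-bit lanes hold 24 in their low 16 bits.
/// let r = _mm_sad_epu8(a, b);
/// ```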
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
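///
/// Shifting by four bytes moves each 32-bit lane up one position and
/// zero-fills lane 0; a sketch (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// // The 32-bit lanes become [0, 1, 2, 3]; the old lane 3 is shifted out.
/// let r = _mm_slli_si128::<4>(a);
/// ```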
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 {
            i
        } else {
            16 - shift + i
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    transmute::<i8x16, _>(simd_shuffle16!(
        zero,
        a.as_i8x16(),
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psllid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliq(a.as_i64x2(), IMM8))
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
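///
/// The sign bit is replicated into the vacated positions; a sketch
/// (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_set1_epi16(-16);
/// // Arithmetic shift: -16 >> 2 == -4 in every lane. A logical shift
/// // (`_mm_srli_epi16`) would instead yield a large positive value.
/// let r = _mm_srai_epi16::<2>(a);
/// ```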
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraiw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    let x: i8x16 = simd_shuffle16!(
        a.as_i8x16(),
        zero,
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrliw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrlid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrliq(a.as_i64x2(), IMM8))
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
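///
/// `(!a) & b` is the usual building block of a branchless lane select; a
/// sketch (illustrative, assuming SSE2 and lanes of `mask` that are all-ones
/// or all-zeros, e.g. the output of a comparison):
///
/// ```ignore
/// unsafe fn select(mask: __m128i, if_set: __m128i, if_clear: __m128i) -> __m128i {
///     // Lanes where `mask` is all-ones take `if_set`; the rest take `if_clear`.
///     _mm_or_si128(_mm_and_si128(mask, if_set), _mm_andnot_si128(mask, if_clear))
/// }
/// ```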
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
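///
/// Each lane of the result is all-ones on equality and all-zeros otherwise,
/// which pairs naturally with `_mm_movemask_epi8` for byte searches; a sketch
/// (illustrative; `bytes` is a hypothetical `&[u8]` with at least 16 bytes):
///
/// ```ignore
/// let haystack = _mm_loadu_si128(bytes.as_ptr() as *const __m128i);
/// let eq = _mm_cmpeq_epi8(haystack, _mm_set1_epi8(b'\n' as i8));
/// // One bit per byte: bit i is set iff bytes[i] == b'\n'.
/// let mask = _mm_movemask_epi8(eq);
/// if mask != 0 {
///     let first = mask.trailing_zeros() as usize; // index of the first match
/// }
/// ```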
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle2!(a, a, [0, 1]))
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert(a, 0, b as f64)
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    cvtdq2ps(a.as_i32x4())
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract(a.as_i32x4(), 0)
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector; the upper element is zeroed.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128 bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
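///
/// A round-trip sketch through unaligned memory (illustrative; `buf` and
/// `out` are hypothetical `&[u8]`/`&mut [u8]` with at least 16 bytes each):
///
/// ```ignore
/// let v = _mm_loadu_si128(buf.as_ptr() as *const __m128i);
/// // ... operate on `v` with other SSE2 intrinsics ...
/// _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, v);
/// ```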
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}

/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// An element is stored only when the highest bit of the corresponding 8-bit
/// element in `mask` is set.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}

/// Stores 128 bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128 bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    storeudq(mem_addr as *mut i8, a);
}

/// Stores the lower 64-bit integer of `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
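///
/// A sketch of the clamping (illustrative, assuming SSE2):
///
/// ```ignore
/// let a = _mm_set1_epi16(300);  // above i8::MAX
/// let b = _mm_set1_epi16(-300); // below i8::MIN
/// // The low 8 bytes come from `a` saturated to 127, the high 8 bytes from
/// // `b` saturated to -128.
/// let packed = _mm_packs_epi16(a, b);
/// ```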
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}

/// Returns the `IMM8` element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_imm3!(IMM8);
    simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32
}

/// Returns a new vector where the `IMM8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_imm3!(IMM8);
    transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16))
}

/// Returns a mask of the most significant bit of each 8-bit element in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    pmovmskb(a.as_i8x16())
}

/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
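///
/// Each two-bit field of `IMM8` selects a source lane for one result lane; a
/// sketch using the `_MM_SHUFFLE` helper to build the control (illustrative;
/// assumes `_MM_SHUFFLE` is available in this context):
///
/// ```ignore
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// // Control 0b00_00_00_00: every result lane selects lane 0 (a splat of 10).
/// let splat = _mm_shuffle_epi32::<{ _MM_SHUFFLE(0, 0, 0, 0) }>(a);
/// // Control 0b00_01_10_11: the lanes come out reversed, [40, 30, 20, 10].
/// let rev = _mm_shuffle_epi32::<{ _MM_SHUFFLE(0, 1, 2, 3) }>(a);
/// ```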
1387 #[inline]
1388 #[target_feature(enable = "sse2")]
1389 #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
1390 #[rustc_legacy_const_generics(1)]
1391 #[stable(feature = "simd_x86", since = "1.27.0")]
1392 pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
1393 static_assert_imm8!(IMM8);
1394 let a = a.as_i32x4();
1395 let x: i32x4 = simd_shuffle4!(
1396 a,
1397 a,
1398 <const IMM8: i32> [
1399 IMM8 as u32 & 0b11,
1400 (IMM8 as u32 >> 2) & 0b11,
1401 (IMM8 as u32 >> 4) & 0b11,
1402 (IMM8 as u32 >> 6) & 0b11,
1403 ],
1404 );
1405 transmute(x)
1406 }
1407
1408 /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
1409 /// `IMM8`.
1410 ///
1411 /// Put the results in the high 64 bits of the returned vector, with the low 64
1412 /// bits being copied from from `a`.
1413 ///
1414 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
1415 #[inline]
1416 #[target_feature(enable = "sse2")]
1417 #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
1418 #[rustc_legacy_const_generics(1)]
1419 #[stable(feature = "simd_x86", since = "1.27.0")]
1420 pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1421 static_assert_imm8!(IMM8);
1422 let a = a.as_i16x8();
1423 let x: i16x8 = simd_shuffle8!(
1424 a,
1425 a,
1426 <const IMM8: i32> [
1427 0,
1428 1,
1429 2,
1430 3,
1431 (IMM8 as u32 & 0b11) + 4,
1432 ((IMM8 as u32 >> 2) & 0b11) + 4,
1433 ((IMM8 as u32 >> 4) & 0b11) + 4,
1434 ((IMM8 as u32 >> 6) & 0b11) + 4,
1435 ],
1436 );
1437 transmute(x)
1438 }
1439
1440 /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
1441 /// `IMM8`.
1442 ///
1443 /// Puts the results in the low 64 bits of the returned vector, with the high 64
1444 /// bits being copied from `a`.
1445 ///
1446 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
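/// # Examples
///
/// The mirror image of `_mm_shufflehi_epi16`: a sketch reversing the four
/// low lanes (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
///         let r = _mm_shufflelo_epi16::<0b00_01_10_11>(a);
///         assert_eq!(_mm_extract_epi16::<0>(r), 3);
///         assert_eq!(_mm_extract_epi16::<3>(r), 0);
///         // The high half is untouched.
///         assert_eq!(_mm_extract_epi16::<4>(r), 4);
///     }
/// }
/// ```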
1447 #[inline]
1448 #[target_feature(enable = "sse2")]
1449 #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
1450 #[rustc_legacy_const_generics(1)]
1451 #[stable(feature = "simd_x86", since = "1.27.0")]
1452 pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
1453 static_assert_imm8!(IMM8);
1454 let a = a.as_i16x8();
1455 let x: i16x8 = simd_shuffle8!(
1456 a,
1457 a,
1458 <const IMM8: i32> [
1459 IMM8 as u32 & 0b11,
1460 (IMM8 as u32 >> 2) & 0b11,
1461 (IMM8 as u32 >> 4) & 0b11,
1462 (IMM8 as u32 >> 6) & 0b11,
1463 4,
1464 5,
1465 6,
1466 7,
1467 ],
1468 );
1469 transmute(x)
1470 }
1471
1472 /// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1473 ///
1474 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
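/// # Examples
///
/// A sketch that spells out the interleaving order by storing the result to
/// an array (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         let b = _mm_setr_epi8(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
///         let r = _mm_unpackhi_epi8(a, b);
///         let mut out = [0i8; 16];
///         _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///         // High halves of `a` and `b`, alternating an `a` lane then a `b` lane.
///         assert_eq!(out, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]);
///     }
/// }
/// ```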
1475 #[inline]
1476 #[target_feature(enable = "sse2")]
1477 #[cfg_attr(test, assert_instr(punpckhbw))]
1478 #[stable(feature = "simd_x86", since = "1.27.0")]
1479 pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1480 transmute::<i8x16, _>(simd_shuffle16!(
1481 a.as_i8x16(),
1482 b.as_i8x16(),
1483 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1484 ))
1485 }
1486
1487 /// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1488 ///
1489 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
1490 #[inline]
1491 #[target_feature(enable = "sse2")]
1492 #[cfg_attr(test, assert_instr(punpckhwd))]
1493 #[stable(feature = "simd_x86", since = "1.27.0")]
1494 pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1495 let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1496 transmute::<i16x8, _>(x)
1497 }
1498
1499 /// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1500 ///
1501 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
1502 #[inline]
1503 #[target_feature(enable = "sse2")]
1504 #[cfg_attr(test, assert_instr(unpckhps))]
1505 #[stable(feature = "simd_x86", since = "1.27.0")]
1506 pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1507 transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
1508 }
1509
1510 /// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1511 ///
1512 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
1513 #[inline]
1514 #[target_feature(enable = "sse2")]
1515 #[cfg_attr(test, assert_instr(unpckhpd))]
1516 #[stable(feature = "simd_x86", since = "1.27.0")]
1517 pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1518 transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
1519 }
1520
1521 /// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1522 ///
1523 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
1524 #[inline]
1525 #[target_feature(enable = "sse2")]
1526 #[cfg_attr(test, assert_instr(punpcklbw))]
1527 #[stable(feature = "simd_x86", since = "1.27.0")]
1528 pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1529 transmute::<i8x16, _>(simd_shuffle16!(
1530 a.as_i8x16(),
1531 b.as_i8x16(),
1532 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1533 ))
1534 }
1535
1536 /// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1537 ///
1538 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
1539 #[inline]
1540 #[target_feature(enable = "sse2")]
1541 #[cfg_attr(test, assert_instr(punpcklwd))]
1542 #[stable(feature = "simd_x86", since = "1.27.0")]
1543 pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1544 let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1545 transmute::<i16x8, _>(x)
1546 }
1547
1548 /// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1549 ///
1550 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
1551 #[inline]
1552 #[target_feature(enable = "sse2")]
1553 #[cfg_attr(test, assert_instr(unpcklps))]
1554 #[stable(feature = "simd_x86", since = "1.27.0")]
1555 pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1556 transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
1557 }
1558
1559 /// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1560 ///
1561 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
1562 #[inline]
1563 #[target_feature(enable = "sse2")]
1564 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
1565 #[stable(feature = "simd_x86", since = "1.27.0")]
1566 pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1567 transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
1568 }
1569
1570 /// Returns a new vector with the low element of `a` replaced by the sum of the
1571 /// low elements of `a` and `b`.
1572 ///
1573 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
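/// # Examples
///
/// A sketch showing that only the low lane is summed while the high lane of
/// `a` passes through (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.0, 10.0);
///         let b = _mm_setr_pd(2.0, 20.0);
///         let r = _mm_add_sd(a, b);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [3.0, 10.0]);
///     }
/// }
/// ```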
1574 #[inline]
1575 #[target_feature(enable = "sse2")]
1576 #[cfg_attr(test, assert_instr(addsd))]
1577 #[stable(feature = "simd_x86", since = "1.27.0")]
1578 pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1579 simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
1580 }
1581
1582 /// Adds packed double-precision (64-bit) floating-point elements in `a` and
1583 /// `b`.
1584 ///
1585 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
1586 #[inline]
1587 #[target_feature(enable = "sse2")]
1588 #[cfg_attr(test, assert_instr(addpd))]
1589 #[stable(feature = "simd_x86", since = "1.27.0")]
1590 pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1591 simd_add(a, b)
1592 }
1593
1594 /// Returns a new vector with the low element of `a` replaced by the result of
1595 /// dividing the lower element of `a` by the lower element of `b`.
1596 ///
1597 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
1598 #[inline]
1599 #[target_feature(enable = "sse2")]
1600 #[cfg_attr(test, assert_instr(divsd))]
1601 #[stable(feature = "simd_x86", since = "1.27.0")]
1602 pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1603 simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
1604 }
1605
1606 /// Divides packed double-precision (64-bit) floating-point elements in `a` by
1607 /// packed elements in `b`.
1608 ///
1609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
1610 #[inline]
1611 #[target_feature(enable = "sse2")]
1612 #[cfg_attr(test, assert_instr(divpd))]
1613 #[stable(feature = "simd_x86", since = "1.27.0")]
1614 pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1615 simd_div(a, b)
1616 }
1617
1618 /// Returns a new vector with the low element of `a` replaced by the maximum
1619 /// of the lower elements of `a` and `b`.
1620 ///
1621 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
1622 #[inline]
1623 #[target_feature(enable = "sse2")]
1624 #[cfg_attr(test, assert_instr(maxsd))]
1625 #[stable(feature = "simd_x86", since = "1.27.0")]
1626 pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1627 maxsd(a, b)
1628 }
1629
1630 /// Returns a new vector with the maximum values from corresponding elements in
1631 /// `a` and `b`.
1632 ///
1633 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
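/// # Examples
///
/// A sketch of the lane-wise maximum; note that, as with the underlying
/// `maxpd` instruction, the second operand is returned when an input is NaN,
/// so the operation is not fully symmetric (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.0, 8.0);
///         let b = _mm_setr_pd(5.0, 2.0);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), _mm_max_pd(a, b));
///         assert_eq!(out, [5.0, 8.0]);
///     }
/// }
/// ```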
1634 #[inline]
1635 #[target_feature(enable = "sse2")]
1636 #[cfg_attr(test, assert_instr(maxpd))]
1637 #[stable(feature = "simd_x86", since = "1.27.0")]
1638 pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1639 maxpd(a, b)
1640 }
1641
1642 /// Returns a new vector with the low element of `a` replaced by the minimum
1643 /// of the lower elements of `a` and `b`.
1644 ///
1645 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
1646 #[inline]
1647 #[target_feature(enable = "sse2")]
1648 #[cfg_attr(test, assert_instr(minsd))]
1649 #[stable(feature = "simd_x86", since = "1.27.0")]
1650 pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1651 minsd(a, b)
1652 }
1653
1654 /// Returns a new vector with the minimum values from corresponding elements in
1655 /// `a` and `b`.
1656 ///
1657 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
1658 #[inline]
1659 #[target_feature(enable = "sse2")]
1660 #[cfg_attr(test, assert_instr(minpd))]
1661 #[stable(feature = "simd_x86", since = "1.27.0")]
1662 pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1663 minpd(a, b)
1664 }
1665
1666 /// Returns a new vector with the low element of `a` replaced by the product of
1667 /// the low elements of `a` and `b`.
1668 ///
1669 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
1670 #[inline]
1671 #[target_feature(enable = "sse2")]
1672 #[cfg_attr(test, assert_instr(mulsd))]
1673 #[stable(feature = "simd_x86", since = "1.27.0")]
1674 pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1675 simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
1676 }
1677
1678 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
1679 /// and `b`.
1680 ///
1681 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
1682 #[inline]
1683 #[target_feature(enable = "sse2")]
1684 #[cfg_attr(test, assert_instr(mulpd))]
1685 #[stable(feature = "simd_x86", since = "1.27.0")]
1686 pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1687 simd_mul(a, b)
1688 }
1689
1690 /// Returns a new vector with the low element of `a` replaced by the square
1691 /// root of the lower element of `b`.
1692 ///
1693 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
1694 #[inline]
1695 #[target_feature(enable = "sse2")]
1696 #[cfg_attr(test, assert_instr(sqrtsd))]
1697 #[stable(feature = "simd_x86", since = "1.27.0")]
1698 pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1699 simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
1700 }
1701
1702 /// Returns a new vector with the square root of each of the values in `a`.
1703 ///
1704 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
1705 #[inline]
1706 #[target_feature(enable = "sse2")]
1707 #[cfg_attr(test, assert_instr(sqrtpd))]
1708 #[stable(feature = "simd_x86", since = "1.27.0")]
1709 pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1710 simd_fsqrt(a)
1711 }
1712
1713 /// Returns a new vector with the low element of `a` replaced by subtracting the
1714 /// low element of `b` from the low element of `a`.
1715 ///
1716 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
1717 #[inline]
1718 #[target_feature(enable = "sse2")]
1719 #[cfg_attr(test, assert_instr(subsd))]
1720 #[stable(feature = "simd_x86", since = "1.27.0")]
1721 pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1722 simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
1723 }
1724
1725 /// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1726 /// from `a`.
1727 ///
1728 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
1729 #[inline]
1730 #[target_feature(enable = "sse2")]
1731 #[cfg_attr(test, assert_instr(subpd))]
1732 #[stable(feature = "simd_x86", since = "1.27.0")]
1733 pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1734 simd_sub(a, b)
1735 }
1736
1737 /// Computes the bitwise AND of packed double-precision (64-bit) floating-point
1738 /// elements in `a` and `b`.
1739 ///
1740 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
1741 #[inline]
1742 #[target_feature(enable = "sse2")]
1743 #[cfg_attr(test, assert_instr(andps))]
1744 #[stable(feature = "simd_x86", since = "1.27.0")]
1745 pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1746 let a: __m128i = transmute(a);
1747 let b: __m128i = transmute(b);
1748 transmute(_mm_and_si128(a, b))
1749 }
1750
1751 /// Computes the bitwise NOT of `a` and then ANDs the result with `b`.
1752 ///
1753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
1754 #[inline]
1755 #[target_feature(enable = "sse2")]
1756 #[cfg_attr(test, assert_instr(andnps))]
1757 #[stable(feature = "simd_x86", since = "1.27.0")]
1758 pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1759 let a: __m128i = transmute(a);
1760 let b: __m128i = transmute(b);
1761 transmute(_mm_andnot_si128(a, b))
1762 }
1763
1764 /// Computes the bitwise OR of `a` and `b`.
1765 ///
1766 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
1767 #[inline]
1768 #[target_feature(enable = "sse2")]
1769 #[cfg_attr(test, assert_instr(orps))]
1770 #[stable(feature = "simd_x86", since = "1.27.0")]
1771 pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1772 let a: __m128i = transmute(a);
1773 let b: __m128i = transmute(b);
1774 transmute(_mm_or_si128(a, b))
1775 }
1776
1777 /// Computes the bitwise XOR of `a` and `b`.
1778 ///
1779 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
1780 #[inline]
1781 #[target_feature(enable = "sse2")]
1782 #[cfg_attr(test, assert_instr(xorps))]
1783 #[stable(feature = "simd_x86", since = "1.27.0")]
1784 pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1785 let a: __m128i = transmute(a);
1786 let b: __m128i = transmute(b);
1787 transmute(_mm_xor_si128(a, b))
1788 }
1789
1790 /// Returns a new vector with the low element of `a` replaced by the equality
1791 /// comparison of the lower elements of `a` and `b`.
1792 ///
1793 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
1794 #[inline]
1795 #[target_feature(enable = "sse2")]
1796 #[cfg_attr(test, assert_instr(cmpeqsd))]
1797 #[stable(feature = "simd_x86", since = "1.27.0")]
1798 pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1799 cmpsd(a, b, 0)
1800 }
1801
1802 /// Returns a new vector with the low element of `a` replaced by the less-than
1803 /// comparison of the lower elements of `a` and `b`.
1804 ///
1805 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
1806 #[inline]
1807 #[target_feature(enable = "sse2")]
1808 #[cfg_attr(test, assert_instr(cmpltsd))]
1809 #[stable(feature = "simd_x86", since = "1.27.0")]
1810 pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1811 cmpsd(a, b, 1)
1812 }
1813
1814 /// Returns a new vector with the low element of `a` replaced by the
1815 /// less-than-or-equal comparison of the lower elements of `a` and `b`.
1816 ///
1817 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
1818 #[inline]
1819 #[target_feature(enable = "sse2")]
1820 #[cfg_attr(test, assert_instr(cmplesd))]
1821 #[stable(feature = "simd_x86", since = "1.27.0")]
1822 pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1823 cmpsd(a, b, 2)
1824 }
1825
1826 /// Returns a new vector with the low element of `a` replaced by the
1827 /// greater-than comparison of the lower elements of `a` and `b`.
1828 ///
1829 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
1830 #[inline]
1831 #[target_feature(enable = "sse2")]
1832 #[cfg_attr(test, assert_instr(cmpltsd))]
1833 #[stable(feature = "simd_x86", since = "1.27.0")]
1834 pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
1835 simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1836 }
1837
1838 /// Returns a new vector with the low element of `a` replaced by the
1839 /// greater-than-or-equal comparison of the lower elements of `a` and `b`.
1840 ///
1841 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
1842 #[inline]
1843 #[target_feature(enable = "sse2")]
1844 #[cfg_attr(test, assert_instr(cmplesd))]
1845 #[stable(feature = "simd_x86", since = "1.27.0")]
1846 pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
1847 simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1848 }
1849
1850 /// Returns a new vector with the low element of `a` replaced by the result
1851 /// of comparing both of the lower elements of `a` and `b` to `NaN`. If
1852 /// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
1853 /// otherwise.
1854 ///
1855 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
1856 #[inline]
1857 #[target_feature(enable = "sse2")]
1858 #[cfg_attr(test, assert_instr(cmpordsd))]
1859 #[stable(feature = "simd_x86", since = "1.27.0")]
1860 pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
1861 cmpsd(a, b, 7)
1862 }
1863
1864 /// Returns a new vector with the low element of `a` replaced by the result of
1865 /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
1866 /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
1867 ///
1868 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd)
1869 #[inline]
1870 #[target_feature(enable = "sse2")]
1871 #[cfg_attr(test, assert_instr(cmpunordsd))]
1872 #[stable(feature = "simd_x86", since = "1.27.0")]
1873 pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
1874 cmpsd(a, b, 3)
1875 }
1876
1877 /// Returns a new vector with the low element of `a` replaced by the not-equal
1878 /// comparison of the lower elements of `a` and `b`.
1879 ///
1880 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd)
1881 #[inline]
1882 #[target_feature(enable = "sse2")]
1883 #[cfg_attr(test, assert_instr(cmpneqsd))]
1884 #[stable(feature = "simd_x86", since = "1.27.0")]
1885 pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
1886 cmpsd(a, b, 4)
1887 }
1888
1889 /// Returns a new vector with the low element of `a` replaced by the
1890 /// not-less-than comparison of the lower elements of `a` and `b`.
1891 ///
1892 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd)
1893 #[inline]
1894 #[target_feature(enable = "sse2")]
1895 #[cfg_attr(test, assert_instr(cmpnltsd))]
1896 #[stable(feature = "simd_x86", since = "1.27.0")]
1897 pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
1898 cmpsd(a, b, 5)
1899 }
1900
1901 /// Returns a new vector with the low element of `a` replaced by the
1902 /// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
1903 ///
1904 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd)
1905 #[inline]
1906 #[target_feature(enable = "sse2")]
1907 #[cfg_attr(test, assert_instr(cmpnlesd))]
1908 #[stable(feature = "simd_x86", since = "1.27.0")]
1909 pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
1910 cmpsd(a, b, 6)
1911 }
1912
1913 /// Returns a new vector with the low element of `a` replaced by the
1914 /// not-greater-than comparison of the lower elements of `a` and `b`.
1915 ///
1916 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd)
1917 #[inline]
1918 #[target_feature(enable = "sse2")]
1919 #[cfg_attr(test, assert_instr(cmpnltsd))]
1920 #[stable(feature = "simd_x86", since = "1.27.0")]
1921 pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
1922 simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1923 }
1924
1925 /// Returns a new vector with the low element of `a` replaced by the
1926 /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
1927 ///
1928 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd)
1929 #[inline]
1930 #[target_feature(enable = "sse2")]
1931 #[cfg_attr(test, assert_instr(cmpnlesd))]
1932 #[stable(feature = "simd_x86", since = "1.27.0")]
1933 pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
1934 simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
1935 }
1936
1937 /// Compares corresponding elements in `a` and `b` for equality.
1938 ///
1939 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
1940 #[inline]
1941 #[target_feature(enable = "sse2")]
1942 #[cfg_attr(test, assert_instr(cmpeqpd))]
1943 #[stable(feature = "simd_x86", since = "1.27.0")]
1944 pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
1945 cmppd(a, b, 0)
1946 }
1947
1948 /// Compares corresponding elements in `a` and `b` for less-than.
1949 ///
1950 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
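/// # Examples
///
/// The packed comparisons produce an all-ones (true) or all-zeros (false)
/// mask per lane; a sketch combining this with `_mm_movemask_pd`
/// (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.0, 4.0);
///         let b = _mm_setr_pd(2.0, 3.0);
///         let mask = _mm_cmplt_pd(a, b);
///         // Only lane 0 satisfies a < b, so only bit 0 of the mask is set.
///         assert_eq!(_mm_movemask_pd(mask), 0b01);
///     }
/// }
/// ```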
1951 #[inline]
1952 #[target_feature(enable = "sse2")]
1953 #[cfg_attr(test, assert_instr(cmpltpd))]
1954 #[stable(feature = "simd_x86", since = "1.27.0")]
1955 pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
1956 cmppd(a, b, 1)
1957 }
1958
1959 /// Compares corresponding elements in `a` and `b` for less-than-or-equal.
1960 ///
1961 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
1962 #[inline]
1963 #[target_feature(enable = "sse2")]
1964 #[cfg_attr(test, assert_instr(cmplepd))]
1965 #[stable(feature = "simd_x86", since = "1.27.0")]
1966 pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
1967 cmppd(a, b, 2)
1968 }
1969
1970 /// Compares corresponding elements in `a` and `b` for greater-than.
1971 ///
1972 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
1973 #[inline]
1974 #[target_feature(enable = "sse2")]
1975 #[cfg_attr(test, assert_instr(cmpltpd))]
1976 #[stable(feature = "simd_x86", since = "1.27.0")]
1977 pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
1978 _mm_cmplt_pd(b, a)
1979 }
1980
1981 /// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
1982 ///
1983 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
1984 #[inline]
1985 #[target_feature(enable = "sse2")]
1986 #[cfg_attr(test, assert_instr(cmplepd))]
1987 #[stable(feature = "simd_x86", since = "1.27.0")]
1988 pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
1989 _mm_cmple_pd(b, a)
1990 }
1991
1992 /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
1993 ///
1994 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
1995 #[inline]
1996 #[target_feature(enable = "sse2")]
1997 #[cfg_attr(test, assert_instr(cmpordpd))]
1998 #[stable(feature = "simd_x86", since = "1.27.0")]
1999 pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2000 cmppd(a, b, 7)
2001 }
2002
2003 /// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
2004 ///
2005 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
2006 #[inline]
2007 #[target_feature(enable = "sse2")]
2008 #[cfg_attr(test, assert_instr(cmpunordpd))]
2009 #[stable(feature = "simd_x86", since = "1.27.0")]
2010 pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2011 cmppd(a, b, 3)
2012 }
2013
2014 /// Compares corresponding elements in `a` and `b` for not-equal.
2015 ///
2016 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
2017 #[inline]
2018 #[target_feature(enable = "sse2")]
2019 #[cfg_attr(test, assert_instr(cmpneqpd))]
2020 #[stable(feature = "simd_x86", since = "1.27.0")]
2021 pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2022 cmppd(a, b, 4)
2023 }
2024
2025 /// Compares corresponding elements in `a` and `b` for not-less-than.
2026 ///
2027 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
2028 #[inline]
2029 #[target_feature(enable = "sse2")]
2030 #[cfg_attr(test, assert_instr(cmpnltpd))]
2031 #[stable(feature = "simd_x86", since = "1.27.0")]
2032 pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2033 cmppd(a, b, 5)
2034 }
2035
2036 /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
2037 ///
2038 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
2039 #[inline]
2040 #[target_feature(enable = "sse2")]
2041 #[cfg_attr(test, assert_instr(cmpnlepd))]
2042 #[stable(feature = "simd_x86", since = "1.27.0")]
2043 pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2044 cmppd(a, b, 6)
2045 }
2046
2047 /// Compares corresponding elements in `a` and `b` for not-greater-than.
2048 ///
2049 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
2050 #[inline]
2051 #[target_feature(enable = "sse2")]
2052 #[cfg_attr(test, assert_instr(cmpnltpd))]
2053 #[stable(feature = "simd_x86", since = "1.27.0")]
2054 pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2055 _mm_cmpnlt_pd(b, a)
2056 }
2057
2058 /// Compares corresponding elements in `a` and `b` for
2059 /// not-greater-than-or-equal.
2060 ///
2061 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
2062 #[inline]
2063 #[target_feature(enable = "sse2")]
2064 #[cfg_attr(test, assert_instr(cmpnlepd))]
2065 #[stable(feature = "simd_x86", since = "1.27.0")]
2066 pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2067 _mm_cmpnle_pd(b, a)
2068 }
2069
2070 /// Compares the lower element of `a` and `b` for equality.
2071 ///
2072 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
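/// # Examples
///
/// Unlike the packed comparisons, the `comi*` family compares only the low
/// lanes and returns a plain `i32`; a minimal sketch (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.0, 2.0);
///         let b = _mm_setr_pd(1.0, 99.0);
///         // The differing high lanes take no part in the comparison.
///         assert_eq!(_mm_comieq_sd(a, b), 1);
///     }
/// }
/// ```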
2073 #[inline]
2074 #[target_feature(enable = "sse2")]
2075 #[cfg_attr(test, assert_instr(comisd))]
2076 #[stable(feature = "simd_x86", since = "1.27.0")]
2077 pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2078 comieqsd(a, b)
2079 }
2080
2081 /// Compares the lower element of `a` and `b` for less-than.
2082 ///
2083 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
2084 #[inline]
2085 #[target_feature(enable = "sse2")]
2086 #[cfg_attr(test, assert_instr(comisd))]
2087 #[stable(feature = "simd_x86", since = "1.27.0")]
2088 pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2089 comiltsd(a, b)
2090 }
2091
2092 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2093 ///
2094 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
2095 #[inline]
2096 #[target_feature(enable = "sse2")]
2097 #[cfg_attr(test, assert_instr(comisd))]
2098 #[stable(feature = "simd_x86", since = "1.27.0")]
2099 pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2100 comilesd(a, b)
2101 }
2102
2103 /// Compares the lower element of `a` and `b` for greater-than.
2104 ///
2105 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
2106 #[inline]
2107 #[target_feature(enable = "sse2")]
2108 #[cfg_attr(test, assert_instr(comisd))]
2109 #[stable(feature = "simd_x86", since = "1.27.0")]
2110 pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2111 comigtsd(a, b)
2112 }
2113
2114 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2115 ///
2116 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
2117 #[inline]
2118 #[target_feature(enable = "sse2")]
2119 #[cfg_attr(test, assert_instr(comisd))]
2120 #[stable(feature = "simd_x86", since = "1.27.0")]
2121 pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2122 comigesd(a, b)
2123 }
2124
2125 /// Compares the lower element of `a` and `b` for not-equal.
2126 ///
2127 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
2128 #[inline]
2129 #[target_feature(enable = "sse2")]
2130 #[cfg_attr(test, assert_instr(comisd))]
2131 #[stable(feature = "simd_x86", since = "1.27.0")]
2132 pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2133 comineqsd(a, b)
2134 }
2135
2136 /// Compares the lower element of `a` and `b` for equality.
2137 ///
2138 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
2139 #[inline]
2140 #[target_feature(enable = "sse2")]
2141 #[cfg_attr(test, assert_instr(ucomisd))]
2142 #[stable(feature = "simd_x86", since = "1.27.0")]
2143 pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2144 ucomieqsd(a, b)
2145 }
2146
2147 /// Compares the lower element of `a` and `b` for less-than.
2148 ///
2149 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
2150 #[inline]
2151 #[target_feature(enable = "sse2")]
2152 #[cfg_attr(test, assert_instr(ucomisd))]
2153 #[stable(feature = "simd_x86", since = "1.27.0")]
2154 pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2155 ucomiltsd(a, b)
2156 }
2157
2158 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2159 ///
2160 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
2161 #[inline]
2162 #[target_feature(enable = "sse2")]
2163 #[cfg_attr(test, assert_instr(ucomisd))]
2164 #[stable(feature = "simd_x86", since = "1.27.0")]
2165 pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2166 ucomilesd(a, b)
2167 }
2168
2169 /// Compares the lower element of `a` and `b` for greater-than.
2170 ///
2171 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
2172 #[inline]
2173 #[target_feature(enable = "sse2")]
2174 #[cfg_attr(test, assert_instr(ucomisd))]
2175 #[stable(feature = "simd_x86", since = "1.27.0")]
2176 pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2177 ucomigtsd(a, b)
2178 }
2179
2180 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2181 ///
2182 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
2183 #[inline]
2184 #[target_feature(enable = "sse2")]
2185 #[cfg_attr(test, assert_instr(ucomisd))]
2186 #[stable(feature = "simd_x86", since = "1.27.0")]
2187 pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2188 ucomigesd(a, b)
2189 }
2190
2191 /// Compares the lower element of `a` and `b` for not-equal.
2192 ///
2193 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
2194 #[inline]
2195 #[target_feature(enable = "sse2")]
2196 #[cfg_attr(test, assert_instr(ucomisd))]
2197 #[stable(feature = "simd_x86", since = "1.27.0")]
2198 pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2199 ucomineqsd(a, b)
2200 }
2201
2202 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2203 /// packed single-precision (32-bit) floating-point elements.
2204 ///
2205 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
2206 #[inline]
2207 #[target_feature(enable = "sse2")]
2208 #[cfg_attr(test, assert_instr(cvtpd2ps))]
2209 #[stable(feature = "simd_x86", since = "1.27.0")]
2210 pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2211 cvtpd2ps(a)
2212 }
2213
2214 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2215 /// packed double-precision (64-bit) floating-point elements.
2217 ///
2218 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
2219 #[inline]
2220 #[target_feature(enable = "sse2")]
2221 #[cfg_attr(test, assert_instr(cvtps2pd))]
2222 #[stable(feature = "simd_x86", since = "1.27.0")]
2223 pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
2224 cvtps2pd(a)
2225 }
2226
2227 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2228 /// packed 32-bit integers.
2229 ///
2230 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
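/// # Examples
///
/// A sketch assuming the default MXCSR rounding mode (round to nearest,
/// ties to even); the upper two lanes of the result are zeroed
/// (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.6, -2.6);
///         let r = _mm_cvtpd_epi32(a);
///         let mut out = [0i32; 4];
///         _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///         assert_eq!(out, [2, -3, 0, 0]);
///     }
/// }
/// ```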
2231 #[inline]
2232 #[target_feature(enable = "sse2")]
2233 #[cfg_attr(test, assert_instr(cvtpd2dq))]
2234 #[stable(feature = "simd_x86", since = "1.27.0")]
2235 pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2236 transmute(cvtpd2dq(a))
2237 }
2238
2239 /// Converts the lower double-precision (64-bit) floating-point element in `a` to
2240 /// a 32-bit integer.
2241 ///
2242 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
2243 #[inline]
2244 #[target_feature(enable = "sse2")]
2245 #[cfg_attr(test, assert_instr(cvtsd2si))]
2246 #[stable(feature = "simd_x86", since = "1.27.0")]
2247 pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2248 cvtsd2si(a)
2249 }
2250
2251 /// Converts the lower double-precision (64-bit) floating-point element in `b`
2252 /// to a single-precision (32-bit) floating-point element, stores the result in
2253 /// the lower element of the return value, and copies the upper elements from
2254 /// `a` to the upper elements of the return value.
2255 ///
2256 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
2257 #[inline]
2258 #[target_feature(enable = "sse2")]
2259 #[cfg_attr(test, assert_instr(cvtsd2ss))]
2260 #[stable(feature = "simd_x86", since = "1.27.0")]
2261 pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2262 cvtsd2ss(a, b)
2263 }
2264
2265 /// Returns the lower double-precision (64-bit) floating-point element of `a`.
2266 ///
2267 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
2268 #[inline]
2269 #[target_feature(enable = "sse2")]
2270 #[stable(feature = "simd_x86", since = "1.27.0")]
2271 pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2272 simd_extract(a, 0)
2273 }
2274
2275 /// Converts the lower single-precision (32-bit) floating-point element in `b`
2276 /// to a double-precision (64-bit) floating-point element, stores the result in
2277 /// the lower element of the return value, and copies the upper element from
2278 /// `a` to the upper element of the return value.
2279 ///
2280 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
2281 #[inline]
2282 #[target_feature(enable = "sse2")]
2283 #[cfg_attr(test, assert_instr(cvtss2sd))]
2284 #[stable(feature = "simd_x86", since = "1.27.0")]
2285 pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2286 cvtss2sd(a, b)
2287 }
2288
2289 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2290 /// packed 32-bit integers with truncation.
2291 ///
2292 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
2293 #[inline]
2294 #[target_feature(enable = "sse2")]
2295 #[cfg_attr(test, assert_instr(cvttpd2dq))]
2296 #[stable(feature = "simd_x86", since = "1.27.0")]
2297 pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2298 transmute(cvttpd2dq(a))
2299 }
2300
2301 /// Converts the lower double-precision (64-bit) floating-point element in `a`
2302 /// to a 32-bit integer with truncation.
2303 ///
2304 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
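/// # Examples
///
/// Truncation rounds toward zero regardless of the MXCSR rounding mode; a
/// minimal sketch (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(-1.9, 7.0);
///         assert_eq!(_mm_cvttsd_si32(a), -1);
///     }
/// }
/// ```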
2305 #[inline]
2306 #[target_feature(enable = "sse2")]
2307 #[cfg_attr(test, assert_instr(cvttsd2si))]
2308 #[stable(feature = "simd_x86", since = "1.27.0")]
2309 pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2310 cvttsd2si(a)
2311 }
2312
2313 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2314 /// packed 32-bit integers with truncation.
2315 ///
2316 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
2317 #[inline]
2318 #[target_feature(enable = "sse2")]
2319 #[cfg_attr(test, assert_instr(cvttps2dq))]
2320 #[stable(feature = "simd_x86", since = "1.27.0")]
2321 pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2322 transmute(cvttps2dq(a))
2323 }
2324
2325 /// Copies double-precision (64-bit) floating-point value `a` to the lower
2326 /// element of the return value, and zeroes the upper element.
2327 ///
2328 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
2329 #[inline]
2330 #[target_feature(enable = "sse2")]
2331 #[stable(feature = "simd_x86", since = "1.27.0")]
2332 pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
2333 _mm_set_pd(0.0, a)
2334 }
2335
2336 /// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2337 /// of the return value.
2338 ///
2339 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
2340 #[inline]
2341 #[target_feature(enable = "sse2")]
2342 #[stable(feature = "simd_x86", since = "1.27.0")]
2343 pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
2344 _mm_set_pd(a, a)
2345 }
2346
2347 /// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2348 /// of the return value.
2349 ///
2350 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
2351 #[inline]
2352 #[target_feature(enable = "sse2")]
2353 #[stable(feature = "simd_x86", since = "1.27.0")]
2354 pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
2355 _mm_set_pd(a, a)
2356 }
2357
2358 /// Sets packed double-precision (64-bit) floating-point elements in the return
2359 /// value with the supplied values.
2360 ///
2361 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
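/// # Examples
///
/// The first argument becomes the *high* element, which is easy to trip
/// over; a sketch contrasting argument order with in-memory lane order
/// (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let v = _mm_set_pd(2.0, 1.0);
///         // The low element comes from the second argument...
///         assert_eq!(_mm_cvtsd_f64(v), 1.0);
///         // ...so the in-memory lane order is the reverse of the call.
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), v);
///         assert_eq!(out, [1.0, 2.0]);
///     }
/// }
/// ```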
2362 #[inline]
2363 #[target_feature(enable = "sse2")]
2364 #[stable(feature = "simd_x86", since = "1.27.0")]
2365 pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2366 __m128d(b, a)
2367 }
2368
2369 /// Sets packed double-precision (64-bit) floating-point elements in the return
2370 /// value with the supplied values in reverse order.
2371 ///
2372 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
2373 #[inline]
2374 #[target_feature(enable = "sse2")]
2375 #[stable(feature = "simd_x86", since = "1.27.0")]
2376 pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2377 _mm_set_pd(b, a)
2378 }
2379
2380 /// Returns packed double-precision (64-bit) floating-point elements with all
2381 /// zeros.
2382 ///
2383 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
2384 #[inline]
2385 #[target_feature(enable = "sse2")]
2386 #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
2387 #[stable(feature = "simd_x86", since = "1.27.0")]
2388 pub unsafe fn _mm_setzero_pd() -> __m128d {
2389 _mm_set_pd(0.0, 0.0)
2390 }
2391
2392 /// Returns a mask of the most significant bit of each element in `a`.
2393 ///
2394 /// The mask is stored in the 2 least significant bits of the return value.
2395 /// All other bits are set to `0`.
2396 ///
2397 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
2398 #[inline]
2399 #[target_feature(enable = "sse2")]
2400 #[cfg_attr(test, assert_instr(movmskpd))]
2401 #[stable(feature = "simd_x86", since = "1.27.0")]
2402 pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
2403 movmskpd(a)
2404 }
2405
2406 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2407 /// floating-point elements) from memory into the returned vector.
2408 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2409 /// exception may be generated.
2410 ///
2411 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
2412 #[inline]
2413 #[target_feature(enable = "sse2")]
2414 #[cfg_attr(test, assert_instr(movaps))]
2415 #[stable(feature = "simd_x86", since = "1.27.0")]
2416 #[allow(clippy::cast_ptr_alignment)]
2417 pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2418 *(mem_addr as *const __m128d)
2419 }
2420
2421 /// Loads a 64-bit double-precision value to the low element of the
2422 /// returned vector of `[2 x double]` and clears the upper element.
2423 ///
2424 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
2425 #[inline]
2426 #[target_feature(enable = "sse2")]
2427 #[cfg_attr(test, assert_instr(movsd))]
2428 #[stable(feature = "simd_x86", since = "1.27.0")]
2429 pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2430 _mm_setr_pd(*mem_addr, 0.)
2431 }
2432
2433 /// Loads a double-precision value into the high-order bits of a 128-bit
2434 /// vector of `[2 x double]`. The low-order bits are copied from the low-order
2435 /// bits of the first operand.
2436 ///
2437 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
2438 #[inline]
2439 #[target_feature(enable = "sse2")]
2440 #[cfg_attr(test, assert_instr(movhps))]
2441 #[stable(feature = "simd_x86", since = "1.27.0")]
2442 pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2443 _mm_setr_pd(simd_extract(a, 0), *mem_addr)
2444 }
2445
2446 /// Loads a double-precision value into the low-order bits of a 128-bit
2447 /// vector of `[2 x double]`. The high-order bits are copied from the
2448 /// high-order bits of the first operand.
2449 ///
2450 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
2451 #[inline]
2452 #[target_feature(enable = "sse2")]
2453 #[cfg_attr(test, assert_instr(movlps))]
2454 #[stable(feature = "simd_x86", since = "1.27.0")]
2455 pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2456 _mm_setr_pd(*mem_addr, simd_extract(a, 1))
2457 }
2458
2459 /// Stores a 128-bit floating-point vector of `[2 x double]` to a 128-bit
2460 /// aligned memory location.
2461 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
2462 /// used again soon).
2463 ///
2464 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd)
2465 #[inline]
2466 #[target_feature(enable = "sse2")]
2467 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
2468 #[stable(feature = "simd_x86", since = "1.27.0")]
2469 #[allow(clippy::cast_ptr_alignment)]
2470 pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2471 intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
2472 }
2473
2474 /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2475 /// memory location.
2476 ///
2477 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd)
2478 #[inline]
2479 #[target_feature(enable = "sse2")]
2480 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2481 #[stable(feature = "simd_x86", since = "1.27.0")]
2482 pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2483 *mem_addr = simd_extract(a, 0)
2484 }
2485
2486 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2487 /// floating-point elements) from `a` into memory. `mem_addr` must be aligned
2488 /// on a 16-byte boundary or a general-protection exception may be generated.
2489 ///
2490 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
2491 #[inline]
2492 #[target_feature(enable = "sse2")]
2493 #[cfg_attr(test, assert_instr(movaps))]
2494 #[stable(feature = "simd_x86", since = "1.27.0")]
2495 #[allow(clippy::cast_ptr_alignment)]
2496 pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2497 *(mem_addr as *mut __m128d) = a;
2498 }
2499
2500 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2501 /// floating-point elements) from `a` into memory.
2502 /// `mem_addr` does not need to be aligned on any particular boundary.
2503 ///
2504 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
2505 #[inline]
2506 #[target_feature(enable = "sse2")]
2507 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
2508 #[stable(feature = "simd_x86", since = "1.27.0")]
2509 pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2510 storeupd(mem_addr as *mut i8, a);
2511 }
2512
2513 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2514 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2515 /// 16-byte boundary or a general-protection exception may be generated.
2516 ///
2517 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
2518 #[inline]
2519 #[target_feature(enable = "sse2")]
2520 #[stable(feature = "simd_x86", since = "1.27.0")]
2521 #[allow(clippy::cast_ptr_alignment)]
2522 pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2523 let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
2524 *(mem_addr as *mut __m128d) = b;
2525 }
2526
2527 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2528 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2529 /// 16-byte boundary or a general-protection exception may be generated.
2530 ///
2531 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
2532 #[inline]
2533 #[target_feature(enable = "sse2")]
2534 #[stable(feature = "simd_x86", since = "1.27.0")]
2535 #[allow(clippy::cast_ptr_alignment)]
2536 pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2537 let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
2538 *(mem_addr as *mut __m128d) = b;
2539 }
2540
2541 /// Stores 2 double-precision (64-bit) floating-point elements from `a` into
2542 /// memory in reverse order.
2543 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2544 /// exception may be generated.
2545 ///
2546 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
2547 #[inline]
2548 #[target_feature(enable = "sse2")]
2549 #[stable(feature = "simd_x86", since = "1.27.0")]
2550 #[allow(clippy::cast_ptr_alignment)]
2551 pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2552 let b: __m128d = simd_shuffle2!(a, a, [1, 0]);
2553 *(mem_addr as *mut __m128d) = b;
2554 }
2555
2556 /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
2557 /// memory location.
2558 ///
2559 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd)
2560 #[inline]
2561 #[target_feature(enable = "sse2")]
2562 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
2563 #[stable(feature = "simd_x86", since = "1.27.0")]
2564 pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2565 *mem_addr = simd_extract(a, 1);
2566 }
2567
2568 /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2569 /// memory location.
2570 ///
2571 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd)
2572 #[inline]
2573 #[target_feature(enable = "sse2")]
2574 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2575 #[stable(feature = "simd_x86", since = "1.27.0")]
2576 pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2577 *mem_addr = simd_extract(a, 0);
2578 }
2579
2580 /// Loads a double-precision (64-bit) floating-point element from memory
2581 /// into both elements of returned vector.
2582 ///
2583 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
2584 #[inline]
2585 #[target_feature(enable = "sse2")]
2586 // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2587 #[stable(feature = "simd_x86", since = "1.27.0")]
2588 pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2589 let d = *mem_addr;
2590 _mm_setr_pd(d, d)
2591 }
2592
2593 /// Loads a double-precision (64-bit) floating-point element from memory
2594 /// into both elements of returned vector.
2595 ///
2596 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
2597 #[inline]
2598 #[target_feature(enable = "sse2")]
2599 // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
2600 #[stable(feature = "simd_x86", since = "1.27.0")]
2601 pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2602 _mm_load1_pd(mem_addr)
2603 }
2604
2605 /// Loads 2 double-precision (64-bit) floating-point elements from memory into
2606 /// the returned vector in reverse order. `mem_addr` must be aligned on a
2607 /// 16-byte boundary or a general-protection exception may be generated.
2608 ///
2609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
2610 #[inline]
2611 #[target_feature(enable = "sse2")]
2612 #[cfg_attr(test, assert_instr(movaps))]
2613 #[stable(feature = "simd_x86", since = "1.27.0")]
2614 pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2615 let a = _mm_load_pd(mem_addr);
2616 simd_shuffle2!(a, a, [1, 0])
2617 }
2618
2619 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2620 /// floating-point elements) from memory into the returned vector.
2621 /// `mem_addr` does not need to be aligned on any particular boundary.
2622 ///
2623 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
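/// # Examples
///
/// A round-trip sketch through an ordinary (not necessarily 16-byte-aligned)
/// array (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let data = [3.5f64, -7.25];
///         let v = _mm_loadu_pd(data.as_ptr());
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), v);
///         assert_eq!(out, data);
///     }
/// }
/// ```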
2624 #[inline]
2625 #[target_feature(enable = "sse2")]
2626 #[cfg_attr(test, assert_instr(movups))]
2627 #[stable(feature = "simd_x86", since = "1.27.0")]
2628 pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2629 let mut dst = _mm_undefined_pd();
2630 ptr::copy_nonoverlapping(
2631 mem_addr as *const u8,
2632 &mut dst as *mut __m128d as *mut u8,
2633 mem::size_of::<__m128d>(),
2634 );
2635 dst
2636 }
2637
2638 /// Constructs a 128-bit floating-point vector of `[2 x double]` from two
2639 /// 128-bit vector parameters of `[2 x double]`, using the immediate-value
2640 /// parameter as a specifier.
2641 ///
2642 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd)
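/// # Examples
///
/// A sketch decoding the two-bit mask: bit 0 picks the low lane from `a`,
/// bit 1 picks the high lane from `b` (`x86_64` assumed):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm_setr_pd(1.0, 2.0);
///         let b = _mm_setr_pd(3.0, 4.0);
///         let r = _mm_shuffle_pd::<0b10>(a, b);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [1.0, 4.0]);
///     }
/// }
/// ```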
2643 #[inline]
2644 #[target_feature(enable = "sse2")]
2645 #[cfg_attr(test, assert_instr(shufps, MASK = 2))]
2646 #[rustc_legacy_const_generics(2)]
2647 #[stable(feature = "simd_x86", since = "1.27.0")]
2648 pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
2649 static_assert_imm8!(MASK);
2650 simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
2651 }
2652
2653 /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
2654 /// 64 bits are set to the lower 64 bits of the second parameter. The upper
2655 /// 64 bits are set to the upper 64 bits of the first parameter.
2656 ///
2657 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd)
2658 #[inline]
2659 #[target_feature(enable = "sse2")]
2660 #[cfg_attr(test, assert_instr(movsd))]
2661 #[stable(feature = "simd_x86", since = "1.27.0")]
2662 pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2663 _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1))
2664 }
2665
2666 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2667 /// floating-point vector of `[4 x float]`.
2668 ///
2669 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps)
2670 #[inline]
2671 #[target_feature(enable = "sse2")]
2672 #[stable(feature = "simd_x86", since = "1.27.0")]
2673 pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
2674 transmute(a)
2675 }
2676
2677 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2678 /// integer vector.
2679 ///
2680 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128)
2681 #[inline]
2682 #[target_feature(enable = "sse2")]
2683 #[stable(feature = "simd_x86", since = "1.27.0")]
2684 pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
2685 transmute(a)
2686 }
2687
2688 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2689 /// floating-point vector of `[2 x double]`.
2690 ///
2691 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd)
2692 #[inline]
2693 #[target_feature(enable = "sse2")]
2694 #[stable(feature = "simd_x86", since = "1.27.0")]
2695 pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
2696 transmute(a)
2697 }
2698
2699 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2700 /// integer vector.
2701 ///
2702 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128)
2703 #[inline]
2704 #[target_feature(enable = "sse2")]
2705 #[stable(feature = "simd_x86", since = "1.27.0")]
2706 pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
2707 transmute(a)
2708 }
2709
2710 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2711 /// of `[2 x double]`.
2712 ///
2713 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
2714 #[inline]
2715 #[target_feature(enable = "sse2")]
2716 #[stable(feature = "simd_x86", since = "1.27.0")]
2717 pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2718 transmute(a)
2719 }
2720
2721 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2722 /// of `[4 x float]`.
2723 ///
2724 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
2725 #[inline]
2726 #[target_feature(enable = "sse2")]
2727 #[stable(feature = "simd_x86", since = "1.27.0")]
2728 pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2729 transmute(a)
2730 }
2731
2732 /// Returns a vector of type `__m128d` with undefined elements.
2733 ///
2734 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
2735 #[inline]
2736 #[target_feature(enable = "sse2")]
2737 #[stable(feature = "simd_x86", since = "1.27.0")]
2738 pub unsafe fn _mm_undefined_pd() -> __m128d {
2739 // FIXME: this function should return MaybeUninit<__m128d>
2740 mem::MaybeUninit::<__m128d>::uninit().assume_init()
2741 }
2742
2743 /// Returns a vector of type `__m128i` with undefined elements.
2744 ///
2745 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
2746 #[inline]
2747 #[target_feature(enable = "sse2")]
2748 #[stable(feature = "simd_x86", since = "1.27.0")]
2749 pub unsafe fn _mm_undefined_si128() -> __m128i {
2750 // FIXME: this function should return MaybeUninit<__m128i>
2751 mem::MaybeUninit::<__m128i>::uninit().assume_init()
2752 }
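
// Illustrative sketch only (`undefined_then_overwrite` is not part of this
// module): an undefined vector is only useful as a placeholder that is
// fully overwritten before any lane is read.
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn undefined_then_overwrite() -> __m128i {
    let mut v = _mm_undefined_si128(); // contents are unspecified here
    v = _mm_set1_epi8(0); // write every lane before any lane is read
    v
}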
2753
2754 /// The resulting `__m128d` element is composed of the high-order values of
2755 /// the two `__m128d` interleaved input elements, i.e.:
2756 ///
2757 /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
2758 ///   input
2759 /// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
2760 ///
2761 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
2762 #[inline]
2763 #[target_feature(enable = "sse2")]
2764 #[cfg_attr(test, assert_instr(unpckhpd))]
2765 #[stable(feature = "simd_x86", since = "1.27.0")]
2766 pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
2767 simd_shuffle2!(a, b, [1, 3])
2768 }
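
// Concrete example: with `a = [1.0, 2.0]` and `b = [3.0, 4.0]` (low lane
// first), `_mm_unpackhi_pd(a, b)` yields `[2.0, 4.0]`, the high lanes of
// `a` and `b`.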
2769
2770 /// The resulting `__m128d` element is composed of the low-order values of
2771 /// the two `__m128d` interleaved input elements, i.e.:
2772 ///
2773 /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
2774 /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
2775 ///
2776 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
2777 #[inline]
2778 #[target_feature(enable = "sse2")]
2779 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
2780 #[stable(feature = "simd_x86", since = "1.27.0")]
2781 pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
2782 simd_shuffle2!(a, b, [0, 2])
2783 }
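
// Illustrative sketch only (`unpack_pair_sketch` is not part of this
// module): used together, the two unpack intrinsics split a pair of
// vectors into their low and high lanes.
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn unpack_pair_sketch(a: __m128d, b: __m128d) -> (__m128d, __m128d) {
    // With a = [a0, a1] and b = [b0, b1] (low lane first):
    let lo = _mm_unpacklo_pd(a, b); // [a0, b0]
    let hi = _mm_unpackhi_pd(a, b); // [a1, b1]
    (lo, hi)
}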
2784
2785 #[allow(improper_ctypes)]
2786 extern "C" {
2787 #[link_name = "llvm.x86.sse2.pause"]
2788 fn pause();
2789 #[link_name = "llvm.x86.sse2.clflush"]
2790 fn clflush(p: *const u8);
2791 #[link_name = "llvm.x86.sse2.lfence"]
2792 fn lfence();
2793 #[link_name = "llvm.x86.sse2.mfence"]
2794 fn mfence();
2795 #[link_name = "llvm.x86.sse2.pavg.b"]
2796 fn pavgb(a: u8x16, b: u8x16) -> u8x16;
2797 #[link_name = "llvm.x86.sse2.pavg.w"]
2798 fn pavgw(a: u16x8, b: u16x8) -> u16x8;
2799 #[link_name = "llvm.x86.sse2.pmadd.wd"]
2800 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
2801 #[link_name = "llvm.x86.sse2.pmaxs.w"]
2802 fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
2803 #[link_name = "llvm.x86.sse2.pmaxu.b"]
2804 fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
2805 #[link_name = "llvm.x86.sse2.pmins.w"]
2806 fn pminsw(a: i16x8, b: i16x8) -> i16x8;
2807 #[link_name = "llvm.x86.sse2.pminu.b"]
2808 fn pminub(a: u8x16, b: u8x16) -> u8x16;
2809 #[link_name = "llvm.x86.sse2.pmulh.w"]
2810 fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
2811 #[link_name = "llvm.x86.sse2.pmulhu.w"]
2812 fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
2813 #[link_name = "llvm.x86.sse2.pmulu.dq"]
2814 fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
2815 #[link_name = "llvm.x86.sse2.psad.bw"]
2816 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
2817 #[link_name = "llvm.x86.sse2.pslli.w"]
2818 fn pslliw(a: i16x8, imm8: i32) -> i16x8;
2819 #[link_name = "llvm.x86.sse2.psll.w"]
2820 fn psllw(a: i16x8, count: i16x8) -> i16x8;
2821 #[link_name = "llvm.x86.sse2.pslli.d"]
2822 fn psllid(a: i32x4, imm8: i32) -> i32x4;
2823 #[link_name = "llvm.x86.sse2.psll.d"]
2824 fn pslld(a: i32x4, count: i32x4) -> i32x4;
2825 #[link_name = "llvm.x86.sse2.pslli.q"]
2826 fn pslliq(a: i64x2, imm8: i32) -> i64x2;
2827 #[link_name = "llvm.x86.sse2.psll.q"]
2828 fn psllq(a: i64x2, count: i64x2) -> i64x2;
2829 #[link_name = "llvm.x86.sse2.psrai.w"]
2830 fn psraiw(a: i16x8, imm8: i32) -> i16x8;
2831 #[link_name = "llvm.x86.sse2.psra.w"]
2832 fn psraw(a: i16x8, count: i16x8) -> i16x8;
2833 #[link_name = "llvm.x86.sse2.psrai.d"]
2834 fn psraid(a: i32x4, imm8: i32) -> i32x4;
2835 #[link_name = "llvm.x86.sse2.psra.d"]
2836 fn psrad(a: i32x4, count: i32x4) -> i32x4;
2837 #[link_name = "llvm.x86.sse2.psrli.w"]
2838 fn psrliw(a: i16x8, imm8: i32) -> i16x8;
2839 #[link_name = "llvm.x86.sse2.psrl.w"]
2840 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
2841 #[link_name = "llvm.x86.sse2.psrli.d"]
2842 fn psrlid(a: i32x4, imm8: i32) -> i32x4;
2843 #[link_name = "llvm.x86.sse2.psrl.d"]
2844 fn psrld(a: i32x4, count: i32x4) -> i32x4;
2845 #[link_name = "llvm.x86.sse2.psrli.q"]
2846 fn psrliq(a: i64x2, imm8: i32) -> i64x2;
2847 #[link_name = "llvm.x86.sse2.psrl.q"]
2848 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
2849 #[link_name = "llvm.x86.sse2.cvtdq2ps"]
2850 fn cvtdq2ps(a: i32x4) -> __m128;
2851 #[link_name = "llvm.x86.sse2.cvtps2dq"]
2852 fn cvtps2dq(a: __m128) -> i32x4;
2853 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
2854 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
2855 #[link_name = "llvm.x86.sse2.packsswb.128"]
2856 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
2857 #[link_name = "llvm.x86.sse2.packssdw.128"]
2858 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
2859 #[link_name = "llvm.x86.sse2.packuswb.128"]
2860 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
2861 #[link_name = "llvm.x86.sse2.pmovmskb.128"]
2862 fn pmovmskb(a: i8x16) -> i32;
2863 #[link_name = "llvm.x86.sse2.max.sd"]
2864 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
2865 #[link_name = "llvm.x86.sse2.max.pd"]
2866 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
2867 #[link_name = "llvm.x86.sse2.min.sd"]
2868 fn minsd(a: __m128d, b: __m128d) -> __m128d;
2869 #[link_name = "llvm.x86.sse2.min.pd"]
2870 fn minpd(a: __m128d, b: __m128d) -> __m128d;
2871 #[link_name = "llvm.x86.sse2.sqrt.sd"]
2872 fn sqrtsd(a: __m128d) -> __m128d;
2873 #[link_name = "llvm.x86.sse2.sqrt.pd"]
2874 fn sqrtpd(a: __m128d) -> __m128d;
2875 #[link_name = "llvm.x86.sse2.cmp.sd"]
2876 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2877 #[link_name = "llvm.x86.sse2.cmp.pd"]
2878 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
2879 #[link_name = "llvm.x86.sse2.comieq.sd"]
2880 fn comieqsd(a: __m128d, b: __m128d) -> i32;
2881 #[link_name = "llvm.x86.sse2.comilt.sd"]
2882 fn comiltsd(a: __m128d, b: __m128d) -> i32;
2883 #[link_name = "llvm.x86.sse2.comile.sd"]
2884 fn comilesd(a: __m128d, b: __m128d) -> i32;
2885 #[link_name = "llvm.x86.sse2.comigt.sd"]
2886 fn comigtsd(a: __m128d, b: __m128d) -> i32;
2887 #[link_name = "llvm.x86.sse2.comige.sd"]
2888 fn comigesd(a: __m128d, b: __m128d) -> i32;
2889 #[link_name = "llvm.x86.sse2.comineq.sd"]
2890 fn comineqsd(a: __m128d, b: __m128d) -> i32;
2891 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
2892 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
2893 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
2894 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
2895 #[link_name = "llvm.x86.sse2.ucomile.sd"]
2896 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
2897 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
2898 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
2899 #[link_name = "llvm.x86.sse2.ucomige.sd"]
2900 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
2901 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
2902 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
2903 #[link_name = "llvm.x86.sse2.movmsk.pd"]
2904 fn movmskpd(a: __m128d) -> i32;
2905 #[link_name = "llvm.x86.sse2.cvtpd2ps"]
2906 fn cvtpd2ps(a: __m128d) -> __m128;
2907 #[link_name = "llvm.x86.sse2.cvtps2pd"]
2908 fn cvtps2pd(a: __m128) -> __m128d;
2909 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
2910 fn cvtpd2dq(a: __m128d) -> i32x4;
2911 #[link_name = "llvm.x86.sse2.cvtsd2si"]
2912 fn cvtsd2si(a: __m128d) -> i32;
2913 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
2914 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
2915 #[link_name = "llvm.x86.sse2.cvtss2sd"]
2916 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
2917 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
2918 fn cvttpd2dq(a: __m128d) -> i32x4;
2919 #[link_name = "llvm.x86.sse2.cvttsd2si"]
2920 fn cvttsd2si(a: __m128d) -> i32;
2921 #[link_name = "llvm.x86.sse2.cvttps2dq"]
2922 fn cvttps2dq(a: __m128) -> i32x4;
2923 #[link_name = "llvm.x86.sse2.storeu.dq"]
2924 fn storeudq(mem_addr: *mut i8, a: __m128i);
2925 #[link_name = "llvm.x86.sse2.storeu.pd"]
2926 fn storeupd(mem_addr: *mut i8, a: __m128d);
2927 }
2928
2929 #[cfg(test)]
2930 mod tests {
2931 use crate::{
2932 core_arch::{simd::*, x86::*},
2933 hint::black_box,
2934 };
2935 use std::{
2936 boxed, f32,
2937 f64::{self, NAN},
2938 i32,
2939 mem::{self, transmute},
2940 };
2941 use stdarch_test::simd_test;
2942
2943 #[test]
2944 fn test_mm_pause() {
2945 unsafe { _mm_pause() }
2946 }
2947
2948 #[simd_test(enable = "sse2")]
2949 unsafe fn test_mm_clflush() {
2950 let x = 0_u8;
2951 _mm_clflush(&x as *const _);
2952 }
2953
2954 #[simd_test(enable = "sse2")]
2955 unsafe fn test_mm_lfence() {
2956 _mm_lfence();
2957 }
2958
2959 #[simd_test(enable = "sse2")]
2960 unsafe fn test_mm_mfence() {
2961 _mm_mfence();
2962 }
2963
2964 #[simd_test(enable = "sse2")]
2965 unsafe fn test_mm_add_epi8() {
2966 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2967 #[rustfmt::skip]
2968 let b = _mm_setr_epi8(
2969 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
2970 );
2971 let r = _mm_add_epi8(a, b);
2972 #[rustfmt::skip]
2973 let e = _mm_setr_epi8(
2974 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
2975 );
2976 assert_eq_m128i(r, e);
2977 }
2978
2979 #[simd_test(enable = "sse2")]
2980 unsafe fn test_mm_add_epi8_overflow() {
2981 let a = _mm_set1_epi8(0x7F);
2982 let b = _mm_set1_epi8(1);
2983 let r = _mm_add_epi8(a, b);
2984 assert_eq_m128i(r, _mm_set1_epi8(-128));
2985 }
2986
2987 #[simd_test(enable = "sse2")]
2988 unsafe fn test_mm_add_epi16() {
2989 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
2990 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
2991 let r = _mm_add_epi16(a, b);
2992 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
2993 assert_eq_m128i(r, e);
2994 }
2995
2996 #[simd_test(enable = "sse2")]
2997 unsafe fn test_mm_add_epi32() {
2998 let a = _mm_setr_epi32(0, 1, 2, 3);
2999 let b = _mm_setr_epi32(4, 5, 6, 7);
3000 let r = _mm_add_epi32(a, b);
3001 let e = _mm_setr_epi32(4, 6, 8, 10);
3002 assert_eq_m128i(r, e);
3003 }
3004
3005 #[simd_test(enable = "sse2")]
3006 unsafe fn test_mm_add_epi64() {
3007 let a = _mm_setr_epi64x(0, 1);
3008 let b = _mm_setr_epi64x(2, 3);
3009 let r = _mm_add_epi64(a, b);
3010 let e = _mm_setr_epi64x(2, 4);
3011 assert_eq_m128i(r, e);
3012 }
3013
3014 #[simd_test(enable = "sse2")]
3015 unsafe fn test_mm_adds_epi8() {
3016 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3017 #[rustfmt::skip]
3018 let b = _mm_setr_epi8(
3019 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3020 );
3021 let r = _mm_adds_epi8(a, b);
3022 #[rustfmt::skip]
3023 let e = _mm_setr_epi8(
3024 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3025 );
3026 assert_eq_m128i(r, e);
3027 }
3028
3029 #[simd_test(enable = "sse2")]
3030 unsafe fn test_mm_adds_epi8_saturate_positive() {
3031 let a = _mm_set1_epi8(0x7F);
3032 let b = _mm_set1_epi8(1);
3033 let r = _mm_adds_epi8(a, b);
3034 assert_eq_m128i(r, a);
3035 }
3036
3037 #[simd_test(enable = "sse2")]
3038 unsafe fn test_mm_adds_epi8_saturate_negative() {
3039 let a = _mm_set1_epi8(-0x80);
3040 let b = _mm_set1_epi8(-1);
3041 let r = _mm_adds_epi8(a, b);
3042 assert_eq_m128i(r, a);
3043 }
3044
3045 #[simd_test(enable = "sse2")]
3046 unsafe fn test_mm_adds_epi16() {
3047 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3048 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3049 let r = _mm_adds_epi16(a, b);
3050 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3051 assert_eq_m128i(r, e);
3052 }
3053
3054 #[simd_test(enable = "sse2")]
3055 unsafe fn test_mm_adds_epi16_saturate_positive() {
3056 let a = _mm_set1_epi16(0x7FFF);
3057 let b = _mm_set1_epi16(1);
3058 let r = _mm_adds_epi16(a, b);
3059 assert_eq_m128i(r, a);
3060 }
3061
3062 #[simd_test(enable = "sse2")]
3063 unsafe fn test_mm_adds_epi16_saturate_negative() {
3064 let a = _mm_set1_epi16(-0x8000);
3065 let b = _mm_set1_epi16(-1);
3066 let r = _mm_adds_epi16(a, b);
3067 assert_eq_m128i(r, a);
3068 }
3069
3070 #[simd_test(enable = "sse2")]
3071 unsafe fn test_mm_adds_epu8() {
3072 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3073 #[rustfmt::skip]
3074 let b = _mm_setr_epi8(
3075 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3076 );
3077 let r = _mm_adds_epu8(a, b);
3078 #[rustfmt::skip]
3079 let e = _mm_setr_epi8(
3080 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3081 );
3082 assert_eq_m128i(r, e);
3083 }
3084
3085 #[simd_test(enable = "sse2")]
3086 unsafe fn test_mm_adds_epu8_saturate() {
3087 let a = _mm_set1_epi8(!0);
3088 let b = _mm_set1_epi8(1);
3089 let r = _mm_adds_epu8(a, b);
3090 assert_eq_m128i(r, a);
3091 }
3092
3093 #[simd_test(enable = "sse2")]
3094 unsafe fn test_mm_adds_epu16() {
3095 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3096 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3097 let r = _mm_adds_epu16(a, b);
3098 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3099 assert_eq_m128i(r, e);
3100 }
3101
3102 #[simd_test(enable = "sse2")]
3103 unsafe fn test_mm_adds_epu16_saturate() {
3104 let a = _mm_set1_epi16(!0);
3105 let b = _mm_set1_epi16(1);
3106 let r = _mm_adds_epu16(a, b);
3107 assert_eq_m128i(r, a);
3108 }
3109
3110 #[simd_test(enable = "sse2")]
3111 unsafe fn test_mm_avg_epu8() {
3112 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3113 let r = _mm_avg_epu8(a, b);
3114 assert_eq_m128i(r, _mm_set1_epi8(6));
3115 }
3116
3117 #[simd_test(enable = "sse2")]
3118 unsafe fn test_mm_avg_epu16() {
3119 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3120 let r = _mm_avg_epu16(a, b);
3121 assert_eq_m128i(r, _mm_set1_epi16(6));
3122 }
3123
3124 #[simd_test(enable = "sse2")]
3125 unsafe fn test_mm_madd_epi16() {
3126 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3127 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
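// Each 32-bit lane is the sum of two adjacent 16-bit products, e.g. 1 * 9 + 2 * 10 = 29.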
3128 let r = _mm_madd_epi16(a, b);
3129 let e = _mm_setr_epi32(29, 81, 149, 233);
3130 assert_eq_m128i(r, e);
3131 }
3132
3133 #[simd_test(enable = "sse2")]
3134 unsafe fn test_mm_max_epi16() {
3135 let a = _mm_set1_epi16(1);
3136 let b = _mm_set1_epi16(-1);
3137 let r = _mm_max_epi16(a, b);
3138 assert_eq_m128i(r, a);
3139 }
3140
3141 #[simd_test(enable = "sse2")]
3142 unsafe fn test_mm_max_epu8() {
3143 let a = _mm_set1_epi8(1);
3144 let b = _mm_set1_epi8(!0);
3145 let r = _mm_max_epu8(a, b);
3146 assert_eq_m128i(r, b);
3147 }
3148
3149 #[simd_test(enable = "sse2")]
3150 unsafe fn test_mm_min_epi16() {
3151 let a = _mm_set1_epi16(1);
3152 let b = _mm_set1_epi16(-1);
3153 let r = _mm_min_epi16(a, b);
3154 assert_eq_m128i(r, b);
3155 }
3156
3157 #[simd_test(enable = "sse2")]
3158 unsafe fn test_mm_min_epu8() {
3159 let a = _mm_set1_epi8(1);
3160 let b = _mm_set1_epi8(!0);
3161 let r = _mm_min_epu8(a, b);
3162 assert_eq_m128i(r, a);
3163 }
3164
3165 #[simd_test(enable = "sse2")]
3166 unsafe fn test_mm_mulhi_epi16() {
3167 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
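// 1000 * -1001 = -1_001_000 = 0xFFF0_B9D8 as i32; the high 16 bits are 0xFFF0 = -16.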
3168 let r = _mm_mulhi_epi16(a, b);
3169 assert_eq_m128i(r, _mm_set1_epi16(-16));
3170 }
3171
3172 #[simd_test(enable = "sse2")]
3173 unsafe fn test_mm_mulhi_epu16() {
3174 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3175 let r = _mm_mulhi_epu16(a, b);
3176 assert_eq_m128i(r, _mm_set1_epi16(15));
3177 }
3178
3179 #[simd_test(enable = "sse2")]
3180 unsafe fn test_mm_mullo_epi16() {
3181 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
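// 1000 * -1001 = -1_001_000 = 0xFFF0_B9D8 as i32; the low 16 bits are 0xB9D8 = -17960 as i16.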
3182 let r = _mm_mullo_epi16(a, b);
3183 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3184 }
3185
3186 #[simd_test(enable = "sse2")]
3187 unsafe fn test_mm_mul_epu32() {
3188 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3189 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
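// pmuludq multiplies only the low 32 bits of each 64-bit element; 1 << 34 and
// 1 << 35 have all-zero low halves, so the second product is 0.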
3190 let r = _mm_mul_epu32(a, b);
3191 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3192 assert_eq_m128i(r, e);
3193 }
3194
3195 #[simd_test(enable = "sse2")]
3196 unsafe fn test_mm_sad_epu8() {
3197 #[rustfmt::skip]
3198 let a = _mm_setr_epi8(
3199 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3200 1, 2, 3, 4,
3201 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3202 1, 2, 3, 4,
3203 );
3204 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
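// Each 64-bit lane sums the absolute byte differences of its half:
// 255 + 254 + 253 + 252 + 1 + 1 + 1 + 3 = 1020 and
// 154 + 153 + 152 + 151 + 0 + 0 + 2 + 2 = 614.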
3205 let r = _mm_sad_epu8(a, b);
3206 let e = _mm_setr_epi64x(1020, 614);
3207 assert_eq_m128i(r, e);
3208 }
3209
3210 #[simd_test(enable = "sse2")]
3211 unsafe fn test_mm_sub_epi8() {
3212 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3213 let r = _mm_sub_epi8(a, b);
3214 assert_eq_m128i(r, _mm_set1_epi8(-1));
3215 }
3216
3217 #[simd_test(enable = "sse2")]
3218 unsafe fn test_mm_sub_epi16() {
3219 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3220 let r = _mm_sub_epi16(a, b);
3221 assert_eq_m128i(r, _mm_set1_epi16(-1));
3222 }
3223
3224 #[simd_test(enable = "sse2")]
3225 unsafe fn test_mm_sub_epi32() {
3226 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3227 let r = _mm_sub_epi32(a, b);
3228 assert_eq_m128i(r, _mm_set1_epi32(-1));
3229 }
3230
3231 #[simd_test(enable = "sse2")]
3232 unsafe fn test_mm_sub_epi64() {
3233 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3234 let r = _mm_sub_epi64(a, b);
3235 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3236 }
3237
3238 #[simd_test(enable = "sse2")]
3239 unsafe fn test_mm_subs_epi8() {
3240 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3241 let r = _mm_subs_epi8(a, b);
3242 assert_eq_m128i(r, _mm_set1_epi8(3));
3243 }
3244
3245 #[simd_test(enable = "sse2")]
3246 unsafe fn test_mm_subs_epi8_saturate_positive() {
3247 let a = _mm_set1_epi8(0x7F);
3248 let b = _mm_set1_epi8(-1);
3249 let r = _mm_subs_epi8(a, b);
3250 assert_eq_m128i(r, a);
3251 }
3252
3253 #[simd_test(enable = "sse2")]
3254 unsafe fn test_mm_subs_epi8_saturate_negative() {
3255 let a = _mm_set1_epi8(-0x80);
3256 let b = _mm_set1_epi8(1);
3257 let r = _mm_subs_epi8(a, b);
3258 assert_eq_m128i(r, a);
3259 }
3260
3261 #[simd_test(enable = "sse2")]
3262 unsafe fn test_mm_subs_epi16() {
3263 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3264 let r = _mm_subs_epi16(a, b);
3265 assert_eq_m128i(r, _mm_set1_epi16(3));
3266 }
3267
3268 #[simd_test(enable = "sse2")]
3269 unsafe fn test_mm_subs_epi16_saturate_positive() {
3270 let a = _mm_set1_epi16(0x7FFF);
3271 let b = _mm_set1_epi16(-1);
3272 let r = _mm_subs_epi16(a, b);
3273 assert_eq_m128i(r, a);
3274 }
3275
3276 #[simd_test(enable = "sse2")]
3277 unsafe fn test_mm_subs_epi16_saturate_negative() {
3278 let a = _mm_set1_epi16(-0x8000);
3279 let b = _mm_set1_epi16(1);
3280 let r = _mm_subs_epi16(a, b);
3281 assert_eq_m128i(r, a);
3282 }
3283
3284 #[simd_test(enable = "sse2")]
3285 unsafe fn test_mm_subs_epu8() {
3286 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3287 let r = _mm_subs_epu8(a, b);
3288 assert_eq_m128i(r, _mm_set1_epi8(3));
3289 }
3290
3291 #[simd_test(enable = "sse2")]
3292 unsafe fn test_mm_subs_epu8_saturate() {
3293 let a = _mm_set1_epi8(0);
3294 let b = _mm_set1_epi8(1);
3295 let r = _mm_subs_epu8(a, b);
3296 assert_eq_m128i(r, a);
3297 }
3298
3299 #[simd_test(enable = "sse2")]
3300 unsafe fn test_mm_subs_epu16() {
3301 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3302 let r = _mm_subs_epu16(a, b);
3303 assert_eq_m128i(r, _mm_set1_epi16(3));
3304 }
3305
3306 #[simd_test(enable = "sse2")]
3307 unsafe fn test_mm_subs_epu16_saturate() {
3308 let a = _mm_set1_epi16(0);
3309 let b = _mm_set1_epi16(1);
3310 let r = _mm_subs_epu16(a, b);
3311 assert_eq_m128i(r, a);
3312 }
3313
3314 #[simd_test(enable = "sse2")]
3315 unsafe fn test_mm_slli_si128() {
3316 #[rustfmt::skip]
3317 let a = _mm_setr_epi8(
3318 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3319 );
3320 let r = _mm_slli_si128::<1>(a);
3321 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3322 assert_eq_m128i(r, e);
3323
3324 #[rustfmt::skip]
3325 let a = _mm_setr_epi8(
3326 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3327 );
3328 let r = _mm_slli_si128::<15>(a);
3329 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3330 assert_eq_m128i(r, e);
3331
3332 #[rustfmt::skip]
3333 let a = _mm_setr_epi8(
3334 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3335 );
3336 let r = _mm_slli_si128::<16>(a);
3337 assert_eq_m128i(r, _mm_set1_epi8(0));
3338 }
3339
3340 #[simd_test(enable = "sse2")]
3341 unsafe fn test_mm_slli_epi16() {
3342 #[rustfmt::skip]
3343 let a = _mm_setr_epi16(
3344 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3345 );
3346 let r = _mm_slli_epi16::<4>(a);
3347
3348 #[rustfmt::skip]
3349 let e = _mm_setr_epi16(
3350 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
3351 0, 0, 0, 0,
3352 );
3353 assert_eq_m128i(r, e);
3354 }
3355
3356 #[simd_test(enable = "sse2")]
3357 unsafe fn test_mm_sll_epi16() {
3358 let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
3359 let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
3360 assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0));
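// The count is taken from the low 64 bits of the second operand only, so a 4
// placed in the upper half leaves `a` unshifted.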
3361 let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
3362 assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
3363 }
3364
3365 #[simd_test(enable = "sse2")]
3366 unsafe fn test_mm_slli_epi32() {
3367 let r = _mm_slli_epi32::<4>(_mm_set1_epi32(0xFFFF));
3368 assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
3369 }
3370
3371 #[simd_test(enable = "sse2")]
3372 unsafe fn test_mm_sll_epi32() {
3373 let a = _mm_set1_epi32(0xFFFF);
3374 let b = _mm_setr_epi32(4, 0, 0, 0);
3375 let r = _mm_sll_epi32(a, b);
3376 assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
3377 }
3378
3379 #[simd_test(enable = "sse2")]
3380 unsafe fn test_mm_slli_epi64() {
3381 let r = _mm_slli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
3382 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
3383 }
3384
3385 #[simd_test(enable = "sse2")]
3386 unsafe fn test_mm_sll_epi64() {
3387 let a = _mm_set1_epi64x(0xFFFFFFFF);
3388 let b = _mm_setr_epi64x(4, 0);
3389 let r = _mm_sll_epi64(a, b);
3390 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
3391 }
3392
3393 #[simd_test(enable = "sse2")]
3394 unsafe fn test_mm_srai_epi16() {
3395 let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1));
3396 assert_eq_m128i(r, _mm_set1_epi16(-1));
3397 }
3398
3399 #[simd_test(enable = "sse2")]
3400 unsafe fn test_mm_sra_epi16() {
3401 let a = _mm_set1_epi16(-1);
3402 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
3403 let r = _mm_sra_epi16(a, b);
3404 assert_eq_m128i(r, _mm_set1_epi16(-1));
3405 }
3406
3407 #[simd_test(enable = "sse2")]
3408 unsafe fn test_mm_srai_epi32() {
3409 let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1));
3410 assert_eq_m128i(r, _mm_set1_epi32(-1));
3411 }
3412
3413 #[simd_test(enable = "sse2")]
3414 unsafe fn test_mm_sra_epi32() {
3415 let a = _mm_set1_epi32(-1);
3416 let b = _mm_setr_epi32(1, 0, 0, 0);
3417 let r = _mm_sra_epi32(a, b);
3418 assert_eq_m128i(r, _mm_set1_epi32(-1));
3419 }
3420
3421 #[simd_test(enable = "sse2")]
3422 unsafe fn test_mm_srli_si128() {
3423 #[rustfmt::skip]
3424 let a = _mm_setr_epi8(
3425 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3426 );
3427 let r = _mm_srli_si128::<1>(a);
3428 #[rustfmt::skip]
3429 let e = _mm_setr_epi8(
3430 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3431 );
3432 assert_eq_m128i(r, e);
3433
3434 #[rustfmt::skip]
3435 let a = _mm_setr_epi8(
3436 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3437 );
3438 let r = _mm_srli_si128::<15>(a);
3439 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3440 assert_eq_m128i(r, e);
3441
3442 #[rustfmt::skip]
3443 let a = _mm_setr_epi8(
3444 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3445 );
3446 let r = _mm_srli_si128::<16>(a);
3447 assert_eq_m128i(r, _mm_set1_epi8(0));
3448 }
3449
3450 #[simd_test(enable = "sse2")]
3451 unsafe fn test_mm_srli_epi16() {
3452 #[rustfmt::skip]
3453 let a = _mm_setr_epi16(
3454 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3455 );
3456 let r = _mm_srli_epi16::<4>(a);
3457 #[rustfmt::skip]
3458 let e = _mm_setr_epi16(
3459 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
3460 );
3461 assert_eq_m128i(r, e);
3462 }
3463
3464 #[simd_test(enable = "sse2")]
3465 unsafe fn test_mm_srl_epi16() {
3466 let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
3467 let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
3468 assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0));
3469 let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
3470 assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
3471 }
3472
3473 #[simd_test(enable = "sse2")]
3474 unsafe fn test_mm_srli_epi32() {
3475 let r = _mm_srli_epi32::<4>(_mm_set1_epi32(0xFFFF));
3476 assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
3477 }
3478
3479 #[simd_test(enable = "sse2")]
3480 unsafe fn test_mm_srl_epi32() {
3481 let a = _mm_set1_epi32(0xFFFF);
3482 let b = _mm_setr_epi32(4, 0, 0, 0);
3483 let r = _mm_srl_epi32(a, b);
3484 assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
3485 }
3486
3487 #[simd_test(enable = "sse2")]
3488 unsafe fn test_mm_srli_epi64() {
3489 let r = _mm_srli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
3490 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
3491 }
3492
3493 #[simd_test(enable = "sse2")]
3494 unsafe fn test_mm_srl_epi64() {
3495 let a = _mm_set1_epi64x(0xFFFFFFFF);
3496 let b = _mm_setr_epi64x(4, 0);
3497 let r = _mm_srl_epi64(a, b);
3498 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
3499 }
3500
3501 #[simd_test(enable = "sse2")]
3502 unsafe fn test_mm_and_si128() {
3503 let a = _mm_set1_epi8(5);
3504 let b = _mm_set1_epi8(3);
3505 let r = _mm_and_si128(a, b);
3506 assert_eq_m128i(r, _mm_set1_epi8(1));
3507 }
3508
3509 #[simd_test(enable = "sse2")]
3510 unsafe fn test_mm_andnot_si128() {
3511 let a = _mm_set1_epi8(5);
3512 let b = _mm_set1_epi8(3);
3513 let r = _mm_andnot_si128(a, b);
3514 assert_eq_m128i(r, _mm_set1_epi8(2));
3515 }
3516
3517 #[simd_test(enable = "sse2")]
3518 unsafe fn test_mm_or_si128() {
3519 let a = _mm_set1_epi8(5);
3520 let b = _mm_set1_epi8(3);
3521 let r = _mm_or_si128(a, b);
3522 assert_eq_m128i(r, _mm_set1_epi8(7));
3523 }
3524
3525 #[simd_test(enable = "sse2")]
3526 unsafe fn test_mm_xor_si128() {
3527 let a = _mm_set1_epi8(5);
3528 let b = _mm_set1_epi8(3);
3529 let r = _mm_xor_si128(a, b);
3530 assert_eq_m128i(r, _mm_set1_epi8(6));
3531 }
3532
3533 #[simd_test(enable = "sse2")]
3534 unsafe fn test_mm_cmpeq_epi8() {
3535 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3536 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3537 let r = _mm_cmpeq_epi8(a, b);
3538 #[rustfmt::skip]
3539 assert_eq_m128i(
3540 r,
3541 _mm_setr_epi8(
3542 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3543 )
3544 );
3545 }
3546
3547 #[simd_test(enable = "sse2")]
3548 unsafe fn test_mm_cmpeq_epi16() {
3549 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3550 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3551 let r = _mm_cmpeq_epi16(a, b);
3552 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3553 }
3554
3555 #[simd_test(enable = "sse2")]
3556 unsafe fn test_mm_cmpeq_epi32() {
3557 let a = _mm_setr_epi32(0, 1, 2, 3);
3558 let b = _mm_setr_epi32(3, 2, 2, 0);
3559 let r = _mm_cmpeq_epi32(a, b);
3560 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3561 }
3562
3563 #[simd_test(enable = "sse2")]
3564 unsafe fn test_mm_cmpgt_epi8() {
3565 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3566 let b = _mm_set1_epi8(0);
3567 let r = _mm_cmpgt_epi8(a, b);
3568 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3569 assert_eq_m128i(r, e);
3570 }
3571
3572 #[simd_test(enable = "sse2")]
3573 unsafe fn test_mm_cmpgt_epi16() {
3574 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3575 let b = _mm_set1_epi16(0);
3576 let r = _mm_cmpgt_epi16(a, b);
3577 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3578 assert_eq_m128i(r, e);
3579 }
3580
3581 #[simd_test(enable = "sse2")]
3582 unsafe fn test_mm_cmpgt_epi32() {
3583 let a = _mm_set_epi32(5, 0, 0, 0);
3584 let b = _mm_set1_epi32(0);
3585 let r = _mm_cmpgt_epi32(a, b);
3586 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3587 }
3588
3589 #[simd_test(enable = "sse2")]
3590 unsafe fn test_mm_cmplt_epi8() {
3591 let a = _mm_set1_epi8(0);
3592 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3593 let r = _mm_cmplt_epi8(a, b);
3594 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3595 assert_eq_m128i(r, e);
3596 }
3597
3598 #[simd_test(enable = "sse2")]
3599 unsafe fn test_mm_cmplt_epi16() {
3600 let a = _mm_set1_epi16(0);
3601 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3602 let r = _mm_cmplt_epi16(a, b);
3603 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3604 assert_eq_m128i(r, e);
3605 }
3606
3607 #[simd_test(enable = "sse2")]
3608 unsafe fn test_mm_cmplt_epi32() {
3609 let a = _mm_set1_epi32(0);
3610 let b = _mm_set_epi32(5, 0, 0, 0);
3611 let r = _mm_cmplt_epi32(a, b);
3612 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3613 }
3614
3615 #[simd_test(enable = "sse2")]
3616 unsafe fn test_mm_cvtepi32_pd() {
3617 let a = _mm_set_epi32(35, 25, 15, 5);
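// Only the two low 32-bit lanes (5 and 15) are converted; the upper lanes are discarded.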
3618 let r = _mm_cvtepi32_pd(a);
3619 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3620 }
3621
3622 #[simd_test(enable = "sse2")]
3623 unsafe fn test_mm_cvtsi32_sd() {
3624 let a = _mm_set1_pd(3.5);
3625 let r = _mm_cvtsi32_sd(a, 5);
3626 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3627 }
3628
3629 #[simd_test(enable = "sse2")]
3630 unsafe fn test_mm_cvtepi32_ps() {
3631 let a = _mm_setr_epi32(1, 2, 3, 4);
3632 let r = _mm_cvtepi32_ps(a);
3633 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3634 }
3635
3636 #[simd_test(enable = "sse2")]
3637 unsafe fn test_mm_cvtps_epi32() {
3638 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3639 let r = _mm_cvtps_epi32(a);
3640 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3641 }
3642
3643 #[simd_test(enable = "sse2")]
3644 unsafe fn test_mm_cvtsi32_si128() {
3645 let r = _mm_cvtsi32_si128(5);
3646 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3647 }
3648
3649 #[simd_test(enable = "sse2")]
3650 unsafe fn test_mm_cvtsi128_si32() {
3651 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3652 assert_eq!(r, 5);
3653 }
3654
3655 #[simd_test(enable = "sse2")]
3656 unsafe fn test_mm_set_epi64x() {
3657 let r = _mm_set_epi64x(0, 1);
3658 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3659 }
3660
3661 #[simd_test(enable = "sse2")]
3662 unsafe fn test_mm_set_epi32() {
3663 let r = _mm_set_epi32(0, 1, 2, 3);
3664 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3665 }
3666
3667 #[simd_test(enable = "sse2")]
3668 unsafe fn test_mm_set_epi16() {
3669 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3670 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3671 }
3672
3673 #[simd_test(enable = "sse2")]
3674 unsafe fn test_mm_set_epi8() {
3675 #[rustfmt::skip]
3676 let r = _mm_set_epi8(
3677 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3678 );
3679 #[rustfmt::skip]
3680 let e = _mm_setr_epi8(
3681 15, 14, 13, 12, 11, 10, 9, 8,
3682 7, 6, 5, 4, 3, 2, 1, 0,
3683 );
3684 assert_eq_m128i(r, e);
3685 }
3686
3687 #[simd_test(enable = "sse2")]
3688 unsafe fn test_mm_set1_epi64x() {
3689 let r = _mm_set1_epi64x(1);
3690 assert_eq_m128i(r, _mm_set1_epi64x(1));
3691 }
3692
3693 #[simd_test(enable = "sse2")]
3694 unsafe fn test_mm_set1_epi32() {
3695 let r = _mm_set1_epi32(1);
3696 assert_eq_m128i(r, _mm_set1_epi32(1));
3697 }
3698
3699 #[simd_test(enable = "sse2")]
3700 unsafe fn test_mm_set1_epi16() {
3701 let r = _mm_set1_epi16(1);
3702 assert_eq_m128i(r, _mm_set1_epi16(1));
3703 }
3704
3705 #[simd_test(enable = "sse2")]
3706 unsafe fn test_mm_set1_epi8() {
3707 let r = _mm_set1_epi8(1);
3708 assert_eq_m128i(r, _mm_set1_epi8(1));
3709 }
3710
3711 #[simd_test(enable = "sse2")]
3712 unsafe fn test_mm_setr_epi32() {
3713 let r = _mm_setr_epi32(0, 1, 2, 3);
3714 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3715 }
3716
3717 #[simd_test(enable = "sse2")]
3718 unsafe fn test_mm_setr_epi16() {
3719 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3720 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3721 }
3722
3723 #[simd_test(enable = "sse2")]
3724 unsafe fn test_mm_setr_epi8() {
3725 #[rustfmt::skip]
3726 let r = _mm_setr_epi8(
3727 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3728 );
3729 #[rustfmt::skip]
3730 let e = _mm_setr_epi8(
3731 0, 1, 2, 3, 4, 5, 6, 7,
3732 8, 9, 10, 11, 12, 13, 14, 15,
3733 );
3734 assert_eq_m128i(r, e);
3735 }
3736
3737 #[simd_test(enable = "sse2")]
3738 unsafe fn test_mm_setzero_si128() {
3739 let r = _mm_setzero_si128();
3740 assert_eq_m128i(r, _mm_set1_epi64x(0));
3741 }
3742
3743 #[simd_test(enable = "sse2")]
3744 unsafe fn test_mm_loadl_epi64() {
3745 let a = _mm_setr_epi64x(6, 5);
3746 let r = _mm_loadl_epi64(&a as *const _);
3747 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
3748 }
3749
3750 #[simd_test(enable = "sse2")]
3751 unsafe fn test_mm_load_si128() {
3752 let a = _mm_set_epi64x(5, 6);
3753 let r = _mm_load_si128(&a as *const _ as *const _);
3754 assert_eq_m128i(a, r);
3755 }
3756
3757 #[simd_test(enable = "sse2")]
3758 unsafe fn test_mm_loadu_si128() {
3759 let a = _mm_set_epi64x(5, 6);
3760 let r = _mm_loadu_si128(&a as *const _ as *const _);
3761 assert_eq_m128i(a, r);
3762 }
3763
3764 #[simd_test(enable = "sse2")]
3765 unsafe fn test_mm_maskmoveu_si128() {
3766 let a = _mm_set1_epi8(9);
3767 #[rustfmt::skip]
3768 let mask = _mm_set_epi8(
3769 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3770 0, 0, 0, 0, 0, 0, 0, 0,
3771 );
3772 let mut r = _mm_set1_epi8(0);
3773 _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
3774 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3775 assert_eq_m128i(r, e);
3776 }
3777
3778 #[simd_test(enable = "sse2")]
3779 unsafe fn test_mm_store_si128() {
3780 let a = _mm_set1_epi8(9);
3781 let mut r = _mm_set1_epi8(0);
3782 _mm_store_si128(&mut r as *mut _ as *mut __m128i, a);
3783 assert_eq_m128i(r, a);
3784 }
3785
3786 #[simd_test(enable = "sse2")]
3787 unsafe fn test_mm_storeu_si128() {
3788 let a = _mm_set1_epi8(9);
3789 let mut r = _mm_set1_epi8(0);
3790 _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a);
3791 assert_eq_m128i(r, a);
3792 }
3793
3794 #[simd_test(enable = "sse2")]
3795 unsafe fn test_mm_storel_epi64() {
3796 let a = _mm_setr_epi64x(2, 9);
3797 let mut r = _mm_set1_epi8(0);
3798 _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a);
3799 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
3800 }
3801
3802 #[simd_test(enable = "sse2")]
3803 unsafe fn test_mm_stream_si128() {
3804 let a = _mm_setr_epi32(1, 2, 3, 4);
3805 let mut r = _mm_undefined_si128();
3806 _mm_stream_si128(&mut r as *mut _, a);
3807 assert_eq_m128i(r, a);
3808 }
3809
3810 #[simd_test(enable = "sse2")]
3811 unsafe fn test_mm_stream_si32() {
3812 let a: i32 = 7;
3813 let mut mem = boxed::Box::<i32>::new(-1);
3814 _mm_stream_si32(&mut *mem as *mut i32, a);
3815 assert_eq!(a, *mem);
3816 }
3817
3818 #[simd_test(enable = "sse2")]
3819 unsafe fn test_mm_move_epi64() {
3820 let a = _mm_setr_epi64x(5, 6);
3821 let r = _mm_move_epi64(a);
3822 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
3823 }
3824
3825 #[simd_test(enable = "sse2")]
3826 unsafe fn test_mm_packs_epi16() {
3827 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
3828 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
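// 0x80 and -0x81 are just outside the i8 range, so they saturate to 0x7F and -0x80.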
3829 let r = _mm_packs_epi16(a, b);
3830 #[rustfmt::skip]
3831 assert_eq_m128i(
3832 r,
3833 _mm_setr_epi8(
3834 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
3835 )
3836 );
3837 }
3838
3839 #[simd_test(enable = "sse2")]
3840 unsafe fn test_mm_packs_epi32() {
3841 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
3842 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
3843 let r = _mm_packs_epi32(a, b);
3844 assert_eq_m128i(
3845 r,
3846 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
3847 );
3848 }
3849
3850 #[simd_test(enable = "sse2")]
3851 unsafe fn test_mm_packus_epi16() {
3852 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
3853 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
3854 let r = _mm_packus_epi16(a, b);
3855 assert_eq_m128i(
3856 r,
3857 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
3858 );
3859 }
3860
3861 #[simd_test(enable = "sse2")]
3862 unsafe fn test_mm_extract_epi16() {
3863 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
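// The extracted lane is zero-extended to i32, so lane 0 (-1) reads back as 0xFFFF.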
3864 let r1 = _mm_extract_epi16::<0>(a);
3865 let r2 = _mm_extract_epi16::<3>(a);
3866 assert_eq!(r1, 0xFFFF);
3867 assert_eq!(r2, 3);
3868 }
3869
3870 #[simd_test(enable = "sse2")]
3871 unsafe fn test_mm_insert_epi16() {
3872 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3873 let r = _mm_insert_epi16::<0>(a, 9);
3874 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
3875 assert_eq_m128i(r, e);
3876 }
3877
3878 #[simd_test(enable = "sse2")]
3879 unsafe fn test_mm_movemask_epi8() {
3880 #[rustfmt::skip]
3881 let a = _mm_setr_epi8(
3882 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
3883 0b0101, 0b1111_0000u8 as i8, 0, 0,
3884 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
3885 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
3886 );
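// The result collects the most significant bit of each byte, with lane 0 in bit 0.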
3887 let r = _mm_movemask_epi8(a);
3888 assert_eq!(r, 0b10100110_00100101);
3889 }
3890
3891 #[simd_test(enable = "sse2")]
3892 unsafe fn test_mm_shuffle_epi32() {
3893 let a = _mm_setr_epi32(5, 10, 15, 20);
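// Each two-bit field of the immediate, read from the low bits up, picks a
// source lane: 0b11 -> 20, 0b01 -> 10, 0b01 -> 10, 0b00 -> 5.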
3894 let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
3895 let e = _mm_setr_epi32(20, 10, 10, 5);
3896 assert_eq_m128i(r, e);
3897 }
3898
3899 #[simd_test(enable = "sse2")]
3900 unsafe fn test_mm_shufflehi_epi16() {
3901 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
3902 let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
3903 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
3904 assert_eq_m128i(r, e);
3905 }
3906
3907 #[simd_test(enable = "sse2")]
3908 unsafe fn test_mm_shufflelo_epi16() {
3909 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
3910 let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
3911 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
3912 assert_eq_m128i(r, e);
3913 }
3914
3915 #[simd_test(enable = "sse2")]
3916 unsafe fn test_mm_unpackhi_epi8() {
3917 #[rustfmt::skip]
3918 let a = _mm_setr_epi8(
3919 0, 1, 2, 3, 4, 5, 6, 7,
3920 8, 9, 10, 11, 12, 13, 14, 15,
3921 );
3922 #[rustfmt::skip]
3923 let b = _mm_setr_epi8(
3924 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3925 );
3926 let r = _mm_unpackhi_epi8(a, b);
3927 #[rustfmt::skip]
3928 let e = _mm_setr_epi8(
3929 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
3930 );
3931 assert_eq_m128i(r, e);
3932 }
3933
3934 #[simd_test(enable = "sse2")]
3935 unsafe fn test_mm_unpackhi_epi16() {
3936 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3937 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3938 let r = _mm_unpackhi_epi16(a, b);
3939 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
3940 assert_eq_m128i(r, e);
3941 }
3942
3943 #[simd_test(enable = "sse2")]
3944 unsafe fn test_mm_unpackhi_epi32() {
3945 let a = _mm_setr_epi32(0, 1, 2, 3);
3946 let b = _mm_setr_epi32(4, 5, 6, 7);
3947 let r = _mm_unpackhi_epi32(a, b);
3948 let e = _mm_setr_epi32(2, 6, 3, 7);
3949 assert_eq_m128i(r, e);
3950 }
3951
3952 #[simd_test(enable = "sse2")]
3953 unsafe fn test_mm_unpackhi_epi64() {
3954 let a = _mm_setr_epi64x(0, 1);
3955 let b = _mm_setr_epi64x(2, 3);
3956 let r = _mm_unpackhi_epi64(a, b);
3957 let e = _mm_setr_epi64x(1, 3);
3958 assert_eq_m128i(r, e);
3959 }
3960
3961 #[simd_test(enable = "sse2")]
3962 unsafe fn test_mm_unpacklo_epi8() {
3963 #[rustfmt::skip]
3964 let a = _mm_setr_epi8(
3965 0, 1, 2, 3, 4, 5, 6, 7,
3966 8, 9, 10, 11, 12, 13, 14, 15,
3967 );
3968 #[rustfmt::skip]
3969 let b = _mm_setr_epi8(
3970 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3971 );
3972 let r = _mm_unpacklo_epi8(a, b);
3973 #[rustfmt::skip]
3974 let e = _mm_setr_epi8(
3975 0, 16, 1, 17, 2, 18, 3, 19,
3976 4, 20, 5, 21, 6, 22, 7, 23,
3977 );
3978 assert_eq_m128i(r, e);
3979 }
3980
3981 #[simd_test(enable = "sse2")]
3982 unsafe fn test_mm_unpacklo_epi16() {
3983 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3984 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3985 let r = _mm_unpacklo_epi16(a, b);
3986 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
3987 assert_eq_m128i(r, e);
3988 }
3989
3990 #[simd_test(enable = "sse2")]
3991 unsafe fn test_mm_unpacklo_epi32() {
3992 let a = _mm_setr_epi32(0, 1, 2, 3);
3993 let b = _mm_setr_epi32(4, 5, 6, 7);
3994 let r = _mm_unpacklo_epi32(a, b);
3995 let e = _mm_setr_epi32(0, 4, 1, 5);
3996 assert_eq_m128i(r, e);
3997 }
3998
3999 #[simd_test(enable = "sse2")]
4000 unsafe fn test_mm_unpacklo_epi64() {
4001 let a = _mm_setr_epi64x(0, 1);
4002 let b = _mm_setr_epi64x(2, 3);
4003 let r = _mm_unpacklo_epi64(a, b);
4004 let e = _mm_setr_epi64x(0, 2);
4005 assert_eq_m128i(r, e);
4006 }
4007
4008 #[simd_test(enable = "sse2")]
4009 unsafe fn test_mm_add_sd() {
4010 let a = _mm_setr_pd(1.0, 2.0);
4011 let b = _mm_setr_pd(5.0, 10.0);
4012 let r = _mm_add_sd(a, b);
4013 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4014 }
4015
4016 #[simd_test(enable = "sse2")]
4017 unsafe fn test_mm_add_pd() {
4018 let a = _mm_setr_pd(1.0, 2.0);
4019 let b = _mm_setr_pd(5.0, 10.0);
4020 let r = _mm_add_pd(a, b);
4021 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4022 }
4023
4024 #[simd_test(enable = "sse2")]
4025 unsafe fn test_mm_div_sd() {
4026 let a = _mm_setr_pd(1.0, 2.0);
4027 let b = _mm_setr_pd(5.0, 10.0);
4028 let r = _mm_div_sd(a, b);
4029 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4030 }
4031
4032 #[simd_test(enable = "sse2")]
4033 unsafe fn test_mm_div_pd() {
4034 let a = _mm_setr_pd(1.0, 2.0);
4035 let b = _mm_setr_pd(5.0, 10.0);
4036 let r = _mm_div_pd(a, b);
4037 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4038 }
4039
4040 #[simd_test(enable = "sse2")]
4041 unsafe fn test_mm_max_sd() {
4042 let a = _mm_setr_pd(1.0, 2.0);
4043 let b = _mm_setr_pd(5.0, 10.0);
4044 let r = _mm_max_sd(a, b);
4045 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4046 }
4047
4048 #[simd_test(enable = "sse2")]
4049 unsafe fn test_mm_max_pd() {
4050 let a = _mm_setr_pd(1.0, 2.0);
4051 let b = _mm_setr_pd(5.0, 10.0);
4052 let r = _mm_max_pd(a, b);
4053 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4054 }
4055
4056 #[simd_test(enable = "sse2")]
4057 unsafe fn test_mm_min_sd() {
4058 let a = _mm_setr_pd(1.0, 2.0);
4059 let b = _mm_setr_pd(5.0, 10.0);
4060 let r = _mm_min_sd(a, b);
4061 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4062 }
4063
4064 #[simd_test(enable = "sse2")]
4065 unsafe fn test_mm_min_pd() {
4066 let a = _mm_setr_pd(1.0, 2.0);
4067 let b = _mm_setr_pd(5.0, 10.0);
4068 let r = _mm_min_pd(a, b);
4069 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4070 }
4071
4072 #[simd_test(enable = "sse2")]
4073 unsafe fn test_mm_mul_sd() {
4074 let a = _mm_setr_pd(1.0, 2.0);
4075 let b = _mm_setr_pd(5.0, 10.0);
4076 let r = _mm_mul_sd(a, b);
4077 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4078 }
4079
4080 #[simd_test(enable = "sse2")]
4081 unsafe fn test_mm_mul_pd() {
4082 let a = _mm_setr_pd(1.0, 2.0);
4083 let b = _mm_setr_pd(5.0, 10.0);
4084 let r = _mm_mul_pd(a, b);
4085 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4086 }
4087
4088 #[simd_test(enable = "sse2")]
4089 unsafe fn test_mm_sqrt_sd() {
4090 let a = _mm_setr_pd(1.0, 2.0);
4091 let b = _mm_setr_pd(5.0, 10.0);
4092 let r = _mm_sqrt_sd(a, b);
4093 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4094 }
4095
4096 #[simd_test(enable = "sse2")]
4097 unsafe fn test_mm_sqrt_pd() {
4098 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4099 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4100 }
4101
4102 #[simd_test(enable = "sse2")]
4103 unsafe fn test_mm_sub_sd() {
4104 let a = _mm_setr_pd(1.0, 2.0);
4105 let b = _mm_setr_pd(5.0, 10.0);
4106 let r = _mm_sub_sd(a, b);
4107 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4108 }
4109
4110 #[simd_test(enable = "sse2")]
4111 unsafe fn test_mm_sub_pd() {
4112 let a = _mm_setr_pd(1.0, 2.0);
4113 let b = _mm_setr_pd(5.0, 10.0);
4114 let r = _mm_sub_pd(a, b);
4115 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4116 }
4117
4118 #[simd_test(enable = "sse2")]
4119 unsafe fn test_mm_and_pd() {
4120 let a = transmute(u64x2::splat(5));
4121 let b = transmute(u64x2::splat(3));
4122 let r = _mm_and_pd(a, b);
4123 let e = transmute(u64x2::splat(1));
4124 assert_eq_m128d(r, e);
4125 }
4126
4127 #[simd_test(enable = "sse2")]
4128 unsafe fn test_mm_andnot_pd() {
4129 let a = transmute(u64x2::splat(5));
4130 let b = transmute(u64x2::splat(3));
4131 let r = _mm_andnot_pd(a, b);
4132 let e = transmute(u64x2::splat(2));
4133 assert_eq_m128d(r, e);
4134 }
4135
4136 #[simd_test(enable = "sse2")]
4137 unsafe fn test_mm_or_pd() {
4138 let a = transmute(u64x2::splat(5));
4139 let b = transmute(u64x2::splat(3));
4140 let r = _mm_or_pd(a, b);
4141 let e = transmute(u64x2::splat(7));
4142 assert_eq_m128d(r, e);
4143 }
4144
4145 #[simd_test(enable = "sse2")]
4146 unsafe fn test_mm_xor_pd() {
4147 let a = transmute(u64x2::splat(5));
4148 let b = transmute(u64x2::splat(3));
4149 let r = _mm_xor_pd(a, b);
4150 let e = transmute(u64x2::splat(6));
4151 assert_eq_m128d(r, e);
4152 }
4153
4154 #[simd_test(enable = "sse2")]
4155 unsafe fn test_mm_cmpeq_sd() {
4156 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4157 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4158 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4159 assert_eq_m128i(r, e);
4160 }
4161
4162 #[simd_test(enable = "sse2")]
4163 unsafe fn test_mm_cmplt_sd() {
4164 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4165 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4166 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4167 assert_eq_m128i(r, e);
4168 }
4169
4170 #[simd_test(enable = "sse2")]
4171 unsafe fn test_mm_cmple_sd() {
4172 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4173 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4174 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4175 assert_eq_m128i(r, e);
4176 }
4177
4178 #[simd_test(enable = "sse2")]
4179 unsafe fn test_mm_cmpgt_sd() {
4180 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4181 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4182 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4183 assert_eq_m128i(r, e);
4184 }
4185
4186 #[simd_test(enable = "sse2")]
4187 unsafe fn test_mm_cmpge_sd() {
4188 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4189 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4190 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4191 assert_eq_m128i(r, e);
4192 }
4193
4194 #[simd_test(enable = "sse2")]
4195 unsafe fn test_mm_cmpord_sd() {
4196 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4197 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4198 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4199 assert_eq_m128i(r, e);
4200 }
4201
4202 #[simd_test(enable = "sse2")]
4203 unsafe fn test_mm_cmpunord_sd() {
4204 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4205 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4206 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4207 assert_eq_m128i(r, e);
4208 }
4209
4210 #[simd_test(enable = "sse2")]
4211 unsafe fn test_mm_cmpneq_sd() {
4212 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4213 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4214 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4215 assert_eq_m128i(r, e);
4216 }
4217
4218 #[simd_test(enable = "sse2")]
4219 unsafe fn test_mm_cmpnlt_sd() {
4220 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4221 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4222 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4223 assert_eq_m128i(r, e);
4224 }
4225
4226 #[simd_test(enable = "sse2")]
4227 unsafe fn test_mm_cmpnle_sd() {
4228 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4229 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4230 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4231 assert_eq_m128i(r, e);
4232 }
4233
4234 #[simd_test(enable = "sse2")]
4235 unsafe fn test_mm_cmpngt_sd() {
4236 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4237 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4238 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4239 assert_eq_m128i(r, e);
4240 }
4241
4242 #[simd_test(enable = "sse2")]
4243 unsafe fn test_mm_cmpnge_sd() {
4244 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4245 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4246 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4247 assert_eq_m128i(r, e);
4248 }
4249
4250 #[simd_test(enable = "sse2")]
4251 unsafe fn test_mm_cmpeq_pd() {
4252 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4253 let e = _mm_setr_epi64x(!0, 0);
4254 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4255 assert_eq_m128i(r, e);
4256 }
4257
4258 #[simd_test(enable = "sse2")]
4259 unsafe fn test_mm_cmplt_pd() {
4260 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4261 let e = _mm_setr_epi64x(0, !0);
4262 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4263 assert_eq_m128i(r, e);
4264 }
4265
4266 #[simd_test(enable = "sse2")]
4267 unsafe fn test_mm_cmple_pd() {
4268 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4269 let e = _mm_setr_epi64x(!0, !0);
4270 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4271 assert_eq_m128i(r, e);
4272 }
4273
4274 #[simd_test(enable = "sse2")]
4275 unsafe fn test_mm_cmpgt_pd() {
4276 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4277 let e = _mm_setr_epi64x(0, 0);
4278 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4279 assert_eq_m128i(r, e);
4280 }
4281
4282 #[simd_test(enable = "sse2")]
4283 unsafe fn test_mm_cmpge_pd() {
4284 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4285 let e = _mm_setr_epi64x(!0, 0);
4286 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4287 assert_eq_m128i(r, e);
4288 }
4289
4290 #[simd_test(enable = "sse2")]
4291 unsafe fn test_mm_cmpord_pd() {
4292 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4293 let e = _mm_setr_epi64x(0, !0);
4294 let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
4295 assert_eq_m128i(r, e);
4296 }
4297
4298 #[simd_test(enable = "sse2")]
4299 unsafe fn test_mm_cmpunord_pd() {
4300 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4301 let e = _mm_setr_epi64x(!0, 0);
4302 let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
4303 assert_eq_m128i(r, e);
4304 }
4305
4306 #[simd_test(enable = "sse2")]
4307 unsafe fn test_mm_cmpneq_pd() {
4308 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4309 let e = _mm_setr_epi64x(!0, !0);
4310 let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
4311 assert_eq_m128i(r, e);
4312 }
4313
4314 #[simd_test(enable = "sse2")]
4315 unsafe fn test_mm_cmpnlt_pd() {
4316 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4317 let e = _mm_setr_epi64x(0, 0);
4318 let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
4319 assert_eq_m128i(r, e);
4320 }
4321
4322 #[simd_test(enable = "sse2")]
4323 unsafe fn test_mm_cmpnle_pd() {
4324 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4325 let e = _mm_setr_epi64x(0, 0);
4326 let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
4327 assert_eq_m128i(r, e);
4328 }
4329
4330 #[simd_test(enable = "sse2")]
4331 unsafe fn test_mm_cmpngt_pd() {
4332 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4333 let e = _mm_setr_epi64x(0, !0);
4334 let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
4335 assert_eq_m128i(r, e);
4336 }
4337
4338 #[simd_test(enable = "sse2")]
4339 unsafe fn test_mm_cmpnge_pd() {
4340 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4341 let e = _mm_setr_epi64x(0, !0);
4342 let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
4343 assert_eq_m128i(r, e);
4344 }
4345
4346 #[simd_test(enable = "sse2")]
4347 unsafe fn test_mm_comieq_sd() {
4348 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4349 assert!(_mm_comieq_sd(a, b) != 0);
4350
4351 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
4352 assert!(_mm_comieq_sd(a, b) == 0);
4353 }
4354
4355 #[simd_test(enable = "sse2")]
4356 unsafe fn test_mm_comilt_sd() {
4357 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4358 assert!(_mm_comilt_sd(a, b) == 0);
4359 }
4360
4361 #[simd_test(enable = "sse2")]
4362 unsafe fn test_mm_comile_sd() {
4363 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4364 assert!(_mm_comile_sd(a, b) != 0);
4365 }
4366
4367 #[simd_test(enable = "sse2")]
4368 unsafe fn test_mm_comigt_sd() {
4369 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4370 assert!(_mm_comigt_sd(a, b) == 0);
4371 }
4372
4373 #[simd_test(enable = "sse2")]
4374 unsafe fn test_mm_comige_sd() {
4375 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4376 assert!(_mm_comige_sd(a, b) != 0);
4377 }
4378
4379 #[simd_test(enable = "sse2")]
4380 unsafe fn test_mm_comineq_sd() {
4381 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4382 assert!(_mm_comineq_sd(a, b) == 0);
4383 }
4384
4385 #[simd_test(enable = "sse2")]
4386 unsafe fn test_mm_ucomieq_sd() {
4387 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4388 assert!(_mm_ucomieq_sd(a, b) != 0);
4389
4390 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
4391 assert!(_mm_ucomieq_sd(a, b) == 0);
4392 }
4393
4394 #[simd_test(enable = "sse2")]
4395 unsafe fn test_mm_ucomilt_sd() {
4396 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4397 assert!(_mm_ucomilt_sd(a, b) == 0);
4398 }
4399
4400 #[simd_test(enable = "sse2")]
4401 unsafe fn test_mm_ucomile_sd() {
4402 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4403 assert!(_mm_ucomile_sd(a, b) != 0);
4404 }
4405
4406 #[simd_test(enable = "sse2")]
4407 unsafe fn test_mm_ucomigt_sd() {
4408 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4409 assert!(_mm_ucomigt_sd(a, b) == 0);
4410 }
4411
4412 #[simd_test(enable = "sse2")]
4413 unsafe fn test_mm_ucomige_sd() {
4414 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4415 assert!(_mm_ucomige_sd(a, b) != 0);
4416 }
4417
4418 #[simd_test(enable = "sse2")]
4419 unsafe fn test_mm_ucomineq_sd() {
4420 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4421 assert!(_mm_ucomineq_sd(a, b) == 0);
4422 }
4423
4424 #[simd_test(enable = "sse2")]
4425 unsafe fn test_mm_movemask_pd() {
4426 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
4427 assert_eq!(r, 0b01);
4428
4429 let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
4430 assert_eq!(r, 0b11);
4431 }
4432
4433 #[repr(align(16))]
4434 struct Memory {
4435 data: [f64; 4],
4436 }
4437
4438 #[simd_test(enable = "sse2")]
4439 unsafe fn test_mm_load_pd() {
4440 let mem = Memory {
4441 data: [1.0f64, 2.0, 3.0, 4.0],
4442 };
4443 let vals = &mem.data;
4444 let d = vals.as_ptr();
4445
4446 let r = _mm_load_pd(d);
4447 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4448 }
4449
4450 #[simd_test(enable = "sse2")]
4451 unsafe fn test_mm_load_sd() {
4452 let a = 1.;
4453 let expected = _mm_setr_pd(a, 0.);
4454 let r = _mm_load_sd(&a);
4455 assert_eq_m128d(r, expected);
4456 }
4457
4458 #[simd_test(enable = "sse2")]
4459 unsafe fn test_mm_loadh_pd() {
4460 let a = _mm_setr_pd(1., 2.);
4461 let b = 3.;
4462 let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
4463 let r = _mm_loadh_pd(a, &b);
4464 assert_eq_m128d(r, expected);
4465 }
4466
4467 #[simd_test(enable = "sse2")]
4468 unsafe fn test_mm_loadl_pd() {
4469 let a = _mm_setr_pd(1., 2.);
4470 let b = 3.;
4471 let expected = _mm_setr_pd(3., get_m128d(a, 1));
4472 let r = _mm_loadl_pd(a, &b);
4473 assert_eq_m128d(r, expected);
4474 }
4475
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.offset(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

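    // note: `_mm_store1_pd` (and its alias `_mm_store_pd1`) broadcasts the low
    // lane into both slots of the aligned destination.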
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

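    // note: `_mm_storer_pd` stores the two lanes in reversed order.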
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.offset(offset as isize);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

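    // note: when converting f64 -> f32, values outside the f32 range round to
    // ±infinity, and the upper two lanes of the result are zeroed.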
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

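    // note: float-to-integer conversions that overflow, as well as NaN inputs,
    // yield the x86 "integer indefinite" value, i.e. `i32::MIN`.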
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

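    // note: `_mm_cvtsd_si32` converts only the low lane, rounding according to
    // the current rounding mode (round-to-nearest-even by default).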
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

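    // note: `_mm_cvtss_sd` widens the low f32 of `b` to f64 and keeps the high
    // lane of `a` unchanged.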
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

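    // note: the `cvtt` variants truncate toward zero instead of using the
    // current rounding mode; out-of-range and NaN inputs still produce
    // `i32::MIN`.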
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

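    // note the argument order: `_mm_set_pd` takes (high, low) while
    // `_mm_setr_pd` takes (low, high).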
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

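    // note: `_mm_load1_pd` and its alias `_mm_load_pd1` broadcast a single f64
    // from memory into both lanes.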
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

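    // note: unpackhi/unpacklo interleave the high/low lanes of the two
    // operands.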
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

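    // note: for `__m128d` only the low two bits of the shuffle immediate are
    // used: bit 0 selects the lane taken from `a`, bit 1 the lane taken from
    // `b`.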
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }

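    // note: `_mm_move_sd` takes the low lane from `b` and the high lane from
    // `a`.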
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

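    // note: the cast intrinsics are pure bit reinterpretations between vector
    // types; they compile to no instructions.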
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}